Merge "Optimizing/ARM: Fix CmpConstant()."
diff --git a/Android.mk b/Android.mk
index 97a82e2..4f73127 100644
--- a/Android.mk
+++ b/Android.mk
@@ -98,16 +98,26 @@
 ART_HOST_DEPENDENCIES := \
 	$(ART_HOST_EXECUTABLES) \
 	$(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+	$(HOST_OUT_JAVA_LIBRARIES)/core-oj-hostdex.jar \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := \
 	$(ART_TARGET_EXECUTABLES) \
 	$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+	$(TARGET_OUT_JAVA_LIBRARIES)/core-oj.jar \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 ifdef TARGET_2ND_ARCH
 ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 endif
 ifdef HOST_2ND_ARCH
 ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libjavacore.so
+ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdk.so
+ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 endif
 
 ########################################################################
@@ -390,6 +400,35 @@
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
+####################################################################################################
+# Fake packages to ensure generation of libopenjdkd when one builds with mm/mmm/mmma.
+#
+# The library is required for starting a runtime in debug mode, but libartd does not depend on it
+# directly (a direct dependency would create a cycle).
+#
+# Note: * Since the package is phony and exists only to create a dependency, its name is
+#         irrelevant.
+#       * We set MULTILIB explicitly to "both" to state here that we want both libraries on
+#         64-bit systems, even though that is the default.
+
+# ART on the host.
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-host-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+LOCAL_IS_HOST_MODULE := true
+include $(BUILD_PHONY_PACKAGE)
+endif
+
+# ART on the target.
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+include $(CLEAR_VARS)
+LOCAL_MODULE := art-libartd-libopenjdkd-target-dependency
+LOCAL_MULTILIB := both
+LOCAL_REQUIRED_MODULES := libopenjdkd
+include $(BUILD_PHONY_PACKAGE)
+endif
+
 ########################################################################
 # "m build-art" for quick minimal build
 .PHONY: build-art
@@ -405,10 +444,10 @@
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
-build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES)
+build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 .PHONY: build-art-target-tests
-build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS)
+build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
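+# Note: prerequisites after '|' are order-only: make builds them before the target,
+# but changes to them do not by themselves cause the target to be remade.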
 
 ########################################################################
 # targets to switch back and forth from libdvm to libart
diff --git a/NOTICE b/NOTICE
index d27f6a6..d79b004 100644
--- a/NOTICE
+++ b/NOTICE
@@ -262,5 +262,3 @@
 pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
 ARM contributions   llvm/lib/Target/ARM/LICENSE.TXT
 md5 contributions   llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
-
--------------------------------------------------------------------
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index c53479c..ecc9e76 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -80,20 +80,24 @@
 TARGET_CORE_IMG_LOCATION := $(ART_TARGET_TEST_OUT)/core.art
 
 # Jar files for core.art.
-TARGET_CORE_JARS := core-libart conscrypt okhttp bouncycastle
+TARGET_CORE_JARS := core-oj core-libart conscrypt okhttp bouncycastle apache-xml
 HOST_CORE_JARS := $(addsuffix -hostdex,$(TARGET_CORE_JARS))
 
 HOST_CORE_DEX_LOCATIONS   := $(foreach jar,$(HOST_CORE_JARS),  $(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
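+# When ART_TEST_ANDROID_ROOT is set, tests run against a custom Android root, so the
+# core jars are found under $(ART_TEST_ANDROID_ROOT)/framework instead of the default
+# boot-jar directory.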
+ifeq ($(ART_TEST_ANDROID_ROOT),)
 TARGET_CORE_DEX_LOCATIONS := $(foreach jar,$(TARGET_CORE_JARS),/$(DEXPREOPT_BOOT_JAR_DIR)/$(jar).jar)
+else
+TARGET_CORE_DEX_LOCATIONS := $(foreach jar,$(TARGET_CORE_JARS),$(ART_TEST_ANDROID_ROOT)/framework/$(jar).jar)
+endif
 
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-# Classpath for Jack compilation: we only need core-libart.
+# Classpath for Jack compilation: we need core-oj and core-libart.
-HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
-HOST_JACK_CLASSPATH                := $(foreach dep,$(HOST_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
-TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
-TARGET_JACK_CLASSPATH              := $(foreach dep,$(TARGET_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
+HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
+HOST_JACK_CLASSPATH                := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack)
+TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
+TARGET_JACK_CLASSPATH              := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
 endif
 endif # ART_ANDROID_COMMON_PATH_MK
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index edf107e..ab70367 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -114,6 +114,9 @@
 # Do you want run-tests with the --debuggable flag
 ART_TEST_RUN_TEST_DEBUGGABLE ?= $(ART_TEST_FULL)
 
+# Do you want to test multi-part boot-image functionality?
+ART_TEST_RUN_TEST_MULTI_IMAGE ?= $(ART_TEST_FULL)
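+# For example (hypothetical invocation), multi-image run-tests can be forced with:
+#   ART_TEST_RUN_TEST_MULTI_IMAGE=true m test-art-host-run-test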
+
 # Define the command run on test failure. $(1) is the name of the test. Executed by the shell.
 define ART_TEST_FAILED
   ( [ -f $(ART_HOST_TEST_DIR)/skipped/$(1) ] || \
@@ -202,7 +205,7 @@
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
     endif
     include $(BUILD_JAVA_LIBRARY)
     $(5) := $$(LOCAL_INSTALLED_MODULE)
@@ -218,7 +221,7 @@
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
       LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
-      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
+      LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp -D jack.dex.output.multidex.legacy=true
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
     $(6) := $$(LOCAL_INSTALLED_MODULE)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1c23929..af64470 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -441,7 +441,9 @@
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_TARGET_GTEST_$(file)_DEX)) \
     $$(ART_TARGET_NATIVETEST_OUT)/$$(TARGET_$(2)ARCH)/$(1) \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
-    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar
+    $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdkd.so \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): test-art-target-sync
@@ -483,6 +485,7 @@
   # Dependencies for all host gtests.
   gtest_deps := $$(HOST_CORE_DEX_LOCATIONS) \
     $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$$(ART_HOST_SHLIB_EXTENSION) \
+    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$$(ART_HOST_SHLIB_EXTENSION) \
     $$(gtest_exe) \
     $$(ART_GTEST_$(1)_HOST_DEPS) \
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX))
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 50600ef..884f698 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -42,6 +42,7 @@
 # $(3): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 # $(4): wrapper, e.g., valgrind.
 # $(5): dex2oat suffix, e.g., valgrind requires 32 right now.
+# $(6): multi-image.
 # NB: we depend on HOST_CORE_DEX_LOCATIONS to be sure the dex files are in frameworks for
 # run-test --no-image.
 define create-core-oat-host-rules
@@ -92,14 +93,25 @@
     $$(error found $(2) expected pic or no-pic)
   endif
 
-  core_image_name := $($(3)HOST_CORE_IMG_OUT_BASE)$$(core_infix)$$(core_pic_infix)$(4)$(CORE_IMG_SUFFIX)
-  core_oat_name := $($(3)HOST_CORE_OAT_OUT_BASE)$$(core_infix)$$(core_pic_infix)$(4)$(CORE_OAT_SUFFIX)
+  # If $(6) is true, generate a multi-image.
+  ifeq ($(6),true)
+    core_multi_infix := -multi
+    core_multi_param := --multi-image --no-inline-from=core-oj-hostdex.jar
+    core_multi_group := _multi
+  else
+    core_multi_infix :=
+    core_multi_param :=
+    core_multi_group :=
+  endif
+
+  core_image_name := $($(3)HOST_CORE_IMG_OUT_BASE)$$(core_infix)$$(core_pic_infix)$$(core_multi_infix)$(4)$(CORE_IMG_SUFFIX)
+  core_oat_name := $($(3)HOST_CORE_OAT_OUT_BASE)$$(core_infix)$$(core_pic_infix)$$(core_multi_infix)$(4)$(CORE_OAT_SUFFIX)
 
   # Using the bitness suffix makes it easier to add as a dependency for the run-test mk.
   ifeq ($(3),)
-    $(4)HOST_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
+    $(4)HOST_CORE_IMAGE_$(1)_$(2)$$(core_multi_group)_64 := $$(core_image_name)
   else
-    $(4)HOST_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
+    $(4)HOST_CORE_IMAGE_$(1)_$(2)$$(core_multi_group)_32 := $$(core_image_name)
   endif
   $(4)HOST_CORE_IMG_OUTS += $$(core_image_name)
   $(4)HOST_CORE_OAT_OUTS += $$(core_oat_name)
@@ -111,6 +123,7 @@
 $$(core_image_name): PRIVATE_CORE_COMPILE_OPTIONS := $$(core_compile_options)
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
+$$(core_image_name): PRIVATE_CORE_MULTI_PARAM := $$(core_multi_param)
 $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency)
 	@echo "host dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
@@ -122,7 +135,7 @@
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \
 	  $$(LOCAL_$(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES_OPTION) \
 	  --host --android-root=$$(HOST_OUT) --include-patch-information --generate-debug-info \
-	  $$(PRIVATE_CORE_COMPILE_OPTIONS)
+	  $$(PRIVATE_CORE_MULTI_PARAM) $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
 $$(core_oat_name): $$(core_image_name)
 
@@ -138,32 +151,40 @@
 # $(1): compiler - default, optimizing, jit, interpreter or interpreter-access-checks.
 # $(2): wrapper.
 # $(3): dex2oat suffix.
+# $(4): multi-image.
 define create-core-oat-host-rule-combination
-  $(call create-core-oat-host-rules,$(1),no-pic,,$(2),$(3))
-  $(call create-core-oat-host-rules,$(1),pic,,$(2),$(3))
+  $(call create-core-oat-host-rules,$(1),no-pic,,$(2),$(3),$(4))
+  $(call create-core-oat-host-rules,$(1),pic,,$(2),$(3),$(4))
 
   ifneq ($(HOST_PREFER_32_BIT),true)
-    $(call create-core-oat-host-rules,$(1),no-pic,2ND_,$(2),$(3))
-    $(call create-core-oat-host-rules,$(1),pic,2ND_,$(2),$(3))
+    $(call create-core-oat-host-rules,$(1),no-pic,2ND_,$(2),$(3),$(4))
+    $(call create-core-oat-host-rules,$(1),pic,2ND_,$(2),$(3),$(4))
   endif
 endef
 
-$(eval $(call create-core-oat-host-rule-combination,default,,))
-$(eval $(call create-core-oat-host-rule-combination,optimizing,,))
-$(eval $(call create-core-oat-host-rule-combination,interpreter,,))
-$(eval $(call create-core-oat-host-rule-combination,interp-ac,,))
-$(eval $(call create-core-oat-host-rule-combination,jit,,))
+$(eval $(call create-core-oat-host-rule-combination,default,,,false))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,,,false))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,,,false))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,,,false))
+$(eval $(call create-core-oat-host-rule-combination,jit,,,false))
+$(eval $(call create-core-oat-host-rule-combination,default,,,true))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,,,true))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,,,true))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,,,true))
+$(eval $(call create-core-oat-host-rule-combination,jit,,,true))
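+# The multi-image variants add a "-multi" infix, so e.g. the optimizing/pic combination
+# yields an image named along the lines of core-optimizing-pic-multi.art (hypothetical
+# name, composed from the infixes in create-core-oat-host-rules).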
 
 valgrindHOST_CORE_IMG_OUTS :=
 valgrindHOST_CORE_OAT_OUTS :=
-$(eval $(call create-core-oat-host-rule-combination,default,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32))
-$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32))
+$(eval $(call create-core-oat-host-rule-combination,default,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32,false))
+$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32,false))
 
 valgrind-test-art-host-dex2oat-host: $(valgrindHOST_CORE_IMG_OUTS)
 
+test-art-host-dex2oat-host: $(HOST_CORE_IMG_OUTS)
+
 define create-core-oat-target-rules
   core_compile_options :=
   core_image_name :=
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 4aced5b..4dcaf80 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -80,8 +80,7 @@
   }
 }
 
-static Runtime* StartRuntime(const char* boot_image_location,
-                             InstructionSet instruction_set) {
+static Runtime* StartRuntime(const char* boot_image_location, InstructionSet instruction_set) {
   CHECK(boot_image_location != nullptr);
 
   RuntimeOptions options;
diff --git a/compiler/Android.mk b/compiler/Android.mk
index bdd9a84..4589736 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -92,7 +92,6 @@
 	optimizing/parallel_move_resolver.cc \
 	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
-	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
 	optimizing/sharpening.cc \
@@ -109,7 +108,8 @@
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
 	image_writer.cc \
-	oat_writer.cc
+	oat_writer.cc \
+	profile_assistant.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index c7c1907..b5fd1e0 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -208,7 +208,8 @@
                                             false,
                                             timer_.get(),
                                             -1,
-                                            ""));
+                                            /* dex_to_oat_map */ nullptr,
+                                            /* profile_compilation_info */ nullptr));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 3b88021..0130ef4 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -448,6 +448,10 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
+  // b/26327751: the round intrinsic is disabled for now; returning false makes the
+  // compiler fall back to the non-intrinsic implementation. The extra parentheses
+  // avoid a constant-condition compiler warning.
+  if ((true)) {
+    return false;
+  }
   int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index f48947d..32d7518 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "dex/compiler_ir.h"
+#include "driver/compiler_driver.h"
 #include "thread-inl.h"
 #include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
@@ -777,6 +778,17 @@
 
 bool DexFileMethodInliner::GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
                                      uint32_t method_idx) {
+  // Check that we're allowed to inline.
+  {
+    CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit();
+    if (!cu->compiler_driver->MayInline(dex_file_, cu->dex_file)) {
+      VLOG(compiler) << "Won't inline " << method_idx << " in "
+                     << cu->dex_file->GetLocation() << " from "
+                     << dex_file_->GetLocation();
+      return false;
+    }
+  }
+
   InlineMethod method;
   {
     ReaderMutexLock mu(Thread::Current(), lock_);
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 24daf2f..12568a4 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -58,6 +58,7 @@
       CompilerOptions::kDefaultNumDexMethodsThreshold,
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
+      nullptr,
       false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       false,
@@ -74,9 +75,25 @@
     std::unique_ptr<const InstructionSetFeatures> isa_features;
     std::string error;
     isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map,
-                          Compiler::kQuick, isa, isa_features.get(),
-                          false, nullptr, nullptr, nullptr, 0, false, false, "", false, 0, -1, "");
+    CompilerDriver driver(&compiler_options,
+                          &verification_results,
+                          &method_inliner_map,
+                          Compiler::kQuick,
+                          isa,
+                          isa_features.get(),
+                          false,
+                          nullptr,
+                          nullptr,
+                          nullptr,
+                          0,
+                          false,
+                          false,
+                          "",
+                          false,
+                          0,
+                          -1,
+                          nullptr,
+                          nullptr);
     ClassLinker* linker = nullptr;
     CompilationUnit cu(&pool, isa, &driver, linker);
     DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } };  // NOLINT
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index e977ebf..b39fe4d 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -41,6 +41,7 @@
         CompilerOptions::kDefaultNumDexMethodsThreshold,
         CompilerOptions::kDefaultInlineDepthLimit,
         CompilerOptions::kDefaultInlineMaxCodeUnits,
+        nullptr,
         false,
         CompilerOptions::kDefaultTopKProfileThreshold,
         false,
@@ -72,7 +73,8 @@
         false,
         0,
         -1,
-        ""));
+        nullptr,
+        nullptr));
     cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
     DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
         cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index c6dbd24..f18fa67 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -45,7 +45,8 @@
                         false,
                         nullptr,
                         -1,
-                        "");
+                        nullptr,
+                        nullptr);
   CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
 
   ASSERT_TRUE(storage->DedupeEnabled());  // The default.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 5630b08..043bd93 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -334,19 +334,21 @@
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
 };
 
-CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
-                               VerificationResults* verification_results,
-                               DexFileToMethodInlinerMap* method_inliner_map,
-                               Compiler::Kind compiler_kind,
-                               InstructionSet instruction_set,
-                               const InstructionSetFeatures* instruction_set_features,
-                               bool boot_image, std::unordered_set<std::string>* image_classes,
-                               std::unordered_set<std::string>* compiled_classes,
-                               std::unordered_set<std::string>* compiled_methods,
-                               size_t thread_count, bool dump_stats, bool dump_passes,
-                               const std::string& dump_cfg_file_name, bool dump_cfg_append,
-                               CumulativeLogger* timer, int swap_fd,
-                               const std::string& profile_file)
+CompilerDriver::CompilerDriver(
+    const CompilerOptions* compiler_options,
+    VerificationResults* verification_results,
+    DexFileToMethodInlinerMap* method_inliner_map,
+    Compiler::Kind compiler_kind,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    bool boot_image, std::unordered_set<std::string>* image_classes,
+    std::unordered_set<std::string>* compiled_classes,
+    std::unordered_set<std::string>* compiled_methods,
+    size_t thread_count, bool dump_stats, bool dump_passes,
+    const std::string& dump_cfg_file_name, bool dump_cfg_append,
+    CumulativeLogger* timer, int swap_fd,
+    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+    const ProfileCompilationInfo* profile_compilation_info)
     : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
@@ -374,7 +376,9 @@
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
-      compiled_method_storage_(swap_fd) {
+      dex_file_oat_filename_map_(dex_to_oat_map),
+      compiled_method_storage_(swap_fd),
+      profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -382,12 +386,6 @@
   compiler_->Init();
 
   CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
-
-  // Read the profile file if one is provided.
-  if (!profile_file.empty()) {
-    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
-    LOG(INFO) << "Using profile data from file " << profile_file;
-  }
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -1538,6 +1536,12 @@
       use_dex_cache = true;
     }
   }
+  if (!use_dex_cache && IsBootImage()) {
+    if (!AreInSameOatFile(&(const_cast<mirror::Class*>(referrer_class)->GetDexFile()),
+                          &declaring_class->GetDexFile())) {
+      use_dex_cache = true;
+    }
+  }
   // The method is not defined within this dex file. We need a dex cache slot within the current
   // dex file or direct pointers.
   bool must_use_direct_pointers = false;
@@ -1571,12 +1575,14 @@
       *type = sharp_type;
     }
   } else {
-    auto* image_space = heap->GetBootImageSpace();
     bool method_in_image = false;
-    if (image_space != nullptr) {
+    const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+    for (gc::space::ImageSpace* image_space : image_spaces) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
-      method_in_image = method_section.Contains(
-          reinterpret_cast<uint8_t*>(method) - image_space->Begin());
+      if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
+        method_in_image = true;
+        break;
+      }
     }
     if (method_in_image || compiling_boot || runtime->UseJit()) {
       // We know we must be able to get to the method in the image, so use that pointer.
@@ -2295,15 +2301,11 @@
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
-  if (profile_compilation_info_ != nullptr) {
-    if (!profile_compilation_info_->Load(dex_files)) {
-      LOG(WARNING) << "Failed to load offline profile info from "
-          << profile_compilation_info_->GetFilename()
-          << ". No methods will be compiled";
-    } else if (kDebugProfileGuidedCompilation) {
-      LOG(INFO) << "[ProfileGuidedCompilation] "
-          << profile_compilation_info_->DumpInfo();
-    }
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
@@ -2572,4 +2574,15 @@
   return inliner->IsStringInitMethodIndex(method_index);
 }
 
+bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from,
+                                       const DexFile* inlined_into) const {
+  // Inlining across dex files is not allowed when the callee's dex file (inlined_from)
+  // is the designated no-inline-from dex file.
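+  // For example, when dex2oat is run with --no-inline-from=core-oj-hostdex.jar (as the
+  // multi-image core rules in build/Android.oat.mk do), methods from core-oj are never
+  // inlined into methods defined in other dex files.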
+  if (inlined_from != inlined_into &&
+      compiler_options_->GetNoInlineFromDexFile() == inlined_from) {
+    return false;
+  }
+
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f0360ce..3847c81 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,7 +97,8 @@
                  size_t thread_count, bool dump_stats, bool dump_passes,
                  const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
-                 const std::string& profile_file);
+                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+                 const ProfileCompilationInfo* profile_compilation_info);
 
   ~CompilerDriver();
 
@@ -113,6 +114,18 @@
         : ArrayRef<const DexFile* const>();
   }
 
+  // Are the given dex files compiled into the same oat file? Should only be called after
+  // GetDexFilesForOatFile, as the conservative answer (when we don't have a map) is true.
+  bool AreInSameOatFile(const DexFile* d1, const DexFile* d2) {
+    if (dex_file_oat_filename_map_ == nullptr) {
+      // TODO: Check for this wrt/ apps and boot image calls.
+      return true;
+    }
+    auto it1 = dex_file_oat_filename_map_->find(d1);
+    auto it2 = dex_file_oat_filename_map_->find(d2);
+    if (it1 == dex_file_oat_filename_map_->end() || it2 == dex_file_oat_filename_map_->end()) {
+      return true;  // Unregistered dex file: fall back to the conservative answer.
+    }
+    // Compare the mapped oat file names (the same name pointer is registered for all
+    // dex files of one oat file). Comparing the iterators themselves would only match
+    // when d1 == d2, not when two distinct dex files share an oat file.
+    return it1->second == it2->second;
+  }
+
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
@@ -471,6 +484,13 @@
   bool CanAssumeClassIsLoaded(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
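+  // May code from dex file inlined_from be inlined into code from dex file inlined_into?
+  // Note: the no-inline-from restriction is currently only enforced on host builds
+  // (where !kIsTargetBuild holds), e.g., when compiling the boot image.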
+  bool MayInline(const DexFile* inlined_from, const DexFile* inlined_into) const {
+    if (!kIsTargetBuild) {
+      return MayInlineInternal(inlined_from, inlined_into);
+    }
+    return true;
+  }
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -587,6 +607,8 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
+  bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -621,9 +643,8 @@
 
   const bool boot_image_;
 
-  // If image_ is true, specifies the classes that will be included in
-  // the image. Note if image_classes_ is null, all classes are
-  // included in the image.
+  // If boot_image_ is true, specifies the classes that will be included in the image.
+  // Note that if image_classes_ is null, all classes are included in the image.
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
 
   // Specifies the classes that will be compiled. Note that if classes_to_compile_ is null,
@@ -636,9 +657,6 @@
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
-  // Info for profile guided compilation.
-  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
@@ -663,8 +681,14 @@
   // List of dex files that will be stored in the oat file.
   const std::vector<const DexFile*>* dex_files_for_oat_file_;
 
+  // Map from dex files to the oat file (name) they will be compiled into.
+  const std::unordered_map<const DexFile*, const char*>* dex_file_oat_filename_map_;
+
   CompiledMethodStorage compiled_method_storage_;
 
+  // Info for profile guided compilation.
+  const ProfileCompilationInfo* const profile_compilation_info_;
+
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index b6abc6e..82c0e86 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -142,7 +142,7 @@
   // TODO: check that all Method::GetCode() values are non-null
 }
 
-TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
+TEST_F(CompilerDriverTest, DISABLED_AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 4d2d924..209bb5a 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -31,9 +31,11 @@
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
       inline_depth_limit_(kUnsetInlineDepthLimit),
       inline_max_code_units_(kUnsetInlineMaxCodeUnits),
+      no_inline_from_(nullptr),
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
+      native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(kDefaultGenerateDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
@@ -58,6 +60,7 @@
                                  size_t num_dex_methods_threshold,
                                  size_t inline_depth_limit,
                                  size_t inline_max_code_units,
+                                 const DexFile* no_inline_from,
                                  bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
@@ -78,9 +81,11 @@
     num_dex_methods_threshold_(num_dex_methods_threshold),
     inline_depth_limit_(inline_depth_limit),
     inline_max_code_units_(inline_max_code_units),
+    no_inline_from_(no_inline_from),
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     debuggable_(debuggable),
+    native_debuggable_(kDefaultNativeDebuggable),
     generate_debug_info_(generate_debug_info),
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
@@ -207,6 +212,10 @@
   } else if (option == "--debuggable") {
     debuggable_ = true;
     generate_debug_info_ = true;
+  } else if (option == "--native-debuggable") {
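+    // Native debugging implies Java debuggability and debug info generation.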
+    native_debuggable_ = true;
+    debuggable_ = true;
+    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab4..f8032bb 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -49,10 +49,11 @@
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
+  static const bool kDefaultNativeDebuggable = false;
   static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
-  static const size_t kDefaultInlineMaxCodeUnits = 20;
+  static const size_t kDefaultInlineMaxCodeUnits = 32;
   static constexpr size_t kUnsetInlineDepthLimit = -1;
   static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
 
@@ -71,6 +72,7 @@
                   size_t num_dex_methods_threshold,
                   size_t inline_depth_limit,
                   size_t inline_max_code_units,
+                  const DexFile* no_inline_from,
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
@@ -162,6 +164,10 @@
     return debuggable_;
   }
 
+  bool GetNativeDebuggable() const {
+    return native_debuggable_;
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
@@ -212,6 +218,10 @@
     return abort_on_hard_verifier_failure_;
   }
 
+  const DexFile* GetNoInlineFromDexFile() const {
+    return no_inline_from_;
+  }
+
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
  private:
@@ -236,10 +246,15 @@
   size_t num_dex_methods_threshold_;
   size_t inline_depth_limit_;
   size_t inline_max_code_units_;
+
+  // A dex file from which we should not inline code.
+  const DexFile* no_inline_from_;
+
   bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
+  bool native_debuggable_;
   bool generate_debug_info_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 06553a6..2bc8c89 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -36,6 +36,15 @@
 namespace art {
 namespace dwarf {
 
+// The ARM specification defines three special mapping symbols
+// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
+// These symbols can be used by tools, for example, to pretty
+// print instructions correctly.  Objdump will use them if they
+// exist, but it will still work well without them.
+// However, these extra symbols take space, so let's just generate
+// one symbol which marks the whole .text section as code.
+constexpr bool kGenerateSingleArmMappingSymbol = true;
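+//
+// Illustration (hypothetical objdump -t output for a generated oat file):
+//   0000400d g     F .text  00000038 void java.lang.Object.wait()
+//   0000400c l       .text  00000000 $t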
+
 static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
   switch (isa) {
     case kArm:
@@ -207,8 +216,7 @@
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
                      const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
-  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
-        format == dwarf::DW_EH_FRAME_FORMAT);
+  CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
   std::vector<uint32_t> binary_search_table;
@@ -220,7 +228,7 @@
   }
 
   // Write .eh_frame/.debug_frame section.
-  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+  auto* cfi_section = (format == DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
                        : builder->GetEhFrame());
   {
@@ -1134,21 +1142,87 @@
   }
 }
 
+template <typename ElfTypes>
+void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                       const ArrayRef<const MethodDebugInfo>& method_infos) {
+  bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
+
+  // Find all addresses (low_pc) which contain deduped methods.
+  // The first instance of method is not marked deduped_, but the rest is.
+  std::unordered_set<uint32_t> deduped_addresses;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
+    }
+  }
+
+  strtab->Start();
+  strtab->Write("");  // strtab should start with empty string.
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      continue;  // Add symbol only for the first instance.
+    }
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
+      name += " [DEDUPED]";
+    }
+
+    uint32_t low_pc = info.low_pc_;
+    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
+    low_pc += info.compiled_method_->CodeDelta();
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Conforming to AAELF, add a $t mapping symbol to indicate the start of a sequence of
+    // Thumb2 instructions, so that disassembler tools can decode them correctly.
+    // Note that even if we generate just a single mapping symbol, ARM's Streamline
+    // requires it to match a function symbol; address 0 alone does not work.
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
+      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
+        generated_mapping_symbol = true;
+      }
+    }
+  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
+}
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format) {
+  if (!method_infos.empty()) {
+    // Add methods to .symtab.
+    WriteDebugSymbols(builder, method_infos);
+    // Generate CFI (stack unwinding information).
+    WriteCFISection(builder, method_infos, cfi_format);
+    // Write DWARF .debug_* sections.
+    WriteDebugSections(builder, method_infos);
+  }
+}
+
 // Explicit instantiations
-template void WriteCFISection<ElfTypes32>(
+template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteCFISection<ElfTypes64>(
+    CFIFormat cfi_format);
+template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteDebugSections<ElfTypes32>(
-    ElfBuilder<ElfTypes32>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
-template void WriteDebugSections<ElfTypes64>(
-    ElfBuilder<ElfTypes64>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
+    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 9ed102f..7ec0be1 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -25,14 +25,10 @@
 namespace art {
 namespace dwarf {
 
-template<typename ElfTypes>
-void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                     CFIFormat format);
-
-template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos);
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index e411496..7b1bdd7 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -46,15 +46,6 @@
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
-// The ARM specification defines three special mapping symbols
-// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
-// These symbols can be used by tools, for example, to pretty
-// print instructions correctly.  Objdump will use them if they
-// exist, but it will still work well without them.
-// However, these extra symbols take space, so let's just generate
-// one symbol which marks the whole .text section as code.
-constexpr bool kGenerateSingleArmMappingSymbol = true;
-
 template <typename ElfTypes>
 class ElfWriterQuick FINAL : public ElfWriter {
  public:
@@ -99,10 +90,6 @@
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
-
-template <typename ElfTypes>
 ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
                                          const CompilerOptions* compiler_options,
                                          File* elf_file)
@@ -165,14 +152,7 @@
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder_.get(), method_infos);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder_.get(), method_infos);
-    }
+    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
   }
 }
 
@@ -199,64 +179,6 @@
   return builder_->GetStream();
 }
 
-template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
-  bool generated_mapping_symbol = false;
-  auto* strtab = builder->GetStrTab();
-  auto* symtab = builder->GetSymTab();
-
-  if (method_infos.empty()) {
-    return;
-  }
-
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      deduped_addresses.insert(info.low_pc_);
-    }
-  }
-
-  strtab->Start();
-  strtab->Write("");  // strtab should start with empty string.
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      continue;  // Add symbol only for the first instance.
-    }
-    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
-    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
-    }
-
-    uint32_t low_pc = info.low_pc_;
-    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
-
-    // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
-    // instructions, so that disassembler tools can correctly disassemble.
-    // Note that even if we generate just a single mapping symbol, ARM's Streamline
-    // requires it to match function symbol.  Just address 0 does not work.
-    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
-      }
-    }
-  }
-  strtab->End();
-
-  // Symbols are buffered and written after names (because they are smaller).
-  // We could also do two passes in this function to avoid the buffering.
-  symtab->Start();
-  symtab->Write();
-  symtab->End();
-}
-
 // Explicit instantiations
 template class ElfWriterQuick<ElfTypes32>;
 template class ElfWriterQuick<ElfTypes64>;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 15812dc..6859605 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -72,11 +72,18 @@
   ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
 
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
+  std::unordered_map<const DexFile*, const char*> dex_file_to_oat_filename_map;
+  std::vector<const char*> oat_filename_vector(1, oat_filename.c_str());
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file_to_oat_filename_map.emplace(dex_file, oat_filename.c_str());
+  }
   std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
                                                       requested_image_base,
                                                       /*compile_pic*/false,
                                                       /*compile_app_image*/false,
-                                                      storage_mode));
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_filename_map));
   // TODO: compile_pic should be a test argument.
   {
     {
@@ -131,12 +138,12 @@
   ASSERT_TRUE(dup_oat.get() != nullptr);
 
   {
-    bool success_image = writer->Write(kInvalidImageFd,
-                                       image_file.GetFilename(),
-                                       dup_oat->GetPath(),
-                                       dup_oat->GetPath());
+    std::vector<const char*> dup_oat_filename(1, dup_oat->GetPath().c_str());
+    std::vector<const char*> dup_image_filename(1, image_file.GetFilename().c_str());
+    bool success_image = writer->Write(kInvalidImageFd, dup_image_filename, dup_oat_filename);
     ASSERT_TRUE(success_image);
-    bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin());
+    bool success_fixup = ElfWriter::Fixup(dup_oat.get(),
+                                          writer->GetOatDataBegin(dup_oat_filename[0]));
     ASSERT_TRUE(success_fixup);
 
     ASSERT_EQ(dup_oat->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
@@ -181,7 +188,7 @@
   java_lang_dex_file_ = nullptr;
 
   MemMap::Init();
-  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str()));
+  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileNames()[0].c_str()));
 
   RuntimeOptions options;
   std::string image("-Ximage:");
@@ -203,10 +210,11 @@
   class_linker_ = runtime_->GetClassLinker();
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasImageSpace());
+  ASSERT_TRUE(heap->HasBootImageSpace());
   ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
-  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
+  // We loaded the runtime with an explicit image, so it must exist.
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[0];
   ASSERT_TRUE(image_space != nullptr);
   if (storage_mode == ImageHeader::kStorageModeUncompressed) {
     // Uncompressed, image should be smaller than file.
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 9545c83..d0bb201 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -76,23 +76,35 @@
 
 // Return true if an object is already in an image space.
 bool ImageWriter::IsInBootImage(const void* obj) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const uint8_t* image_begin = boot_image_space_->Begin();
-  // Real image end including ArtMethods and ArtField sections.
-  const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
-  return image_begin <= obj && obj < image_end;
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const uint8_t* image_begin = boot_image_space->Begin();
+    // Real image end including ArtMethods and ArtField sections.
+    const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize();
+    if (image_begin <= obj && obj < image_end) {
+      return true;
+    }
+  }
+  return false;
 }
 
 bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const ImageHeader& image_header = boot_image_space_->GetImageHeader();
-  return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const ImageHeader& image_header = boot_image_space->GetImageHeader();
+    if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) {
+      return true;
+    }
+  }
+  return false;
 }
 
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
@@ -109,14 +121,6 @@
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
   gc::Heap* const heap = Runtime::Current()->GetHeap();
-  // Cache boot image space.
-    for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
-      if (space->IsImageSpace()) {
-        CHECK(compile_app_image_);
-        CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
-        boot_image_space_ = space->AsImageSpace();
-      }
-    }
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
@@ -154,147 +158,171 @@
 }
 
 bool ImageWriter::Write(int image_fd,
-                        const std::string& image_filename,
-                        const std::string& oat_filename,
-                        const std::string& oat_location) {
-  CHECK(!image_filename.empty());
+                        const std::vector<const char*>& image_filenames,
+                        const std::vector<const char*>& oat_filenames) {
+  CHECK(!image_filenames.empty());
+  CHECK(!oat_filenames.empty());
+  CHECK_EQ(image_filenames.size(), oat_filenames.size());
 
-  std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
-  if (oat_file.get() == nullptr) {
-    PLOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
-    return false;
-  }
-  std::string error_msg;
-  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, &error_msg);
-  if (oat_file_ == nullptr) {
-    PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
-        << ": " << error_msg;
-    oat_file->Erase();
-    return false;
-  }
-  Runtime::Current()->GetOatFileManager().RegisterOatFile(
+  size_t oat_file_offset = 0;
+
+  for (size_t i = 0; i < oat_filenames.size(); ++i) {
+    const char* oat_filename = oat_filenames[i];
+    std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+    if (oat_file.get() == nullptr) {
+      PLOG(ERROR) << "Failed to open oat file " << oat_filename;
+      return false;
+    }
+    std::string error_msg;
+    oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_filename, nullptr, &error_msg);
+    if (oat_file_ == nullptr) {
+      PLOG(ERROR) << "Failed to open writable oat file " << oat_filename;
+      oat_file->Erase();
+      return false;
+    }
+    Runtime::Current()->GetOatFileManager().RegisterOatFile(
       std::unique_ptr<const OatFile>(oat_file_));
 
-  const OatHeader& oat_header = oat_file_->GetOatHeader();
-  oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
-      oat_header.GetInterpreterToInterpreterBridgeOffset();
-  oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
-      oat_header.GetInterpreterToCompiledCodeBridgeOffset();
-  oat_address_offsets_[kOatAddressJNIDlsymLookup] =
-      oat_header.GetJniDlsymLookupOffset();
-  oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
-      oat_header.GetQuickGenericJniTrampolineOffset();
-  oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
-      oat_header.GetQuickImtConflictTrampolineOffset();
-  oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
-      oat_header.GetQuickResolutionTrampolineOffset();
-  oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
-      oat_header.GetQuickToInterpreterBridgeOffset();
+    const OatHeader& oat_header = oat_file_->GetOatHeader();
+    ImageInfo& image_info = GetImageInfo(oat_filename);
 
-  size_t oat_loaded_size = 0;
-  size_t oat_data_offset = 0;
-  ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset);
+    size_t oat_loaded_size = 0;
+    size_t oat_data_offset = 0;
+    ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset);
+
+    DCHECK_EQ(image_info.oat_offset_, oat_file_offset);
+    oat_file_offset += oat_loaded_size;
+
+    if (i == 0) {
+      // Primary oat file, read the trampolines.
+      image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
+          oat_header.GetInterpreterToInterpreterBridgeOffset();
+      image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
+          oat_header.GetInterpreterToCompiledCodeBridgeOffset();
+      image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] =
+          oat_header.GetJniDlsymLookupOffset();
+      image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
+          oat_header.GetQuickGenericJniTrampolineOffset();
+      image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
+          oat_header.GetQuickImtConflictTrampolineOffset();
+      image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
+          oat_header.GetQuickResolutionTrampolineOffset();
+      image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
+          oat_header.GetQuickToInterpreterBridgeOffset();
+    }
+
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      CreateHeader(oat_loaded_size, oat_data_offset);
+      CopyAndFixupNativeData();
+    }
+
+    SetOatChecksumFromElfFile(oat_file.get());
+
+    if (oat_file->FlushCloseOrErase() != 0) {
+      LOG(ERROR) << "Failed to flush and close oat file " << oat_filename;
+      return false;
+    }
+  }
 
   {
-    ScopedObjectAccess soa(Thread::Current());
-    CreateHeader(oat_loaded_size, oat_data_offset);
-    CopyAndFixupNativeData();
     // TODO: heap validation can't handle these fix up passes.
+    ScopedObjectAccess soa(Thread::Current());
     Runtime::Current()->GetHeap()->DisableObjectValidation();
     CopyAndFixupObjects();
   }
 
-  SetOatChecksumFromElfFile(oat_file.get());
-
-  if (oat_file->FlushCloseOrErase() != 0) {
-    LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location;
-    return false;
-  }
-  std::unique_ptr<File> image_file;
-  if (image_fd != kInvalidImageFd) {
-    image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage));
-  } else {
-    image_file.reset(OS::CreateEmptyFile(image_filename.c_str()));
-  }
-  if (image_file == nullptr) {
-    LOG(ERROR) << "Failed to open image file " << image_filename;
-    return false;
-  }
-  if (fchmod(image_file->Fd(), 0644) != 0) {
-    PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
-    image_file->Erase();
-    return EXIT_FAILURE;
-  }
-
-  std::unique_ptr<char[]> compressed_data;
-  // Image data size excludes the bitmap and the header.
-  ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader);
-  char* image_data = reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader);
-  size_t data_size;
-  const char* image_data_to_write;
-
-  CHECK_EQ(image_header->storage_mode_, image_storage_mode_);
-  switch (image_storage_mode_) {
-    case ImageHeader::kStorageModeLZ4: {
-      size_t compressed_max_size = LZ4_compressBound(image_data_size);
-      compressed_data.reset(new char[compressed_max_size]);
-      data_size = LZ4_compress(
-          reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader),
-          &compressed_data[0],
-          image_data_size);
-      image_data_to_write = &compressed_data[0];
-      VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size;
-      break;
+  for (size_t i = 0; i < image_filenames.size(); ++i) {
+    const char* image_filename = image_filenames[i];
+    const char* oat_filename = oat_filenames[i];
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    std::unique_ptr<File> image_file;
+    if (image_fd != kInvalidImageFd) {
+      image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage));
+    } else {
+      image_file.reset(OS::CreateEmptyFile(image_filename));
     }
-    case ImageHeader::kStorageModeUncompressed: {
-      data_size = image_data_size;
-      image_data_to_write = image_data;
-      break;
+    if (image_file == nullptr) {
+      LOG(ERROR) << "Failed to open image file " << image_filename;
+      return false;
     }
-    default: {
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    if (fchmod(image_file->Fd(), 0644) != 0) {
+      PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
+      image_file->Erase();
+      return false;
     }
-  }
 
-  // Write header first, as uncompressed.
-  image_header->data_size_ = data_size;
-  if (!image_file->WriteFully(image_->Begin(), sizeof(ImageHeader))) {
-    PLOG(ERROR) << "Failed to write image file header " << image_filename;
-    image_file->Erase();
-    return false;
-  }
+    std::unique_ptr<char[]> compressed_data;
+    // Image data size excludes the bitmap and the header.
+    ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
+    const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader);
+    char* image_data = reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader);
+    size_t data_size;
+    const char* image_data_to_write;
 
-  // Write out the image + fields + methods.
-  const bool is_compressed = compressed_data != nullptr;
-  if (!image_file->WriteFully(image_data_to_write, data_size)) {
-    PLOG(ERROR) << "Failed to write image file data " << image_filename;
-    image_file->Erase();
-    return false;
-  }
+    CHECK_EQ(image_header->storage_mode_, image_storage_mode_);
+    switch (image_storage_mode_) {
+      case ImageHeader::kStorageModeLZ4: {
+        size_t compressed_max_size = LZ4_compressBound(image_data_size);
+        compressed_data.reset(new char[compressed_max_size]);
+        data_size = LZ4_compress(
+            reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader),
+            &compressed_data[0],
+            image_data_size);
+        image_data_to_write = &compressed_data[0];
+        VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size;
+        break;
+      }
+      case ImageHeader::kStorageModeUncompressed: {
+        data_size = image_data_size;
+        image_data_to_write = image_data;
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unsupported";
+        UNREACHABLE();
+      }
+    }
 
-  // Write out the image bitmap at the page aligned start of the image end, also uncompressed for
-  // convenience.
-  const ImageSection& bitmap_section = image_header->GetImageSection(
-      ImageHeader::kSectionImageBitmap);
-  // Align up since data size may be unaligned if the image is compressed.
-  size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
-  if (!is_compressed) {
-    CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
-  }
-  if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
-                         bitmap_section.Size(),
-                         bitmap_position_in_file)) {
-    PLOG(ERROR) << "Failed to write image file " << image_filename;
-    image_file->Erase();
-    return false;
-  }
-  CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
-           static_cast<size_t>(image_file->GetLength()));
-  if (image_file->FlushCloseOrErase() != 0) {
-    PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
-    return false;
+    // Write header first, as uncompressed.
+    image_header->data_size_ = data_size;
+    if (!image_file->WriteFully(image_info.image_->Begin(), sizeof(ImageHeader))) {
+      PLOG(ERROR) << "Failed to write image file header " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
+    // Write out the image + fields + methods.
+    const bool is_compressed = compressed_data != nullptr;
+    if (!image_file->WriteFully(image_data_to_write, data_size)) {
+      PLOG(ERROR) << "Failed to write image file data " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
+    // Write out the image bitmap at the page aligned start of the image end, also uncompressed for
+    // convenience.
+    const ImageSection& bitmap_section = image_header->GetImageSection(
+        ImageHeader::kSectionImageBitmap);
+    // Align up since data size may be unaligned if the image is compressed.
+    size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
+    if (!is_compressed) {
+      CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
+    }
+    if (!image_file->Write(reinterpret_cast<char*>(image_info.image_bitmap_->Begin()),
+                           bitmap_section.Size(),
+                           bitmap_position_in_file)) {
+      PLOG(ERROR) << "Failed to write image file " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+    CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
+             static_cast<size_t>(image_file->GetLength()));
+    if (image_file->FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
+      return false;
+    }
   }
   return true;
 }
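
The storage-mode switch above either writes the payload raw or LZ4-compresses it, always after an uncompressed header. As a rough standalone sketch of that branch (buffer names are illustrative; the change itself calls the older LZ4_compress entry point, while the sketch uses LZ4_compress_default from the same public lz4 C API):

#include <lz4.h>

#include <cstddef>
#include <vector>

// Returns the bytes that follow the uncompressed ImageHeader in the file:
// either the raw payload or an LZ4-compressed copy of it.
std::vector<char> PrepareImageData(const char* payload, size_t payload_size, bool compress) {
  if (!compress) {
    // kStorageModeUncompressed: write the payload as-is.
    return std::vector<char>(payload, payload + payload_size);
  }
  // kStorageModeLZ4: compress into a worst-case sized buffer, then shrink.
  const int max_size = LZ4_compressBound(static_cast<int>(payload_size));
  std::vector<char> compressed(static_cast<size_t>(max_size));
  const int data_size = LZ4_compress_default(
      payload, compressed.data(), static_cast<int>(payload_size), max_size);
  compressed.resize(static_cast<size_t>(data_size));  // This is the data_size_ stored in the header.
  return compressed;
}
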
@@ -319,12 +347,14 @@
   DCHECK(object != nullptr);
   DCHECK_NE(image_objects_offset_begin_, 0u);
 
-  size_t bin_slot_offset = bin_slot_offsets_[bin_slot.GetBin()];
+  const char* oat_filename = GetOatFilename(object);
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  size_t bin_slot_offset = image_info.bin_slot_offsets_[bin_slot.GetBin()];
   size_t new_offset = bin_slot_offset + bin_slot.GetIndex();
   DCHECK_ALIGNED(new_offset, kObjectAlignment);
 
   SetImageOffset(object, new_offset);
-  DCHECK_LT(new_offset, image_end_);
+  DCHECK_LT(new_offset, image_info.image_end_);
 }
 
 bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const {
@@ -338,7 +368,9 @@
   DCHECK(IsImageOffsetAssigned(object));
   LockWord lock_word = object->GetLockWord(false);
   size_t offset = lock_word.ForwardingAddress();
-  DCHECK_LT(offset, image_end_);
+  const char* oat_filename = GetOatFilename(object);
+  const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+  DCHECK_LT(offset, image_info.image_end_);
   return offset;
 }
 
@@ -377,15 +409,16 @@
 
 void ImageWriter::PrepareDexCacheArraySlots() {
   // Prepare dex cache array starts based on the ordering specified in the CompilerDriver.
-  uint32_t size = 0u;
-  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
-    dex_cache_array_starts_.Put(dex_file, size);
-    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
-    size += layout.Size();
-  }
   // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
   // when AssignImageBinSlot() assigns their indexes out or order.
-  bin_slot_sizes_[kBinDexCacheArray] = size;
+  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
+    auto it = dex_file_oat_filename_map_.find(dex_file);
+    DCHECK(it != dex_file_oat_filename_map_.end()) << dex_file->GetLocation();
+    ImageInfo& image_info = GetImageInfo(it->second);
+    image_info.dex_cache_array_starts_.Put(dex_file, image_info.bin_slot_sizes_[kBinDexCacheArray]);
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
+    image_info.bin_slot_sizes_[kBinDexCacheArray] += layout.Size();
+  }
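
With multiple images, each dex file's cache arrays are appended to the kBinDexCacheArray bin of the image that owns its oat file, and the start offset is recorded before the bin grows. A simplified model of that bookkeeping (FakeImageInfo and the owner map are illustrative stand-ins for ImageInfo and dex_file_oat_filename_map_):

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct FakeImageInfo {
  size_t dex_cache_array_bin = 0;              // bin_slot_sizes_[kBinDexCacheArray]
  std::map<std::string, size_t> array_starts;  // dex_cache_array_starts_
};

// dex_files holds (location, layout size); owner maps a location to its image.
void AssignDexCacheArrayStarts(const std::vector<std::pair<std::string, size_t>>& dex_files,
                               std::map<std::string, FakeImageInfo*>& owner) {
  for (const auto& df : dex_files) {
    FakeImageInfo* info = owner.at(df.first);
    info->array_starts[df.first] = info->dex_cache_array_bin;  // Start is the current bin size.
    info->dex_cache_array_bin += df.second;                    // Then the bin grows.
  }
}
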
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
@@ -399,24 +432,32 @@
     const DexFile* dex_file = dex_cache->GetDexFile();
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
-    uint32_t start = dex_cache_array_starts_.Get(dex_file);
+    const char* oat_filename = GetOatFilenameForDexCache(dex_cache);
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    uint32_t start = image_info.dex_cache_array_starts_.Get(dex_file);
     DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(),
+                               start + layout.TypesOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(),
+                               start + layout.MethodsOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(),
+                               start + layout.FieldsOffset(),
+                               dex_cache);
     DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset(), dex_cache);
   }
 }
 
-void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
+void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset, DexCache* dex_cache) {
   if (array != nullptr) {
     DCHECK(!IsInBootImage(array));
-    native_object_relocations_.emplace(
-        array,
-        NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray });
+    const char* oat_filename = GetOatFilenameForDexCache(dex_cache);
+    native_object_relocations_.emplace(array,
+        NativeObjectRelocation { oat_filename, offset, kNativeObjectRelocationTypeDexCacheArray });
   }
 }
 
@@ -531,18 +572,21 @@
     }  // else bin = kBinRegular
   }
 
+  const char* oat_filename = GetOatFilename(object);
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+
   size_t offset_delta = RoundUp(object_size, kObjectAlignment);  // 64-bit alignment
-  current_offset = bin_slot_sizes_[bin];  // How many bytes the current bin is at (aligned).
-  // Move the current bin size up to accomodate the object we just assigned a bin slot.
-  bin_slot_sizes_[bin] += offset_delta;
+  // Place the object at the current size of its bin (already aligned).
+  current_offset = image_info.bin_slot_sizes_[bin];
+  // Move the current bin size up to accommodate the object we just assigned a bin slot.
+  image_info.bin_slot_sizes_[bin] += offset_delta;
 
   BinSlot new_bin_slot(bin, current_offset);
   SetImageBinSlot(object, new_bin_slot);
 
-  ++bin_slot_count_[bin];
+  ++image_info.bin_slot_count_[bin];
 
   // Grow the image closer to the end by the object we just assigned.
-  image_end_ += offset_delta;
+  image_info.image_end_ += offset_delta;
 }
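
The assignment above is append-only per image: an object lands at the current size of its bin, and both the bin and the image end grow by the aligned object size. A self-contained sketch of that accounting (kObjectAlignment and RoundUp mirror the ART helpers):

#include <cstddef>

constexpr size_t kObjectAlignment = 8;

constexpr size_t RoundUp(size_t x, size_t n) {  // n must be a power of two.
  return (x + n - 1) & ~(n - 1);
}

struct Bin {
  size_t size = 0;   // bin_slot_sizes_[bin]
  size_t count = 0;  // bin_slot_count_[bin]
};

// Returns the new slot's offset within its bin.
size_t AssignSlot(Bin& bin, size_t object_size, size_t& image_end) {
  const size_t offset = bin.size;  // Slot starts at the current bin size.
  const size_t delta = RoundUp(object_size, kObjectAlignment);
  bin.size += delta;               // Bin grows by the aligned size...
  ++bin.count;
  image_end += delta;              // ...and so does the image end.
  return offset;
}
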
 
 bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const {
@@ -565,7 +609,9 @@
     LockWord lock_word = object->GetLockWord(false);
     size_t offset = lock_word.ForwardingAddress();
     BinSlot bin_slot(offset);
-    DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()])
+    const char* oat_filename = GetOatFilename(object);
+    const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+    DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()])
         << "bin slot offset should not exceed the size of that bin";
   }
   return true;
@@ -580,39 +626,42 @@
   DCHECK_LE(offset, std::numeric_limits<uint32_t>::max());
 
   BinSlot bin_slot(static_cast<uint32_t>(offset));
-  DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()]);
+  const char* oat_filename = GetOatFilename(object);
+  const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+  DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]);
 
   return bin_slot;
 }
 
 bool ImageWriter::AllocMemory() {
-  const size_t length = RoundUp(image_objects_offset_begin_ +
-                                    GetBinSizeSum() +
-                                    intern_table_bytes_ +
-                                    class_table_bytes_,
-                                kPageSize);
-  std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image",
-                                    nullptr,
-                                    length,
-                                    PROT_READ | PROT_WRITE,
-                                    false,
-                                    false,
-                                    &error_msg));
-  if (UNLIKELY(image_.get() == nullptr)) {
-    LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
-    return false;
-  }
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    const size_t length = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
 
-  // Create the image bitmap, only needs to cover mirror object section which is up to image_end_.
-  CHECK_LE(image_end_, length);
-  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
-      "image bitmap",
-      image_->Begin(),
-      RoundUp(image_end_, kPageSize)));
-  if (image_bitmap_.get() == nullptr) {
-    LOG(ERROR) << "Failed to allocate memory for image bitmap";
-    return false;
+    std::string error_msg;
+    image_info.image_.reset(MemMap::MapAnonymous("image writer image",
+                                                 nullptr,
+                                                 length,
+                                                 PROT_READ | PROT_WRITE,
+                                                 false,
+                                                 false,
+                                                 &error_msg));
+    if (UNLIKELY(image_info.image_.get() == nullptr)) {
+      LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
+      return false;
+    }
+
+    // Create the image bitmap, only needs to cover mirror object section which is up to image_end_.
+    CHECK_LE(image_info.image_end_, length);
+    image_info.image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
+        "image bitmap", image_info.image_->Begin(), RoundUp(image_info.image_end_, kPageSize)));
+    if (image_info.image_bitmap_.get() == nullptr) {
+      LOG(ERROR) << "Failed to allocate memory for image bitmap";
+      return false;
+    }
   }
   return true;
 }
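
Both the map length and the bitmap coverage above are rounded up to whole pages. A small worked example of that rounding, assuming 4 KiB pages as on typical Android targets:

#include <cassert>
#include <cstddef>

constexpr size_t kPageSize = 4096;

constexpr size_t RoundUp(size_t x, size_t n) {
  return (x + n - 1) & ~(n - 1);
}

int main() {
  assert(RoundUp(0x12345, kPageSize) == 0x13000);  // Ends mid-page: bump to the next boundary.
  assert(RoundUp(0x13000, kPageSize) == 0x13000);  // Already page-aligned: unchanged.
  return 0;
}
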
@@ -861,14 +910,17 @@
   DCHECK(obj != nullptr);
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
+    const char* oat_filename = GetOatFilename(obj);
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+
     // we must be an interned string that was forward referenced and already assigned
     if (IsImageBinSlotAssigned(obj)) {
-      DCHECK_EQ(obj, obj->AsString()->Intern());
+      DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString()));
       return;
     }
     // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
     // we are guaranteed to not have GC during image writing.
-    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString(
+    mirror::String* const interned = image_info.intern_table_->InternStrongImageString(
         obj->AsString());
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
@@ -885,7 +937,7 @@
   AssignImageBinSlot(obj);
 }
 
-ObjectArray<Object>* ImageWriter::CreateImageRoots() const {
+ObjectArray<Object>* ImageWriter::CreateImageRoots(const char* oat_filename) const {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
@@ -893,6 +945,15 @@
   Handle<Class> object_array_class(hs.NewHandle(
       class_linker->FindSystemClass(self, "[Ljava/lang/Object;")));
 
+  std::unordered_set<const DexFile*> image_dex_files;
+  for (auto& pair : dex_file_oat_filename_map_) {
+    const DexFile* image_dex_file = pair.first;
+    const char* image_oat_filename = pair.second;
+    if (strcmp(oat_filename, image_oat_filename) == 0) {
+      image_dex_files.insert(image_dex_file);
+    }
+  }
+
   // build an Object[] of all the DexCaches used in the source_space_.
   // Since we can't hold the dex lock when allocating the dex_caches
   // ObjectArray, we lock the dex lock twice, first to get the number
@@ -905,7 +966,10 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-      dex_cache_count += IsInBootImage(dex_cache) ? 0u : 1u;
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache)) {
+        dex_cache_count += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
+      }
     }
   }
   Handle<ObjectArray<Object>> dex_caches(
@@ -918,7 +982,10 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-      non_image_dex_caches += IsInBootImage(dex_cache) ? 0u : 1u;
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache)) {
+        non_image_dex_caches += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
+      }
     }
     CHECK_EQ(dex_cache_count, non_image_dex_caches)
         << "The number of non-image dex caches changed.";
@@ -926,7 +993,8 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-      if (!IsInBootImage(dex_cache)) {
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      if (!IsInBootImage(dex_cache) && image_dex_files.find(dex_file) != image_dex_files.end()) {
         dex_caches->Set<false>(i, dex_cache);
         ++i;
       }
@@ -997,9 +1065,19 @@
       }
       // Visit and assign offsets for fields and field arrays.
       auto* as_klass = h_obj->AsClass();
+      mirror::DexCache* dex_cache = as_klass->GetDexCache();
       LengthPrefixedArray<ArtField>* fields[] = {
           as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(),
       };
+      const char* oat_file = GetOatFilenameForDexCache(dex_cache);
+      ImageInfo& image_info = GetImageInfo(oat_file);
+      {
+        // Note: This table is only accessed from the image writer, so the lock is technically
+        // unnecessary.
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        // Insert into the class table for this image.
+        image_info.class_table_->Insert(as_klass);
+      }
       for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
         // Total array length including header.
         if (cur_fields != nullptr) {
@@ -1008,11 +1086,10 @@
           auto it = native_object_relocations_.find(cur_fields);
           CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields
                                                   << " already forwarded";
-          size_t& offset = bin_slot_sizes_[kBinArtField];
+          size_t& offset = image_info.bin_slot_sizes_[kBinArtField];
           DCHECK(!IsInBootImage(cur_fields));
-          native_object_relocations_.emplace(
-              cur_fields,
-              NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtFieldArray });
+          native_object_relocations_.emplace(cur_fields,
+              NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtFieldArray });
           offset += header_size;
           // Forward individual fields so that we can quickly find where they belong.
           for (size_t i = 0, count = cur_fields->size(); i < count; ++i) {
@@ -1022,9 +1099,8 @@
             CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i
                 << " already assigned " << PrettyField(field) << " static=" << field->IsStatic();
             DCHECK(!IsInBootImage(field));
-            native_object_relocations_.emplace(
-                field,
-                NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField });
+            native_object_relocations_.emplace(field,
+                NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtField });
             offset += sizeof(ArtField);
           }
         }
@@ -1053,17 +1129,17 @@
         auto it = native_object_relocations_.find(array);
         CHECK(it == native_object_relocations_.end())
             << "Method array " << array << " already forwarded";
-        size_t& offset = bin_slot_sizes_[bin_type];
+        size_t& offset = image_info.bin_slot_sizes_[bin_type];
         DCHECK(!IsInBootImage(array));
-        native_object_relocations_.emplace(
-            array, NativeObjectRelocation {
-              offset,
-              any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
-                        : kNativeObjectRelocationTypeArtMethodArrayClean
-            });
+        native_object_relocations_.emplace(array,
+            NativeObjectRelocation {
+                oat_file,
+                offset,
+                any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
+                          : kNativeObjectRelocationTypeArtMethodArrayClean });
         offset += header_size;
         for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
-          AssignMethodOffset(&m, type);
+          AssignMethodOffset(&m, type, oat_file);
         }
         (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
       }
@@ -1089,13 +1165,16 @@
   }
 }
 
-void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) {
+void ImageWriter::AssignMethodOffset(ArtMethod* method,
+                                     NativeObjectRelocationType type,
+                                     const char* oat_filename) {
   DCHECK(!IsInBootImage(method));
   auto it = native_object_relocations_.find(method);
   CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned "
       << PrettyMethod(method);
-  size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
-  native_object_relocations_.emplace(method, NativeObjectRelocation { offset, type });
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
+  native_object_relocations_.emplace(method, NativeObjectRelocation { oat_filename, offset, type });
   offset += ArtMethod::Size(target_ptr_size_);
 }
 
@@ -1128,18 +1207,20 @@
 
 void ImageWriter::CalculateNewObjectOffsets() {
   Thread* const self = Thread::Current();
-  StackHandleScope<1> hs(self);
-  Handle<ObjectArray<Object>> image_roots(hs.NewHandle(CreateImageRoots()));
+  StackHandleScopeCollection handles(self);
+  std::vector<Handle<ObjectArray<Object>>> image_roots;
+  for (const char* oat_filename : oat_filenames_) {
+    image_roots.push_back(handles.NewHandle(CreateImageRoots(oat_filename)));
+  }
 
   auto* runtime = Runtime::Current();
   auto* heap = runtime->GetHeap();
-  DCHECK_EQ(0U, image_end_);
 
   // Leave space for the header, but do not write it yet, we need to
   // know where image_roots is going to end up
-  image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
+  image_objects_offset_begin_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  image_objects_offset_begin_ = image_end_;
   // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
   heap->VisitObjects(WalkFieldsCallback, this);
   // Write the image runtime methods.
@@ -1156,10 +1237,12 @@
   const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean;
   auto it = native_object_relocations_.find(&image_method_array_);
   CHECK(it == native_object_relocations_.end());
-  size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
+  ImageInfo& default_image_info = GetImageInfo(default_oat_filename_);
+  size_t& offset =
+      default_image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
   if (!compile_app_image_) {
     native_object_relocations_.emplace(&image_method_array_,
-                                       NativeObjectRelocation { offset, image_method_type });
+        NativeObjectRelocation { default_oat_filename_, offset, image_method_type });
   }
   size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
   const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
@@ -1171,153 +1254,182 @@
     CHECK(m->IsRuntimeMethod());
     DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image";
     if (!IsInBootImage(m)) {
-      AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+      AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean, default_oat_filename_);
     }
   }
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
-  // Calculate bin slot offsets.
-  size_t bin_offset = image_objects_offset_begin_;
-  for (size_t i = 0; i != kBinSize; ++i) {
-    bin_slot_offsets_[i] = bin_offset;
-    bin_offset += bin_slot_sizes_[i];
-    if (i == kBinArtField) {
-      static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields.");
-      static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4.");
-      DCHECK_ALIGNED(bin_offset, 4u);
-      DCHECK(method_alignment == 4u || method_alignment == 8u);
-      bin_offset = RoundUp(bin_offset, method_alignment);
-    }
+  // Calculate the sizes of the intern tables and class tables.
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    // Calculate how big the intern table will be after being serialized.
+    InternTable* const intern_table = image_info.intern_table_.get();
+    CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
+    image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    // Calculate the size of the class table.
+    ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
   }
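
WriteToMemory(nullptr) is used here purely as a size query: a null destination makes the table report how many bytes a later real write will need, so the image sections can be sized before anything is serialized. A toy illustration of the convention (ToyTable is hypothetical, not the real InternTable/ClassTable API):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// A toy serializer following the same convention: null destination = size query.
struct ToyTable {
  std::vector<uint64_t> entries;

  size_t WriteToMemory(uint8_t* dst) const {
    const size_t bytes = entries.size() * sizeof(uint64_t);
    if (dst != nullptr) {
      std::memcpy(dst, entries.data(), bytes);  // Real write.
    }
    return bytes;  // Either way, report the serialized size.
  }
};
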
-  // NOTE: There may be additional padding between the bin slots and the intern table.
 
-  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
+  // Calculate bin slot offsets.
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    size_t bin_offset = image_objects_offset_begin_;
+    for (size_t i = 0; i != kBinSize; ++i) {
+      image_info.bin_slot_offsets_[i] = bin_offset;
+      bin_offset += image_info.bin_slot_sizes_[i];
+      if (i == kBinArtField) {
+        static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields.");
+        static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4.");
+        DCHECK_ALIGNED(bin_offset, 4u);
+        DCHECK(method_alignment == 4u || method_alignment == 8u);
+        bin_offset = RoundUp(bin_offset, method_alignment);
+      }
+    }
+    // NOTE: There may be additional padding between the bin slots and the intern table.
+    DCHECK_EQ(image_info.image_end_,
+              GetBinSizeSum(image_info, kBinMirrorCount) + image_objects_offset_begin_);
+  }
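
The offsets are a running sum of the bin sizes, with one extra rounding step so that methods start at their required alignment immediately after the 4-byte-aligned ArtField bin. The core of the pass, reduced to two bins for illustration:

#include <array>
#include <cstddef>

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

// Simplified bin indices; kField + 1 == kMethodClean, as in the real enum.
enum Bin : size_t { kField = 0, kMethodClean = 1, kBinCount = 2 };

std::array<size_t, kBinCount> ComputeBinOffsets(const std::array<size_t, kBinCount>& sizes,
                                                size_t objects_begin,
                                                size_t method_alignment) {
  std::array<size_t, kBinCount> offsets = {};
  size_t pos = objects_begin;
  for (size_t i = 0; i != kBinCount; ++i) {
    offsets[i] = pos;
    pos += sizes[i];
    if (i == kField) {
      pos = RoundUp(pos, method_alignment);  // Methods follow fields; may need 8-byte alignment.
    }
  }
  return offsets;
}
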
+
+  // Calculate image offsets.
+  size_t image_offset = 0;
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    image_info.image_begin_ = global_image_begin_ + image_offset;
+    image_info.image_offset_ = image_offset;
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    image_info.image_size_ = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
+    // There should be no gaps until the next image.
+    image_offset += image_info.image_size_;
+  }
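
Images are laid out back to back from one base address: each image's begin is the global begin plus the page-aligned sizes of all images before it, so the offsets double as offsets within the combined mapping. In isolation:

#include <cstddef>
#include <cstdint>
#include <vector>

struct Placement {
  uintptr_t begin;  // image_begin_
  size_t offset;    // image_offset_
};

std::vector<Placement> PlaceImages(uintptr_t global_begin,
                                   const std::vector<size_t>& page_aligned_sizes) {
  std::vector<Placement> out;
  size_t offset = 0;
  for (size_t size : page_aligned_sizes) {
    out.push_back(Placement{global_begin + offset, offset});
    offset += size;  // No gaps until the next image.
  }
  return out;
}
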
 
   // Transform each object's bin slot into an offset which will be used to do the final copy.
   heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
 
-  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
+  // Note: the old single-image DCHECK below no longer holds; each ImageInfo now tracks its own
+  // image_end_, which is verified per image in the bin-offset loop above.
+  // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
 
-  image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get()));
+  size_t i = 0;
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get()));
+    i++;
+  }
 
   // Update the native relocations by adding their bin sums.
   for (auto& pair : native_object_relocations_) {
     NativeObjectRelocation& relocation = pair.second;
     Bin bin_type = BinTypeForNativeRelocationType(relocation.type);
-    relocation.offset += bin_slot_offsets_[bin_type];
+    ImageInfo& image_info = GetImageInfo(relocation.oat_filename);
+    relocation.offset += image_info.bin_slot_offsets_[bin_type];
   }
 
-  // Calculate how big the intern table will be after being serialized.
-  InternTable* const intern_table = runtime->GetInternTable();
-  CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
-  intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
-
-  // Write out the class table.
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  if (boot_image_space_ == nullptr) {
-    // Compiling the boot image, add null class loader.
-    class_loaders_.insert(nullptr);
-  }
-  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
-  // with no classes.
-  if (class_loaders_.size() == 1u) {
-    // Only write the class table if we have exactly one class loader. There may be cases where
-    // there are multiple class loaders if a class path is passed to dex2oat.
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      class_table_bytes_ += table->WriteToMemory(nullptr);
-    }
-  }
-
-  // Note that image_end_ is left at end of used mirror object section.
+  // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
-void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
-  CHECK_NE(0U, oat_loaded_size);
-  const uint8_t* oat_file_begin = GetOatFileBegin();
-  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
-  oat_data_begin_ = oat_file_begin + oat_data_offset;
-  const uint8_t* oat_data_end = oat_data_begin_ + oat_file_->Size();
-
-  // Create the image sections.
-  ImageSection sections[ImageHeader::kSectionCount];
+size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size,
+                                                   ImageSection* out_sections) const {
+  DCHECK(out_sections != nullptr);
   // Objects section
-  auto* objects_section = &sections[ImageHeader::kSectionObjects];
+  auto* objects_section = &out_sections[ImageHeader::kSectionObjects];
   *objects_section = ImageSection(0u, image_end_);
   size_t cur_pos = objects_section->End();
   // Add field section.
-  auto* field_section = &sections[ImageHeader::kSectionArtFields];
+  auto* field_section = &out_sections[ImageHeader::kSectionArtFields];
   *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
   CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
   cur_pos = field_section->End();
   // Round up to the alignment required by the method section.
-  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
+  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size));
   // Add method section.
-  auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
+  auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
   *methods_section = ImageSection(cur_pos,
                                   bin_slot_sizes_[kBinArtMethodClean] +
                                       bin_slot_sizes_[kBinArtMethodDirty]);
   CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
   cur_pos = methods_section->End();
   // Add dex cache arrays section.
-  auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays];
+  auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
   *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
   CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
   cur_pos = dex_cache_arrays_section->End();
   // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the interned strings.
-  auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
+  auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
   // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the class table section.
-  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
   *class_table_section = ImageSection(cur_pos, class_table_bytes_);
   cur_pos = class_table_section->End();
   // Image end goes right before the start of the image bitmap.
-  const size_t image_end = static_cast<uint32_t>(cur_pos);
+  return cur_pos;
+}
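
CreateImageSections() is a running-offset computation: each section begins where the previous one ends, with rounding inserted before the method section and before each hash table. Omitting those rounding steps, the skeleton is:

#include <cstddef>
#include <string>
#include <vector>

struct Section {
  std::string name;  // Illustrative; the real sections are indexed by ImageHeader::ImageSections.
  size_t offset = 0;
  size_t size = 0;
};

// Returns the image end, i.e. where the page-aligned bitmap will start.
size_t ChainSections(std::vector<Section>& sections) {
  size_t cur_pos = 0;
  for (Section& s : sections) {
    s.offset = cur_pos;  // Section begins at the previous section's end.
    cur_pos = s.offset + s.size;
  }
  return cur_pos;
}
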
+
+void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
+  CHECK_NE(0U, oat_loaded_size);
+  const char* oat_filename = oat_file_->GetLocation().c_str();
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
+  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
+  image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
+  const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
+  image_info.oat_size_ = oat_file_->Size();
+
+  // Create the image sections.
+  ImageSection sections[ImageHeader::kSectionCount];
+  const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
+
   // Finally bitmap section.
-  const size_t bitmap_bytes = image_bitmap_->Size();
+  const size_t bitmap_bytes = image_info.image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
-  *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize));
-  cur_pos = bitmap_section->End();
-  if (kIsDebugBuild) {
+  *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize));
+  if (VLOG_IS_ON(compiler)) {
+    LOG(INFO) << "Creating header for " << oat_filename;
     size_t idx = 0;
     for (const ImageSection& section : sections) {
       LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section;
       ++idx;
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
+    LOG(INFO) << "Image roots address=" << std::hex << image_info.image_roots_address_ << std::dec;
+    LOG(INFO) << "Image begin=" << std::hex << reinterpret_cast<uintptr_t>(global_image_begin_)
+              << " Image offset=" << image_info.image_offset_ << std::dec;
+    LOG(INFO) << "Oat file begin=" << std::hex << reinterpret_cast<uintptr_t>(oat_file_begin)
+              << " Oat data begin=" << reinterpret_cast<uintptr_t>(image_info.oat_data_begin_)
+              << " Oat data end=" << reinterpret_cast<uintptr_t>(oat_data_end)
+              << " Oat file end=" << reinterpret_cast<uintptr_t>(oat_file_end);
   }
-  CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
-      "Oat file should be right after the image.";
+
   // Create the header, leave 0 for data size since we will fill this in as we are writing the
   // image.
-  new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_),
-                                                          image_end,
-                                                          sections,
-                                                          image_roots_address_,
-                                                          oat_file_->GetOatHeader().GetChecksum(),
-                                                          PointerToLowMemUInt32(oat_file_begin),
-                                                          PointerToLowMemUInt32(oat_data_begin_),
-                                                          PointerToLowMemUInt32(oat_data_end),
-                                                          PointerToLowMemUInt32(oat_file_end),
-                                                          target_ptr_size_,
-                                                          compile_pic_,
-                                                          image_storage_mode_,
-                                                          /*data_size*/0u);
+  new (image_info.image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_info.image_begin_),
+                                               image_end,
+                                               sections,
+                                               image_info.image_roots_address_,
+                                               oat_file_->GetOatHeader().GetChecksum(),
+                                               PointerToLowMemUInt32(oat_file_begin),
+                                               PointerToLowMemUInt32(image_info.oat_data_begin_),
+                                               PointerToLowMemUInt32(oat_data_end),
+                                               PointerToLowMemUInt32(oat_file_end),
+                                               target_ptr_size_,
+                                               compile_pic_,
+                                               image_storage_mode_,
+                                               /*data_size*/0u);
 }
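
The header is constructed with placement new directly into the image buffer, so there is no separate copy step and fields such as data_size_ can still be patched in place later. The idiom in isolation (FakeHeader is a hypothetical stand-in for ImageHeader):

#include <cstdint>
#include <new>
#include <vector>

struct FakeHeader {
  uint32_t image_begin;
  uint32_t image_size;
  uint32_t data_size;
};

int main() {
  std::vector<uint8_t> image(4096);
  // Construct the header in the first bytes of the buffer; no memcpy needed.
  new (image.data()) FakeHeader{0x70000000u, 4096u, /*data_size*/ 0u};
  return 0;
}
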
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
   auto it = native_object_relocations_.find(method);
   CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method;
-  CHECK_GE(it->second.offset, image_end_) << "ArtMethods should be after Objects";
-  return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
+  const char* oat_filename = GetOatFilename(method->GetDexCache());
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  CHECK_GE(it->second.offset, image_info.image_end_) << "ArtMethods should be after Objects";
+  return reinterpret_cast<ArtMethod*>(image_info.image_begin_ + it->second.offset);
 }
 
 class FixupRootVisitor : public RootVisitor {
@@ -1328,7 +1440,7 @@
   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      *roots[i] = ImageAddress(*roots[i]);
+      *roots[i] = image_writer_->GetImageAddress(*roots[i]);
     }
   }
 
@@ -1336,27 +1448,26 @@
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+      roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
     }
   }
 
  private:
   ImageWriter* const image_writer_;
-
-  mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    const size_t offset = image_writer_->GetImageOffset(obj);
-    auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset);
-    VLOG(compiler) << "Update root from " << obj << " to " << dest;
-    return dest;
-  }
 };
 
 void ImageWriter::CopyAndFixupNativeData() {
+  const char* oat_filename = oat_file_->GetLocation().c_str();
+  ImageInfo& image_info = GetImageInfo(oat_filename);
   // Copy ArtFields and methods to their locations and update the array for convenience.
   for (auto& pair : native_object_relocations_) {
     NativeObjectRelocation& relocation = pair.second;
-    auto* dest = image_->Begin() + relocation.offset;
-    DCHECK_GE(dest, image_->Begin() + image_end_);
+    // Only work with fields and methods that are in the current oat file.
+    if (strcmp(relocation.oat_filename, oat_filename) != 0) {
+      continue;
+    }
+    auto* dest = image_info.image_->Begin() + relocation.offset;
+    DCHECK_GE(dest, image_info.image_->Begin() + image_info.image_end_);
     DCHECK(!IsInBootImage(pair.first));
     switch (relocation.type) {
       case kNativeObjectRelocationTypeArtField: {
@@ -1368,7 +1479,8 @@
       case kNativeObjectRelocationTypeArtMethodClean:
       case kNativeObjectRelocationTypeArtMethodDirty: {
         CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first),
-                           reinterpret_cast<ArtMethod*>(dest));
+                           reinterpret_cast<ArtMethod*>(dest),
+                           image_info);
         break;
       }
       // For arrays, copy just the header since the elements will get copied by their corresponding
@@ -1391,67 +1503,69 @@
     }
   }
   // Fixup the image method roots.
-  auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
+  auto* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
   const ImageSection& methods_section = image_header->GetMethodsSection();
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
     ArtMethod* method = image_methods_[i];
     CHECK(method != nullptr);
+    // Only place runtime methods in the image of the default oat file.
+    if (method->IsRuntimeMethod() && strcmp(default_oat_filename_, oat_filename) != 0) {
+      continue;
+    }
     if (!IsInBootImage(method)) {
       auto it = native_object_relocations_.find(method);
-      CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(method);
+      CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method);
       NativeObjectRelocation& relocation = it->second;
       CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
           << methods_section;
       CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
-      method = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
+      method = reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset);
     }
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
   FixupRootVisitor root_visitor(this);
 
   // Write the intern table into the image.
-  const ImageSection& intern_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  Runtime* const runtime = Runtime::Current();
-  InternTable* const intern_table = runtime->GetInternTable();
-  uint8_t* const intern_table_memory_ptr = image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
-  // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_intern_table;
-  // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
-  // the VisitRoots() will update the memory directly rather than the copies.
-  // This also relies on visit roots not doing any verification which could fail after we update
-  // the roots to be the image addresses.
-  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
-  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
-  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
-
+  if (image_info.intern_table_bytes_ > 0) {
+    const ImageSection& intern_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionInternedStrings);
+    InternTable* const intern_table = image_info.intern_table_.get();
+    uint8_t* const intern_table_memory_ptr =
+        image_info.image_->Begin() + intern_table_section.Offset();
+    const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+    CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_);
+    // Fixup the pointers in the newly written intern table to contain image addresses.
+    InternTable temp_intern_table;
+    // Note that we require that AddTableFromMemory does not make an internal copy of the elements so that
+    // the VisitRoots() will update the memory directly rather than the copies.
+    // This also relies on visit roots not doing any verification which could fail after we update
+    // the roots to be the image addresses.
+    temp_intern_table.AddTableFromMemory(intern_table_memory_ptr);
+    CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+    temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+  }
   // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
   // class loaders. Writing multiple class tables into the image is currently unsupported.
-  if (class_table_bytes_ > 0u) {
-    ClassLinker* const class_linker = runtime->GetClassLinker();
+  if (image_info.class_table_bytes_ > 0u) {
     const ImageSection& class_table_section = image_header->GetImageSection(
         ImageHeader::kSectionClassTable);
-    uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+    uint8_t* const class_table_memory_ptr =
+        image_info.image_->Begin() + class_table_section.Offset();
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    size_t class_table_bytes = 0;
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
-      class_table_bytes += table->WriteToMemory(memory_ptr);
-      // Fixup the pointers in the newly written class table to contain image addresses. See
-      // above comment for intern tables.
-      ClassTable temp_class_table;
-      temp_class_table.ReadFromMemory(memory_ptr);
-      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
-               table->NumZygoteClasses());
-      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                      RootInfo(kRootUnknown));
-      temp_class_table.VisitRoots(buffered_visitor);
-    }
-    CHECK_EQ(class_table_bytes, class_table_bytes_);
+
+    ClassTable* table = image_info.class_table_.get();
+    CHECK(table != nullptr);
+    const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr);
+    CHECK_EQ(class_table_bytes, image_info.class_table_bytes_);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(class_table_memory_ptr);
+    CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+             table->NumZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
   }
 }
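
The read-back steps above rely on AddTableFromMemory and ClassTable::ReadFromMemory aliasing the serialized buffer rather than copying it, so visiting the temporary table's roots rewrites the on-disk bytes to image addresses in place. A toy version of that in-place rewrite, modelling roots as plain offsets for simplicity:

#include <cstddef>
#include <cstdint>

// Rewrites serialized root slots from writer-local offsets to target image
// addresses, directly in the buffer that will be flushed to disk.
void FixupSerializedRoots(uint8_t* table_memory, size_t root_count, uintptr_t image_begin) {
  uintptr_t* roots = reinterpret_cast<uintptr_t*>(table_memory);
  for (size_t i = 0; i < root_count; ++i) {
    roots[i] = image_begin + roots[i];  // Offset -> address in the mapped image.
  }
}
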
 
@@ -1500,7 +1614,8 @@
         }
         UNREACHABLE();
       } else {
-        elem = image_begin_ + it->second.offset;
+        ImageInfo& image_info = GetImageInfo(it->second.oat_filename);
+        elem = image_info.image_begin_ + it->second.offset;
       }
     }
     dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_);
@@ -1512,14 +1627,16 @@
     return;
   }
   size_t offset = GetImageOffset(obj);
-  auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
-  DCHECK_LT(offset, image_end_);
+  const char* oat_filename = GetOatFilename(obj);
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  auto* dst = reinterpret_cast<Object*>(image_info.image_->Begin() + offset);
+  DCHECK_LT(offset, image_info.image_end_);
   const auto* src = reinterpret_cast<const uint8_t*>(obj);
 
-  image_bitmap_->Set(dst);  // Mark the obj as live.
+  image_info.image_bitmap_->Set(dst);  // Mark the obj as live.
 
   const size_t n = obj->SizeOf();
-  DCHECK_LE(offset + n, image_->Size());
+  DCHECK_LE(offset + n, image_info.image_->Size());
   memcpy(dst, src, n);
 
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
@@ -1595,34 +1712,55 @@
 }
 
 template <typename T>
-T* ImageWriter::NativeLocationInImage(T* obj) {
-  return (obj == nullptr || IsInBootImage(obj))
-      ? obj
-      : reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
+T* ImageWriter::NativeLocationInImage(T* obj, const char* oat_filename) {
+  if (obj == nullptr || IsInBootImage(obj)) {
+    return obj;
+  } else {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    return reinterpret_cast<T*>(image_info.image_begin_ + NativeOffsetInImage(obj));
+  }
 }
 
 template <typename T>
-T* ImageWriter::NativeCopyLocation(T* obj) {
-  return (obj == nullptr || IsInBootImage(obj))
-      ? obj
-      : reinterpret_cast<T*>(image_->Begin() + NativeOffsetInImage(obj));
+T* ImageWriter::NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) {
+  if (obj == nullptr || IsInBootImage(obj)) {
+    return obj;
+  } else {
+    const char* oat_filename = GetOatFilenameForDexCache(dex_cache);
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    return reinterpret_cast<T*>(image_info.image_->Begin() + NativeOffsetInImage(obj));
+  }
 }
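
These two helpers answer different questions about the same relocation: NativeLocationInImage() yields the address the object will have once the image is mapped at image_begin_, while NativeCopyLocation() yields where to write its bytes right now, inside the writer's own buffer. A compact sketch of the distinction:

#include <cstddef>
#include <cstdint>

struct FakeImageInfo {
  uintptr_t image_begin;  // Where the image will be mapped at runtime.
  uint8_t* buffer;        // The writer's local copy of the image (image_->Begin()).
};

// Address the object will have in the runtime mapping.
uintptr_t LocationInImage(const FakeImageInfo& info, size_t native_offset) {
  return info.image_begin + native_offset;
}

// Address to copy the object's bytes to while the image is being written.
uint8_t* CopyLocation(const FakeImageInfo& info, size_t native_offset) {
  return info.buffer + native_offset;
}
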
 
 class NativeLocationVisitor {
  public:
-  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+  explicit NativeLocationVisitor(ImageWriter* image_writer, const char* oat_filename)
+      : image_writer_(image_writer), oat_filename_(oat_filename) {}
 
   template <typename T>
-  T* operator()(T* ptr) const {
-    return image_writer_->NativeLocationInImage(ptr);
+  T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return image_writer_->NativeLocationInImage(ptr, oat_filename_);
+  }
+
+  ArtMethod* operator()(ArtMethod* method) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    const char* oat_filename = method->IsRuntimeMethod() ? image_writer_->GetDefaultOatFilename() :
+        image_writer_->GetOatFilenameForDexCache(method->GetDexCache());
+    return image_writer_->NativeLocationInImage(method, oat_filename);
+  }
+
+  ArtField* operator()(ArtField* field) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    const char* oat_filename = image_writer_->GetOatFilenameForDexCache(field->GetDexCache());
+    return image_writer_->NativeLocationInImage(field, oat_filename);
   }
 
  private:
   ImageWriter* const image_writer_;
+  const char* oat_filename_;
 };
 
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
+  const char* oat_filename = GetOatFilename(orig);
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 }
@@ -1661,7 +1799,7 @@
       CHECK(it != native_object_relocations_.end())
           << "Missing relocation for AbstractMethod.artMethod " << PrettyMethod(src_method);
       dest->SetArtMethod(
-          reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset));
+          reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset));
     } else if (!klass->IsArrayClass()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
@@ -1702,41 +1840,52 @@
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
+  const char* oat_filename = GetOatFilenameForDexCache(orig_dex_cache);
   GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
-                                               NativeLocationInImage(orig_strings),
+                                               NativeLocationInImage(orig_strings, oat_filename),
                                                /*pointer size*/8u);
-    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings), ImageAddressVisitor(this));
+    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
+                                 ImageAddressVisitor(this));
   }
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
-                                               NativeLocationInImage(orig_types),
+                                               NativeLocationInImage(orig_types, oat_filename),
                                                /*pointer size*/8u);
-    orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types), ImageAddressVisitor(this));
+    orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
+                                       ImageAddressVisitor(this));
   }
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
-                                               NativeLocationInImage(orig_methods),
+                                               NativeLocationInImage(orig_methods, oat_filename),
                                                /*pointer size*/8u);
-    ArtMethod** copy_methods = NativeCopyLocation(orig_methods);
+    ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
-      ArtMethod* copy = NativeLocationInImage(orig);
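+      // Null entries and runtime methods have no dex cache; relocate them
+      // against the default oat file.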
+      const char* method_oat_filename;
+      if (orig == nullptr || orig->IsRuntimeMethod()) {
+        method_oat_filename = default_oat_filename_;
+      } else {
+        method_oat_filename = GetOatFilenameForDexCache(orig->GetDexCache());
+      }
+      ArtMethod* copy = NativeLocationInImage(orig, method_oat_filename);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
-                                               NativeLocationInImage(orig_fields),
+                                               NativeLocationInImage(orig_fields, oat_filename),
                                                /*pointer size*/8u);
-    ArtField** copy_fields = NativeCopyLocation(orig_fields);
+    ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      ArtField* copy = NativeLocationInImage(orig);
+      const char* field_oat_filename =
+          orig == nullptr ? default_oat_filename_ : GetOatFilenameForDexCache(orig->GetDexCache());
+      ArtField* copy = NativeLocationInImage(orig, field_oat_filename);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
@@ -1747,9 +1896,10 @@
   // If we are compiling an app image, we need to use the stubs of the boot image.
   if (compile_app_image_) {
     // Use the current image pointers.
-    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-    DCHECK(image_space != nullptr);
-    const OatFile* oat_file = image_space->GetOatFile();
+    std::vector<gc::space::ImageSpace*> image_spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    DCHECK(!image_spaces.empty());
+    const OatFile* oat_file = image_spaces[0]->GetOatFile();
     CHECK(oat_file != nullptr);
     const OatHeader& header = oat_file->GetOatHeader();
     switch (type) {
@@ -1772,10 +1922,13 @@
         UNREACHABLE();
     }
   }
-  return GetOatAddressForOffset(oat_address_offsets_[type]);
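+  // The stubs reside in the primary oat file, so resolve the offsets against
+  // the primary image info.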
+  const ImageInfo& primary_image_info = GetImageInfo(0);
+  return GetOatAddressForOffset(primary_image_info.oat_address_offsets_[type], primary_image_info);
 }
 
-const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) {
+const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method,
+                                         const ImageInfo& image_info,
+                                         bool* quick_is_interpreted) {
   DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method);
   DCHECK(!method->IsImtConflictMethod()) << PrettyMethod(method);
   DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method);
@@ -1788,7 +1941,7 @@
   // Quick entrypoint:
   uint32_t quick_oat_code_offset = PointerToLowMemUInt32(
       method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_));
-  const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset);
+  const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset, image_info);
   *quick_is_interpreted = false;
   if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() ||
       method->GetDeclaringClass()->IsInitialized())) {
@@ -1808,42 +1961,32 @@
     quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline);
   }
   if (!IsInBootOatFile(quick_code)) {
-    DCHECK_GE(quick_code, oat_data_begin_);
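+    // Disabled: there is no longer a single oat_data_begin_ to compare
+    // against when writing multiple oat files.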
+    // DCHECK_GE(quick_code, oat_data_begin_);
   }
   return quick_code;
 }
 
-const uint8_t* ImageWriter::GetQuickEntryPoint(ArtMethod* method) {
-  // Calculate the quick entry point following the same logic as FixupMethod() below.
-  // The resolution method has a special trampoline to call.
-  Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(method == runtime->GetResolutionMethod())) {
-    return GetOatAddress(kOatAddressQuickResolutionTrampoline);
-  } else if (UNLIKELY(method == runtime->GetImtConflictMethod() ||
-                      method == runtime->GetImtUnimplementedMethod())) {
-    return GetOatAddress(kOatAddressQuickIMTConflictTrampoline);
-  } else {
-    // We assume all methods have code. If they don't currently then we set them to the use the
-    // resolution trampoline. Abstract methods never have code and so we need to make sure their
-    // use results in an AbstractMethodError. We use the interpreter to achieve this.
-    if (UNLIKELY(!method->IsInvokable())) {
-      return GetOatAddress(kOatAddressQuickToInterpreterBridge);
-    } else {
-      bool quick_is_interpreted;
-      return GetQuickCode(method, &quick_is_interpreted);
-    }
-  }
-}
-
-void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) {
+void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
+                                     ArtMethod* copy,
+                                     const ImageInfo& image_info) {
   memcpy(copy, orig, ArtMethod::Size(target_ptr_size_));
 
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
+  const char* oat_filename;
+  if (orig->IsRuntimeMethod() || compile_app_image_) {
+    oat_filename = default_oat_filename_;
+  } else {
+    auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
+    DCHECK(it != dex_file_oat_filename_map_.end()) << orig->GetDexFile()->GetLocation();
+    oat_filename = it->second;
+  }
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
-  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
+  copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods, oat_filename),
+                                   target_ptr_size_);
   GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_);
-  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types), target_ptr_size_);
+  copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types, oat_filename),
+                                 target_ptr_size_);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
@@ -1877,7 +2020,7 @@
           GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
-      const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
+      const uint8_t* quick_code = GetQuickCode(orig, image_info, &quick_is_interpreted);
       copy->SetEntryPointFromQuickCompiledCodePtrSize(quick_code, target_ptr_size_);
 
       // JNI entrypoint:
@@ -1914,13 +2057,16 @@
   CHECK(oat_header != nullptr);
   CHECK(oat_header->IsValid());
 
-  ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
+  ImageInfo& image_info = GetImageInfo(oat_file_->GetLocation().c_str());
+  ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
   image_header->SetOatChecksum(oat_header->GetChecksum());
 }
 
-size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const {
+size_t ImageWriter::GetBinSizeSum(ImageWriter::ImageInfo& image_info, ImageWriter::Bin up_to) const {
   DCHECK_LE(up_to, kBinSize);
-  return std::accumulate(&bin_slot_sizes_[0], &bin_slot_sizes_[up_to], /*init*/0);
+  return std::accumulate(&image_info.bin_slot_sizes_[0],
+                         &image_info.bin_slot_sizes_[up_to],
+                         /*init*/0);
 }
 
 ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) {
@@ -1946,15 +2092,17 @@
   return lockword_ & ~kBinMask;
 }
 
-uint8_t* ImageWriter::GetOatFileBegin() const {
-  DCHECK_GT(intern_table_bytes_, 0u);
-  size_t native_sections_size = bin_slot_sizes_[kBinArtField] +
-                                bin_slot_sizes_[kBinArtMethodDirty] +
-                                bin_slot_sizes_[kBinArtMethodClean] +
-                                bin_slot_sizes_[kBinDexCacheArray] +
-                                intern_table_bytes_ +
-                                class_table_bytes_;
-  return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
+uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const {
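+  // The oat files are laid out after the images, so first find where the
+  // highest image ends.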
+  uintptr_t last_image_end = 0;
+  for (const char* oat_fn : oat_filenames_) {
+    const ImageInfo& image_info = GetConstImageInfo(oat_fn);
+    DCHECK(image_info.image_begin_ != nullptr);
+    uintptr_t this_end = reinterpret_cast<uintptr_t>(image_info.image_begin_) +
+        image_info.image_size_;
+    last_image_end = std::max(this_end, last_image_end);
+  }
+  const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+  return reinterpret_cast<uint8_t*>(last_image_end) + image_info.oat_offset_;
 }
 
 ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) {
@@ -1974,4 +2122,94 @@
   UNREACHABLE();
 }
 
+const char* ImageWriter::GetOatFilename(mirror::Object* obj) const {
+  if (compile_app_image_) {
+    return default_oat_filename_;
+  } else {
+    mirror::DexCache* dex_cache = obj->IsDexCache()
+        ? obj->AsDexCache()
+        : (obj->IsClass() ? obj->AsClass()->GetDexCache() : obj->GetClass()->GetDexCache());
+    return GetOatFilenameForDexCache(dex_cache);
+  }
+}
+
+const char* ImageWriter::GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const {
+  if (compile_app_image_ || dex_cache == nullptr) {
+    return default_oat_filename_;
+  } else {
+    auto it = dex_file_oat_filename_map_.find(dex_cache->GetDexFile());
+    DCHECK(it != dex_file_oat_filename_map_.end()) << dex_cache->GetDexFile()->GetLocation();
+    return it->second;
+  }
+}
+
+ImageWriter::ImageInfo& ImageWriter::GetImageInfo(const char* oat_filename) {
+  auto it = image_info_map_.find(oat_filename);
+  DCHECK(it != image_info_map_.end());
+  return it->second;
+}
+
+const ImageWriter::ImageInfo& ImageWriter::GetConstImageInfo(const char* oat_filename) const {
+  auto it = image_info_map_.find(oat_filename);
+  DCHECK(it != image_info_map_.end());
+  return it->second;
+}
+
+const ImageWriter::ImageInfo& ImageWriter::GetImageInfo(size_t index) const {
+  DCHECK_LT(index, oat_filenames_.size());
+  return GetConstImageInfo(oat_filenames_[index]);
+}
+
+void ImageWriter::UpdateOatFile(const char* oat_filename) {
+  std::unique_ptr<File> oat_file(OS::OpenFileForReading(oat_filename));
+  DCHECK(oat_file != nullptr);
+  size_t oat_loaded_size = 0;
+  size_t oat_data_offset = 0;
+  ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset);
+
+  ImageInfo& cur_image_info = GetImageInfo(oat_filename);
+
+  // Update the oat_offset of the next image info.
+  auto it = std::find(oat_filenames_.begin(), oat_filenames_.end(), oat_filename);
+  DCHECK(it != oat_filenames_.end());
+
+  ++it;
+  if (it != oat_filenames_.end()) {
+    // There is a following one.
+    ImageInfo& next_image_info = GetImageInfo(*it);
+    next_image_info.oat_offset_ = cur_image_info.oat_offset_ + oat_loaded_size;
+  }
+}
+
+ImageWriter::ImageWriter(
+    const CompilerDriver& compiler_driver,
+    uintptr_t image_begin,
+    bool compile_pic,
+    bool compile_app_image,
+    ImageHeader::StorageMode image_storage_mode,
+    const std::vector<const char*>& oat_filenames,
+    const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map)
+    : compiler_driver_(compiler_driver),
+      global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+      image_objects_offset_begin_(0),
+      oat_file_(nullptr),
+      compile_pic_(compile_pic),
+      compile_app_image_(compile_app_image),
+      target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
+      image_method_array_(ImageHeader::kImageMethodsCount),
+      dirty_methods_(0u),
+      clean_methods_(0u),
+      image_storage_mode_(image_storage_mode),
+      dex_file_oat_filename_map_(dex_file_oat_filename_map),
+      oat_filenames_(oat_filenames),
+      default_oat_filename_(oat_filenames[0]) {
+  CHECK_NE(image_begin, 0U);
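+  // Eagerly create an ImageInfo for every oat file so that later lookups
+  // cannot fail.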
+  for (const char* oat_filename : oat_filenames) {
+    image_info_map_.emplace(oat_filename, ImageInfo());
+  }
+  std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+}
+
+ImageWriter::ImageInfo::ImageInfo()
+    : intern_table_(new InternTable),
+      class_table_(new ClassTable) {}
+
 }  // namespace art
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index f1b2965..ad69038 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -47,6 +47,8 @@
 }  // namespace space
 }  // namespace gc
 
+class ClassTable;
+
 static constexpr int kInvalidImageFd = -1;
 
 // Write a Space built during compilation for use during execution.
@@ -56,46 +58,32 @@
               uintptr_t image_begin,
               bool compile_pic,
               bool compile_app_image,
-              ImageHeader::StorageMode image_storage_mode)
-      : compiler_driver_(compiler_driver),
-        image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
-        image_end_(0),
-        image_objects_offset_begin_(0),
-        image_roots_address_(0),
-        oat_file_(nullptr),
-        oat_data_begin_(nullptr),
-        compile_pic_(compile_pic),
-        compile_app_image_(compile_app_image),
-        boot_image_space_(nullptr),
-        target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
-        bin_slot_sizes_(),
-        bin_slot_offsets_(),
-        bin_slot_count_(),
-        intern_table_bytes_(0u),
-        image_method_array_(ImageHeader::kImageMethodsCount),
-        dirty_methods_(0u),
-        clean_methods_(0u),
-        class_table_bytes_(0u),
-        image_storage_mode_(image_storage_mode) {
-    CHECK_NE(image_begin, 0U);
-    std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
-    std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
-  }
-
-  ~ImageWriter() {
-  }
+              ImageHeader::StorageMode image_storage_mode,
+              const std::vector<const char*>& oat_filenames,
+              const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map);
 
   bool PrepareImageAddressSpace();
 
   bool IsImageAddressSpaceReady() const {
-    return image_roots_address_ != 0u;
+    if (image_info_map_.empty()) {
+      return false;
+    }
+    for (const auto& pair : image_info_map_) {
+      const ImageInfo& image_info = pair.second;
+      if (image_info.image_roots_address_ == 0u) {
+        return false;
+      }
+    }
+    return true;
   }
 
   template <typename T>
   T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return (object == nullptr || IsInBootImage(object))
-        ? object
-        : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
+    if (object == nullptr || IsInBootImage(object)) {
+      return object;
+    } else {
+      const char* oat_filename = GetOatFilename(object);
+      const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+      return reinterpret_cast<T*>(image_info.image_begin_ + GetImageOffset(object));
+    }
   }
 
   ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -103,26 +91,36 @@
   template <typename PtrType>
   PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset)
       const SHARED_REQUIRES(Locks::mutator_lock_) {
-    auto it = dex_cache_array_starts_.find(dex_file);
-    DCHECK(it != dex_cache_array_starts_.end());
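+    // Dex cache arrays are placed in the image of the oat file that their
+    // dex file was compiled into.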
+    auto oat_it = dex_file_oat_filename_map_.find(dex_file);
+    DCHECK(oat_it != dex_file_oat_filename_map_.end());
+    const ImageInfo& image_info = GetConstImageInfo(oat_it->second);
+    auto it = image_info.dex_cache_array_starts_.find(dex_file);
+    DCHECK(it != image_info.dex_cache_array_starts_.end());
     return reinterpret_cast<PtrType>(
-        image_begin_ + bin_slot_offsets_[kBinDexCacheArray] + it->second + offset);
+        image_info.image_begin_ + image_info.bin_slot_offsets_[kBinDexCacheArray] +
+            it->second + offset);
   }
 
-  uint8_t* GetOatFileBegin() const;
+  uint8_t* GetOatFileBegin(const char* oat_filename) const;
 
   // If image_fd is not kInvalidImageFd, then we use that for the file. Otherwise we open
-  // image_filename.
+  // the files named in image_filenames.
   bool Write(int image_fd,
-             const std::string& image_filename,
-             const std::string& oat_filename,
-             const std::string& oat_location)
+             const std::vector<const char*>& image_filenames,
+             const std::vector<const char*>& oat_filenames)
       REQUIRES(!Locks::mutator_lock_);
 
-  uintptr_t GetOatDataBegin() {
-    return reinterpret_cast<uintptr_t>(oat_data_begin_);
+  uintptr_t GetOatDataBegin(const char* oat_filename) {
+    return reinterpret_cast<uintptr_t>(GetImageInfo(oat_filename).oat_data_begin_);
   }
 
+  const char* GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Read the loaded size of the given oat file and update the oat_offset of the next oat
+  // file accordingly.
+  void UpdateOatFile(const char* oat_filename);
+
  private:
   bool AllocMemory();
 
@@ -214,6 +212,69 @@
     const uint32_t lockword_;
   };
 
+  struct ImageInfo {
+    ImageInfo();
+    ImageInfo(ImageInfo&&) = default;
+
+    // Creates the image sections in out_sections and returns the size of the image excluding
+    // the bitmap.
+    size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const;
+
+    std::unique_ptr<MemMap> image_;  // Memory mapped for generating the image.
+
+    // Target begin of this image. Note: it is not valid to write here; this is the address
+    // of the target image, not necessarily where image_ is mapped. The address is only valid
+    // after the layout has been computed (otherwise null).
+    uint8_t* image_begin_ = nullptr;
+
+    // Offset to the free space in image_, initially size of image header.
+    size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);
+    uint32_t image_roots_address_ = 0;  // The image roots address in the image.
+    size_t image_offset_ = 0;  // Offset of this image from the start of the first image.
+
+    // Image size is the *address space* covered by this image. As the live bitmap is aligned
+    // to the page size, the live bitmap will cover more address space than necessary. But live
+    // bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size.
+    // The next image may only start at image_begin_ + image_size_ (which is guaranteed to be
+    // page-aligned).
+    size_t image_size_ = 0;
+
+    // Oat data.
+    // Offset of the oat file for this image from start of oat files. This is
+    // valid when the previous oat file has been written.
+    size_t oat_offset_ = 0;
+    // Start of oatdata in the corresponding oat file. This is
+    // valid once the images have been laid out.
+    uint8_t* oat_data_begin_ = nullptr;
+    size_t oat_size_ = 0;  // Size of the corresponding oat data.
+
+    // Image bitmap which lets us know where the objects inside of the image reside.
+    std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
+
+    // The start offsets of the dex cache arrays.
+    SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
+
+    // Offset from oat_data_begin_ to the stubs.
+    uint32_t oat_address_offsets_[kOatAddressCount] = {};
+
+    // Bin slot tracking for dirty object packing.
+    size_t bin_slot_sizes_[kBinSize] = {};  // Number of bytes in a bin.
+    size_t bin_slot_offsets_[kBinSize] = {};  // Number of bytes in previous bins.
+    size_t bin_slot_count_[kBinSize] = {};  // Number of objects in a bin.
+
+    // Cached size of the intern table for when we allocate memory.
+    size_t intern_table_bytes_ = 0;
+
+    // Number of image class table bytes.
+    size_t class_table_bytes_ = 0;
+
+    // Intern table associated with this image for serialization.
+    std::unique_ptr<InternTable> intern_table_;
+
+    // Class table associated with this image for serialization.
+    std::unique_ptr<ClassTable> class_table_;
+  };
+
   // We use the lock word to store the offset of the object in the image.
   void AssignImageOffset(mirror::Object* object, BinSlot bin_slot)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -233,7 +294,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void AddDexCacheArrayRelocation(void* array, size_t offset) SHARED_REQUIRES(Locks::mutator_lock_);
+  void AddDexCacheArrayRelocation(void* array, size_t offset, mirror::DexCache* dex_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void AddMethodPointerArray(mirror::PointerArray* arr) SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void* GetImageAddressCallback(void* writer, mirror::Object* obj)
@@ -244,19 +306,21 @@
   mirror::Object* GetLocalAddress(mirror::Object* object) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
     size_t offset = GetImageOffset(object);
-    uint8_t* dst = image_->Begin() + offset;
+    const char* oat_filename = GetOatFilename(object);
+    const ImageInfo& image_info = GetConstImageInfo(oat_filename);
+    uint8_t* dst = image_info.image_->Begin() + offset;
     return reinterpret_cast<mirror::Object*>(dst);
   }
 
   // Returns the address in the boot image if we are compiling the app image.
   const uint8_t* GetOatAddress(OatAddress type) const;
 
-  const uint8_t* GetOatAddressForOffset(uint32_t offset) const {
+  const uint8_t* GetOatAddressForOffset(uint32_t offset, const ImageInfo& image_info) const {
     // With Quick, code is within the OatFile, as it is all in one
-    // .o ELF object.
-    DCHECK_LE(offset, oat_file_->Size());
-    DCHECK(oat_data_begin_ != nullptr);
-    return offset == 0u ? nullptr : oat_data_begin_ + offset;
+    // .o ELF object. Note that the offset is interpreted as signed.
+    DCHECK_LE(static_cast<int32_t>(offset), static_cast<int32_t>(image_info.oat_size_));
+    DCHECK(image_info.oat_data_begin_ != nullptr);
+    return offset == 0u ? nullptr : image_info.oat_data_begin_ + static_cast<int32_t>(offset);
   }
 
   // Returns true if the class was in the original requested image classes list.
@@ -282,7 +346,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* CreateImageRoots() const
+  mirror::ObjectArray<mirror::Object>* CreateImageRoots(const char* oat_filename) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CalculateObjectBinSlots(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -304,7 +368,7 @@
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
-  void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy)
+  void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -319,23 +383,24 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get quick code for non-resolution/imt_conflict/abstract method.
-  const uint8_t* GetQuickCode(ArtMethod* method, bool* quick_is_interpreted)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  const uint8_t* GetQuickEntryPoint(ArtMethod* method)
+  const uint8_t* GetQuickCode(ArtMethod* method,
+                              const ImageInfo& image_info,
+                              bool* quick_is_interpreted)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Patches references in OatFile to expect runtime addresses.
   void SetOatChecksumFromElfFile(File* elf_file);
 
   // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins.
-  size_t GetBinSizeSum(Bin up_to = kBinSize) const;
+  size_t GetBinSizeSum(ImageInfo& image_info, Bin up_to = kBinSize) const;
 
   // Return true if a method is likely to be dirtied at runtime.
   bool WillMethodBeDirty(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Assign the offset for an ArtMethod.
-  void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type)
+  void AssignMethodOffset(ArtMethod* method,
+                          NativeObjectRelocationType type,
+                          const char* oat_filename)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return true if klass is loaded by the boot class loader but not in the boot image.
@@ -359,11 +424,11 @@
 
   // Location of where the object will be when the image is loaded at runtime.
   template <typename T>
-  T* NativeLocationInImage(T* obj);
+  T* NativeLocationInImage(T* obj, const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Location of where the temporary copy of the object currently is.
   template <typename T>
-  T* NativeCopyLocation(T* obj);
+  T* NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return true of obj is inside of the boot image space. This may only return true if we are
   // compiling an app image.
@@ -372,68 +437,50 @@
   // Return true if ptr is within the boot oat file.
   bool IsInBootOatFile(const void* ptr) const;
 
+  const char* GetOatFilename(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_);
+
+  const char* GetDefaultOatFilename() const {
+    return default_oat_filename_;
+  }
+
+  ImageInfo& GetImageInfo(const char* oat_filename);
+  const ImageInfo& GetConstImageInfo(const char* oat_filename) const;
+  const ImageInfo& GetImageInfo(size_t index) const;
+
   const CompilerDriver& compiler_driver_;
 
-  // Beginning target image address for the output image.
-  uint8_t* image_begin_;
-
-  // Offset to the free space in image_.
-  size_t image_end_;
+  // Beginning target image address for the first image.
+  uint8_t* global_image_begin_;
 
   // Offset from image_begin_ to where the first object is in image_.
   size_t image_objects_offset_begin_;
 
-  // The image roots address in the image.
-  uint32_t image_roots_address_;
-
   // oat file with code for this image
   OatFile* oat_file_;
 
-  // Memory mapped for generating the image.
-  std::unique_ptr<MemMap> image_;
-
   // Pointer arrays that need to be updated. Since these are only some int and long arrays, we need
   // to keep track. These include vtable arrays, iftable arrays, and dex caches.
   std::unordered_map<mirror::PointerArray*, Bin> pointer_arrays_;
 
-  // The start offsets of the dex cache arrays.
-  SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
-
   // Saved hash codes. We use these to restore lockwords which were temporarily used to have
   // forwarding addresses as well as copying over hash codes.
   std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_;
 
-  // Beginning target oat address for the pointers from the output image to its oat file.
-  const uint8_t* oat_data_begin_;
-
-  // Image bitmap which lets us know where the objects inside of the image reside.
-  std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
-
-  // Offset from oat_data_begin_ to the stubs.
-  uint32_t oat_address_offsets_[kOatAddressCount];
-
   // Boolean flags.
   const bool compile_pic_;
   const bool compile_app_image_;
 
-  // Cache the boot image space in this class for faster lookups.
-  gc::space::ImageSpace* boot_image_space_;
-
   // Size of pointers on the target architecture.
   size_t target_ptr_size_;
 
-  // Bin slot tracking for dirty object packing
-  size_t bin_slot_sizes_[kBinSize];  // Number of bytes in a bin
-  size_t bin_slot_offsets_[kBinSize];  // Number of bytes in previous bins.
-  size_t bin_slot_count_[kBinSize];  // Number of objects in a bin
-
-  // Cached size of the intern table for when we allocate memory.
-  size_t intern_table_bytes_;
+  // Mapping of oat filename to image data.
+  std::unordered_map<std::string, ImageInfo> image_info_map_;
 
   // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to
   // have one entry per art field for convenience. ArtFields are placed right after the end of the
   // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields.
   struct NativeObjectRelocation {
+    const char* oat_filename;
     uintptr_t offset;
     NativeObjectRelocationType type;
 
@@ -462,12 +509,14 @@
   // null is a valid entry.
   std::unordered_set<mirror::ClassLoader*> class_loaders_;
 
-  // Number of image class table bytes.
-  size_t class_table_bytes_;
-
   // Which mode the image is stored as, see image.h
   const ImageHeader::StorageMode image_storage_mode_;
 
+  // Map of dex files to the oat filenames that they were compiled into.
+  const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map_;
+  const std::vector<const char*> oat_filenames_;
+  const char* default_oat_filename_;
+
   friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d001495..85216b7 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -84,6 +84,7 @@
       CompilerOptions::kDefaultNumDexMethodsThreshold,
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
+      /* no_inline_from */ nullptr,
       /* include_patch_information */ false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       Runtime::Current()->IsDebuggable(),
@@ -154,7 +155,8 @@
       /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
-      /* profile_file */ ""));
+      /* dex to oat map */ nullptr,
+      /* profile_compilation_info */ nullptr));
   // Disable dedupe so we can remove compiled methods.
   compiler_driver_->SetDedupeEnabled(false);
   compiler_driver_->SetSupportBootImageFixup(false);
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 92cf8ca..b10cc35 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@
         driver_(&compiler_options_, &verification_results_, &inliner_map_,
                 Compiler::kQuick, instruction_set, nullptr,
                 false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, ""),
+                false, false, "", false, nullptr, -1, nullptr, nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 7b7d46c..9f7ffa5 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -121,7 +121,8 @@
                                               false,
                                               timer_.get(),
                                               -1,
-                                              ""));
+                                              nullptr,
+                                              nullptr));
   }
 
   bool WriteElf(File* file,
@@ -199,7 +200,7 @@
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
-  ASSERT_EQ(1U, oat_header.GetDexFileCount());  // core
+  // One entry per boot class path dex file.
+  ASSERT_EQ(class_linker->GetBootClassPath().size(), oat_header.GetDexFileCount());
   ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum());
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
   ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey)));
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 53ac77b..025e35e 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -716,6 +716,14 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
+    const DexFile::TypeId& type_id =
+        dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
+    const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
+    // Skip methods that are not in the image.
+    if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) {
+      return true;
+    }
+
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
@@ -958,7 +966,9 @@
     if (writer_->HasBootImage()) {
       auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>(
               patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
-      const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
+      const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_);
+      const uint8_t* oat_data =
+          writer_->image_writer_->GetOatFileBegin(oat_filename) + file_offset_;
       return element - oat_data;
     } else {
       size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile());
@@ -994,9 +1004,15 @@
       // NOTE: We're using linker patches for app->boot references when the image can
       // be relocated and therefore we need to emit .oat_patches. We're not using this
       // for app->app references, so check that the method is an image method.
-      gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-      size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
-      CHECK(image_space->GetImageHeader().GetMethodsSection().Contains(method_offset));
+      std::vector<gc::space::ImageSpace*> image_spaces =
+          Runtime::Current()->GetHeap()->GetBootImageSpaces();
+      bool contains_method = false;
+      for (gc::space::ImageSpace* image_space : image_spaces) {
+        size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+        contains_method |=
+            image_space->GetImageHeader().GetMethodsSection().Contains(method_offset);
+      }
+      CHECK(contains_method);
     }
     // Note: We only patch targeting ArtMethods in image which is in the low 4gb.
     uint32_t address = PointerToLowMemUInt32(method);
@@ -1012,7 +1028,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t address = target_offset;
     if (writer_->HasBootImage()) {
-      address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
+      const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_);
+      address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin(oat_filename) +
                                       writer_->oat_data_offset_ + target_offset);
     }
     DCHECK_LE(offset + 4, code->size());
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 4c3f66a..dc75ff1 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1590,15 +1590,18 @@
     HGraph* graph = GetGraph();
     HInstruction* zero;
     switch (type) {
-      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
-      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
-      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      case Primitive::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
       default: zero = graph->GetConstant(type, 0); break;
     }
     HPhi* phi = new (graph->GetArena())
         HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
     phi->SetRawInputAt(0, instruction);
     phi->SetRawInputAt(1, zero);
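+    // A reference phi needs reference type info; inherit it from `instruction`.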
+    if (type == Primitive::kPrimNot) {
+      phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+    }
     new_preheader->AddPhi(phi);
     return phi;
   }
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1178d0f..1af6846 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -17,6 +17,8 @@
 #include "builder.h"
 
 #include "art_field-inl.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
 #include "dex/verified_method.h"
@@ -458,6 +460,19 @@
     return false;
   }
 
+  // Find locations where we want to generate extra stack maps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false);
+    native_debug_info_locations->ClearAllBits();
+    FindNativeDebugInfoLocations(code_item, native_debug_info_locations);
+  }
+
   CreateBlocksForTryCatch(code_item);
 
   InitializeParameters(code_item.ins_size_);
@@ -467,6 +482,11 @@
     // Update the current block if dex_pc starts a new block.
     MaybeUpdateCurrentBlock(dex_pc);
     const Instruction& instruction = *Instruction::At(code_ptr);
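+    // Emit a marker so that code generation records a stack map for this
+    // location.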
+    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+      if (current_block_ != nullptr) {
+        current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+    }
     if (!AnalyzeDexInstruction(instruction, dex_pc)) {
       return false;
     }
@@ -507,6 +527,47 @@
   current_block_ = block;
 }
 
+void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item,
+                                                 ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations);
+  // Add native debug info at the start of every basic block.
+  for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) {
+    if (FindBlockStartingAt(pc) != nullptr) {
+      locations->SetBit(pc);
+    }
+  }
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION:
+      case Instruction::MOVE_RESULT:
+      case Instruction::MOVE_RESULT_WIDE:
+      case Instruction::MOVE_RESULT_OBJECT: {
+        // The compiler requires that no instruction be emitted before these,
+        // so generate the HNativeDebugInfo after them instead.
+        locations->ClearBit(inst->GetDexPc(code_item.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
 bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
@@ -1756,7 +1817,12 @@
     UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc);
   } else {
     DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
+    // that may throw. If it succeeds, BoundType sets the new type of `object`
+    // for all subsequent uses.
     current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
+    current_block_->AddInstruction(new (arena_) HBoundType(object, dex_pc));
+    UpdateLocal(reference, current_block_->GetLastInstruction(), dex_pc);
   }
 }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 73e85bb..26bf1cb 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -112,6 +112,7 @@
                             const uint16_t* end,
                             size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t dex_pc);
+  void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations);
   HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
   HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 53d3615..57c5058 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -997,6 +997,12 @@
   stack_map_stream_.EndStackMapEntry();
 }
 
+bool CodeGenerator::HasStackMapAtCurrentPc() {
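+  // A stack map was already recorded here if the last entry's native PC
+  // equals the current code size.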
+  uint32_t pc = GetAssembler()->CodeSize();
+  size_t count = stack_map_stream_.GetNumberOfStackMaps();
+  return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
+}
+
 void CodeGenerator::RecordCatchBlockInfo() {
   ArenaAllocator* arena = graph_->GetArena();
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index eade05d..950043e 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -269,6 +269,8 @@
 
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+  // Check whether we have already recorded mapping at this PC.
+  bool HasStackMapAtCurrentPc();
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 680b200..0be1520 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -417,6 +417,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
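+    // Pass the object in the first argument register, call the
+    // pReadBarrierMark entrypoint, and move the returned (marked) reference
+    // from R0 into the output location.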
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
  public:
@@ -438,7 +488,7 @@
     // to be instrumented, e.g.:
     //
     //   __ LoadFromOffset(kLoadWord, out, out, offset);
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -454,7 +504,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -596,14 +648,18 @@
 class ReadBarrierForRootSlowPathARM : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1607,7 +1663,19 @@
                         /* false_target */ nullptr);
 }
 
-void LocationsBuilderARM::VisitCondition(HCondition* cond) {
+void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
+void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1638,7 +1706,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1695,83 +1763,83 @@
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
@@ -1872,7 +1940,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
@@ -2827,8 +2895,7 @@
   Register dividend = locations->InAt(0).AsRegister<Register>();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (ctz_imm == 1) {
@@ -2904,7 +2971,7 @@
     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (IsPowerOfTwo(std::abs(imm))) {
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
     DivRemByPowerOfTwo(instruction);
   } else {
     DCHECK(imm <= -2 || imm >= 2);
@@ -2933,12 +3000,12 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -3059,12 +3126,12 @@
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-        int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
-        if (abs_imm <= 1) {
+        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
         } else {
           locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(abs_imm)) {
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
             locations->AddTemp(Location::RequiresRegister());
           }
         }
@@ -3418,7 +3485,7 @@
       Register first_reg = first.AsRegister<Register>();
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
-        // Arm doesn't mask the shift count so we need to do it ourselves.
+        // ARM doesn't mask the shift count so we need to do it ourselves.
         __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
         if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, out_reg);
@@ -3430,7 +3497,7 @@
       } else {
         int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
         uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
-        if (shift_value == 0) {  // arm does not support shifting with 0 immediate.
+        if (shift_value == 0) {  // ARM does not support shifting by a 0 immediate.
           __ Mov(out_reg, first_reg);
         } else if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, shift_value);
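For context on the masking in both branches: Java defines int shifts modulo 32, whereas an ARM register-specified shift consumes the low byte of the register, so a count of 32 would clear the result rather than leave it unchanged. The semantics the generated code must match, assuming kMaxIntShiftValue is 31:

#include <cstdint>

int32_t JavaShl(int32_t x, int32_t count) {
  return x << (count & 31);  // JLS 15.19: only the low five bits of the count matter.
}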
@@ -3777,9 +3844,9 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
-  // TODO (ported from quick): revisit Arm barrier kinds
-  DmbOptions flavor = DmbOptions::ISH;  // quiet c++ warnings
+void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
+  // TODO (ported from quick): revisit ARM barrier kinds.
+  DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
   switch (kind) {
     case MemBarrierKind::kAnyStore:
     case MemBarrierKind::kLoadAny:
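The hunk is truncated mid-switch. For orientation, a plausible completion of the mapping as a standalone helper, inferred from the visible cases and the ARM DMB options; this is an assumption, not the patch text:

DmbOptions BarrierFlavor(MemBarrierKind kind) {
  switch (kind) {
    case MemBarrierKind::kAnyStore:
    case MemBarrierKind::kLoadAny:
    case MemBarrierKind::kAnyAny:
      return DmbOptions::ISH;    // Full inner-shareable barrier.
    case MemBarrierKind::kStoreStore:
      return DmbOptions::ISHST;  // Store-store ordering suffices.
    default:
      LOG(FATAL) << "Unexpected memory barrier " << kind;
      UNREACHABLE();
  }
}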
@@ -3860,11 +3927,11 @@
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   } else if (generate_volatile) {
-    // Arm encoding have some additional constraints for ldrexd/strexd:
+    // The ARM encoding has some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
-    // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
-    // enable Arm encoding.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
 
     locations->AddTemp(Location::RequiresRegister());
@@ -3894,7 +3961,7 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   switch (field_type) {
@@ -3986,7 +4053,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4020,14 +4087,18 @@
                       (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   }
   if (volatile_for_double) {
-    // Arm encoding have some additional constraints for ldrexd/strexd:
+    // The ARM encoding has some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
-    // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
-    // enable Arm encoding.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -4086,33 +4157,52 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (field_type) {
-    case Primitive::kPrimBoolean: {
+    case Primitive::kPrimBoolean:
       __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimByte: {
+    case Primitive::kPrimByte:
       __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimShort: {
+    case Primitive::kPrimShort:
       __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
-    case Primitive::kPrimChar: {
+    case Primitive::kPrimChar:
       __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset);
       break;
-    }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
       if (is_volatile && !atomic_ldrd_strd) {
         GenerateWideAtomicLoad(base, offset,
                                out.AsRegisterPairLow<Register>(),
@@ -4121,12 +4211,10 @@
         __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset);
       }
       break;
-    }
 
-    case Primitive::kPrimFloat: {
+    case Primitive::kPrimFloat:
       __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset);
       break;
-    }
 
     case Primitive::kPrimDouble: {
       DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
@@ -4148,17 +4236,20 @@
       UNREACHABLE();
   }
 
-  // Doubles are handled in the switch.
-  if (field_type != Primitive::kPrimDouble) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+    // Potential implicit null checks, in the case of reference or
+    // double fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4321,6 +4412,11 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
@@ -4328,12 +4424,13 @@
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  Location out_loc = locations->Out();
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4347,7 +4444,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4361,7 +4458,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4375,7 +4472,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4387,13 +4484,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4405,44 +4498,79 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadWord, out, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+          __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
+        __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
-        __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset);
+        __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK(out.IsFpuRegister());
+      SRegister out = out_loc.AsFpuRegister<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+        __ LoadSFromOffset(out, obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+        __ LoadSFromOffset(out, IP, data_offset);
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK(out.IsFpuRegisterPair());
+      SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+        __ LoadDFromOffset(FromLowSToD(out), obj, offset);
       } else {
         __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
-        __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+        __ LoadDFromOffset(FromLowSToD(out), IP, data_offset);
       }
       break;
     }
@@ -4451,20 +4579,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -4555,6 +4675,7 @@
           __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
         DCHECK(!may_need_runtime_call_for_type_check);
         break;
@@ -4596,12 +4717,12 @@
           //   __ Mov(temp2, temp1);
           //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
           //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = value->klass_
           //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
           //
           //   __ cmp(temp1, ShifterOperand(temp2));
@@ -4698,8 +4819,6 @@
         __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
         __ StoreToOffset(kStoreWord, value, IP, data_offset);
       }
-
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4751,8 +4870,8 @@
       UNREACHABLE();
   }
 
-  // Ints and objects are handled in the switch.
-  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+  // Objects are handled in the switch.
+  if (value_type != Primitive::kPrimNot) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
@@ -5121,16 +5240,9 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ AddConstant(out, current_method, declaring_class_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
@@ -5138,17 +5250,8 @@
                       out,
                       current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ AddConstant(out, out, cache_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ LoadFromOffset(kLoadWord, out, out, cache_offset);
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5211,30 +5314,14 @@
   Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ AddConstant(out, current_method, declaring_class_offset);
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ AddConstant(out, out, cache_offset);
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ LoadFromOffset(kLoadWord, out, out, cache_offset);
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
@@ -5281,6 +5368,14 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5307,21 +5402,22 @@
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5336,10 +5432,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ LoadFromOffset(kLoadWord, out, obj, class_offset);
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ cmp(out, ShifterOperand(cls));
       // Classes must be equal for the instanceof to succeed.
@@ -5354,17 +5449,8 @@
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -5382,17 +5468,8 @@
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -5410,17 +5487,8 @@
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5459,6 +5527,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
                                                                     /* is_fatal */ false);
@@ -5513,27 +5588,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5552,8 +5627,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5570,18 +5644,8 @@
       // object to avoid doing a comparison we know will fail.
       Label loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5593,8 +5657,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5610,18 +5673,8 @@
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -5632,8 +5685,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5645,19 +5697,8 @@
       __ b(&done, EQ);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5670,8 +5711,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5680,8 +5720,7 @@
       __ CompareAndBranchIfZero(temp, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5698,6 +5737,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -5882,14 +5928,249 @@
   }
 }
 
-void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
-                                           Location out,
-                                           Location ref,
-                                           Location obj,
-                                           uint32_t offset,
-                                           Location index) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ LoadFromOffset(
+          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
+      __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ AddConstant(root_reg, obj, offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
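Restated as plain C++, the Baker fast path emitted above does the following; names follow the comment pseudo-code, not the actual runtime API, so treat this as a sketch:

mirror::Object* LoadGcRootSketch(mirror::Object** slot) {
  mirror::Object* root = *slot;               // root = obj.field
  if (Thread::Current()->GetIsGcMarking()) {  // tls32_.is_gc_marking, read via TR.
    root = ReadBarrier::Mark(root);           // Runtime slow-path entrypoint.
  }
  return root;
}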
+
+void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, offset, no_index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, data_offset, index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 uint32_t offset,
+                                                                 Location index,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
+  __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Introduce a dependency on the high bits of rb_state, which are all
+  // zeroes, to prevent load-load reordering without using a memory
+  // barrier (which would be more expensive).
+  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
+  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+  // The value of obj is unchanged by this operation, but the register
+  // write makes obj carry a dependency on IP, which in turn depends on
+  // temp_reg.
+  __ add(obj, obj, ShifterOperand(IP));
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    // /* HeapReference<Object> */ ref =
+    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+      __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+    } else {
+      __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+      __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
+  __ b(slow_path->GetEntryLabel(), EQ);
+  __ Bind(slow_path->GetExitLabel());
+}
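The bic/add pair in the body above is the notable trick: rather than a dmb, it manufactures an address dependency, which the ARM memory model is required to order. A rough C++ analogy, with illustrative stand-ins for the LockWord shift and mask; note that C++ compilers do not reliably preserve such dependencies, which is exactly why ART does this in generated machine code:

#include <atomic>
#include <cstdint>

struct Obj {
  std::atomic<uint32_t> monitor;  // Stand-in for mirror::Object::monitor_.
  uint32_t field;
};

uint32_t DependentLoad(Obj* obj) {
  constexpr uint32_t kShift = 28;  // Illustrative read-barrier-state shift.
  constexpr uint32_t kMask = 3;    // Illustrative read-barrier-state mask.
  uint32_t rb_state = (obj->monitor.load(std::memory_order_relaxed) >> kShift) & kMask;
  uint32_t zero = rb_state & ~kMask;  // Provably 0; this is the bic.
  Obj* dep = reinterpret_cast<Obj*>(reinterpret_cast<uintptr_t>(obj) + zero);  // The add.
  return dep->field;  // This load's address now depends on the monitor load.
}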
+
+void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -5903,57 +6184,41 @@
       ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ b(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                Location out,
-                                                Location ref,
-                                                Location obj,
-                                                uint32_t offset,
-                                                Location index) {
+void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<Register>());
   }
 }
 
-void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                  Location out,
-                                                  Location root) {
+void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ b(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6337,7 +6602,7 @@
 
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1204b2c..26ca71e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
@@ -216,21 +217,56 @@
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
                                Register value_lo, Register value_hi,
                                Register temp1, Register temp2,
                                HInstruction* instruction);
   void GenerateWideAtomicLoad(Register addr, uint32_t offset,
                               Register out_lo, Register out_hi);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
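Taken together, the three reference-load helpers declared above split the work by addressing mode; representative call sites from the .cc hunks earlier in this patch:

// out = obj->klass_ (distinct source and destination registers):
GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
// out = out->super_class_ (in place; `temp` preserves the old value for
// non-Baker read barriers):
GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
// out = current_method->declaring_class_ (a GC root, not a heap reference):
GenerateGcRootFieldLoad(cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());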
@@ -343,6 +379,8 @@
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -403,7 +441,26 @@
     return &it->second;
   }
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -420,23 +477,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -446,9 +505,19 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
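A note on the split introduced above: a heap-reference load can now take one of
two shapes. With Baker's read barriers the fast path is emitted inline and only
calls out to a slow path while the GC is marking; otherwise the generic
artReadBarrierSlow entry point is used. The following is a sketch of the
intended dispatch at a field-load site, assuming the usual
kEmitCompilerReadBarrier/kUseBakerReadBarrier flags (`obj_loc` and the exact
call shapes are illustrative, not the actual codegen):

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Fast path: inline lock-word check; a slow-path call happens only
      // while the GC is marking.
      codegen->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj, offset, temp, /* needs_null_check */ true);
    } else {
      __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), obj, offset);
      // Unconditional slow-path read barrier (or plain unpoisoning when
      // read barriers are disabled).
      codegen->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
    }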
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5205830..1ad487d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2427,7 +2427,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
@@ -2447,7 +2447,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -2495,8 +2495,8 @@
   M(Above)                                                                               \
   M(AboveOrEqual)
 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
-void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
-void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
+void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
+void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
 #undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
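For a single condition, the macro pair above expands to exactly the pattern
the MIPS and x86 backends below still spell out by hand; for M(Equal):

    void LocationsBuilderARM64::VisitEqual(HEqual* comp) { HandleCondition(comp); }
    void InstructionCodeGeneratorARM64::VisitEqual(HEqual* comp) { HandleCondition(comp); }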
@@ -2534,8 +2534,7 @@
   Register out = OutputRegister(instruction);
   Register dividend = InputRegisterAt(instruction, 0);
   int64_t imm = Int64FromConstant(second.GetConstant());
-  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
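The std::abs -> AbsOrMin change here (and again in the MIPS and x86 files
below) sidesteps undefined behavior: negating the most negative integer does
not fit in the signed type. The assumed semantics of ART's helper, as a
host-side sketch (a reconstruction, not the verbatim source):

    #include <cstdint>
    #include <limits>

    // Returns |value|, except that the most negative value is returned
    // unchanged; the subsequent cast to the unsigned type then yields the
    // correct magnitude, e.g. uint64_t(AbsOrMin(INT64_MIN)) == 1ull << 63,
    // which is still a power of two.
    template <typename T>
    T AbsOrMin(T value) {
      return (value == std::numeric_limits<T>::min())
                 ? value
                 : (value < 0 ? -value : value);
    }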
 
   UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -2627,7 +2626,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -2949,6 +2948,18 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   HandleFieldGet(instruction);
 }
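The VisitNativeDebugInfo handlers added to every backend in this change share
one pattern: stack maps are keyed by native PC, so recording a second map at
an offset that already has one would be ambiguous, and a nop guarantees a
fresh PC. A plausible shape for the HasStackMapAtCurrentPc() query (member
names are hypothetical, not the actual CodeGenerator source):

    bool CodeGenerator::HasStackMapAtCurrentPc() {
      uint32_t pc = GetAssembler()->CodeSize();
      size_t count = stack_map_stream_.GetNumberOfStackMaps();
      return count > 0 &&
             stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
    }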
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7950f07..0e90ac6 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -215,6 +215,7 @@
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -257,6 +258,7 @@
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction);
   void HandleInvoke(HInvoke* instr);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ae0f2c8..7bc0635 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1191,17 +1191,16 @@
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path,
                                       bool is_direct_entrypoint) {
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+  __ Jalr(T9);
   if (is_direct_entrypoint) {
     // Reserve argument space on stack (for $a0-$a3) for
     // entrypoints that directly reference native implementations.
     // The called function may use this space to store the $a0-$a3 regs.
-    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
-  }
-  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
-  __ Jalr(T9);
-  __ Nop();
-  if (is_direct_entrypoint) {
+    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);  // Single instruction in delay slot.
     __ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+  } else {
+    __ Nop();  // In delay slot.
   }
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
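The reordering above exploits the MIPS branch delay slot: the instruction
textually following Jalr executes before control reaches the callee. Rather
than always burning the slot on a Nop, the single-instruction frame-size bump
needed by direct entrypoints now rides in the slot. Roughly, the emitted
sequences (argument-space size shown symbolically as ARGS, i.e.
kMipsDirectEntrypointRuntimeOffset):

    // Direct entrypoint:                 // Other entrypoints:
    //   lw    $t9, entry_point_offset(TR)  //   lw    $t9, entry_point_offset(TR)
    //   jalr  $t9                          //   jalr  $t9
    //   addiu $sp, $sp, -ARGS   <- slot    //   nop               <- slot
    //   addiu $sp, $sp, +ARGS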
@@ -1275,15 +1274,9 @@
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      if (instruction->IsAdd() || instruction->IsSub()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-      } else {
-        DCHECK(instruction->IsAnd() || instruction->IsOr() || instruction->IsXor());
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
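Allowing the second input to be a constant (Location::RegisterOrConstant) is
what enables the immediate-folding paths added to HandleBinaryOp below. The
helper presumably reduces to the following (a sketch consistent with how the
Location API is used here, not quoted from locations.h):

    Location Location::RegisterOrConstant(HInstruction* instruction) {
      return instruction->IsConstant()
                 ? Location::ConstantLocation(instruction->AsConstant())
                 : Location::RequiresRegister();
    }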
 
@@ -1350,34 +1343,142 @@
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
       Register dst_low = locations->Out().AsRegisterPairLow<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
-      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
-      Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
-
-      if (instruction->IsAnd()) {
-        __ And(dst_low, lhs_low, rhs_low);
-        __ And(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsOr()) {
-        __ Or(dst_low, lhs_low, rhs_low);
-        __ Or(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsXor()) {
-        __ Xor(dst_low, lhs_low, rhs_low);
-        __ Xor(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsAdd()) {
-        __ Addu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, dst_low, lhs_low);
-        __ Addu(dst_high, lhs_high, rhs_high);
-        __ Addu(dst_high, dst_high, TMP);
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      if (!use_imm) {
+        Register rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+        Register rhs_low = rhs_location.AsRegisterPairLow<Register>();
+        if (instruction->IsAnd()) {
+          __ And(dst_low, lhs_low, rhs_low);
+          __ And(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsOr()) {
+          __ Or(dst_low, lhs_low, rhs_low);
+          __ Or(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsXor()) {
+          __ Xor(dst_low, lhs_low, rhs_low);
+          __ Xor(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsAdd()) {
+          if (lhs_low == rhs_low) {
+            // Special case for lhs = rhs and the sum potentially overwriting both lhs and rhs.
+            __ Slt(TMP, lhs_low, ZERO);
+            __ Addu(dst_low, lhs_low, rhs_low);
+          } else {
+            __ Addu(dst_low, lhs_low, rhs_low);
+            // If the sum overwrites rhs, lhs remains unchanged; otherwise, rhs remains unchanged.
+            __ Sltu(TMP, dst_low, (dst_low == rhs_low) ? lhs_low : rhs_low);
+          }
+          __ Addu(dst_high, lhs_high, rhs_high);
+          __ Addu(dst_high, dst_high, TMP);
+        } else {
+          DCHECK(instruction->IsSub());
+          __ Sltu(TMP, lhs_low, rhs_low);
+          __ Subu(dst_low, lhs_low, rhs_low);
+          __ Subu(dst_high, lhs_high, rhs_high);
+          __ Subu(dst_high, dst_high, TMP);
+        }
       } else {
-        DCHECK(instruction->IsSub());
-        __ Subu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, lhs_low, dst_low);
-        __ Subu(dst_high, lhs_high, rhs_high);
-        __ Subu(dst_high, dst_high, TMP);
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (instruction->IsOr()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Ori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Or(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Ori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Or(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsXor()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Xori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Xor(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Xori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Xor(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsAnd()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            __ Andi(dst_low, lhs_low, low);
+          } else if (low != 0xFFFFFFFF) {
+            __ LoadConst32(TMP, low);
+            __ And(dst_low, lhs_low, TMP);
+          } else if (dst_low != lhs_low) {
+            __ Move(dst_low, lhs_low);
+          }
+          if (IsUint<16>(high)) {
+            __ Andi(dst_high, lhs_high, high);
+          } else if (high != 0xFFFFFFFF) {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ And(dst_high, lhs_high, TMP);
+          } else if (dst_high != lhs_high) {
+            __ Move(dst_high, lhs_high);
+          }
+        } else {
+          if (instruction->IsSub()) {
+            value = -value;
+          } else {
+            DCHECK(instruction->IsAdd());
+          }
+          int32_t low = Low32Bits(value);
+          int32_t high = High32Bits(value);
+          if (IsInt<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Addiu(dst_low, lhs_low, low);
+            }
+            if (low != 0) {
+              __ Sltiu(AT, dst_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Addu(dst_low, lhs_low, TMP);
+            __ Sltu(AT, dst_low, TMP);
+          }
+          if (IsInt<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Addiu(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Addu(dst_high, lhs_high, TMP);
+          }
+          if (low != 0) {
+            __ Addu(dst_high, dst_high, AT);
+          }
+        }
       }
       break;
     }
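The register/register add and sub paths above compute the low-word carry and
borrow with Sltu. A host-side restatement of that logic, which also makes the
aliasing special cases easy to check:

    #include <cstdint>

    // IsAdd() path: the carry out of the low word is 1 iff the unsigned
    // 32-bit sum wrapped around (dst_low ends up below either operand).
    uint64_t Add64Via32(uint32_t lhs_lo, uint32_t lhs_hi,
                        uint32_t rhs_lo, uint32_t rhs_hi) {
      uint32_t dst_lo = lhs_lo + rhs_lo;
      uint32_t carry = (dst_lo < rhs_lo) ? 1u : 0u;  // Sltu(TMP, dst_low, rhs_low)
      uint32_t dst_hi = lhs_hi + rhs_hi + carry;
      return (static_cast<uint64_t>(dst_hi) << 32) | dst_lo;
    }

    // IsSub() path: the borrow is computed before Subu clobbers lhs_low.
    uint64_t Sub64Via32(uint32_t lhs_lo, uint32_t lhs_hi,
                        uint32_t rhs_lo, uint32_t rhs_hi) {
      uint32_t borrow = (lhs_lo < rhs_lo) ? 1u : 0u;  // Sltu(TMP, lhs_low, rhs_low)
      uint32_t dst_lo = lhs_lo - rhs_lo;
      uint32_t dst_hi = lhs_hi - rhs_hi - borrow;
      return (static_cast<uint64_t>(dst_hi) << 32) | dst_lo;
    }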
@@ -1416,12 +1517,15 @@
   Primitive::Type type = instr->GetResultType();
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
-    }
     default:
       LOG(FATAL) << "Unexpected shift type " << type;
   }
@@ -1440,6 +1544,8 @@
   int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
   uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
   uint32_t shift_value = rhs_imm & shift_mask;
+  // Is the INS (Insert Bit Field) instruction supported?
+  bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 
   switch (type) {
     case Primitive::kPrimInt: {
@@ -1474,21 +1580,37 @@
           if (shift_value == 0) {
             codegen_->Move64(locations->Out(), locations->InAt(0));
           } else if (shift_value < kMipsBitsPerWord) {
-            if (instr->IsShl()) {
-              __ Sll(dst_low, lhs_low, shift_value);
-              __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
-              __ Sll(dst_high, lhs_high, shift_value);
-              __ Or(dst_high, dst_high, TMP);
-            } else if (instr->IsShr()) {
-              __ Sra(dst_high, lhs_high, shift_value);
-              __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-              __ Srl(dst_low, lhs_low, shift_value);
-              __ Or(dst_low, dst_low, TMP);
+            if (has_ins) {
+              if (instr->IsShl()) {
+                __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
+                __ Sll(dst_low, lhs_low, shift_value);
+              } else if (instr->IsShr()) {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Sra(dst_high, lhs_high, shift_value);
+              } else {
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                __ Srl(dst_high, lhs_high, shift_value);
+              }
             } else {
-              __ Srl(dst_high, lhs_high, shift_value);
-              __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-              __ Srl(dst_low, lhs_low, shift_value);
-              __ Or(dst_low, dst_low, TMP);
+              if (instr->IsShl()) {
+                __ Sll(dst_low, lhs_low, shift_value);
+                __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
+                __ Sll(dst_high, lhs_high, shift_value);
+                __ Or(dst_high, dst_high, TMP);
+              } else if (instr->IsShr()) {
+                __ Sra(dst_high, lhs_high, shift_value);
+                __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Or(dst_low, dst_low, TMP);
+              } else {
+                __ Srl(dst_high, lhs_high, shift_value);
+                __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+                __ Srl(dst_low, lhs_low, shift_value);
+                __ Or(dst_low, dst_low, TMP);
+              }
             }
           } else {
             shift_value -= kMipsBitsPerWord;
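Both the INS-based and the classic Sll/Srl/Or sequences above compute the same
split shift; the R2+ form merely merges the shift-and-or into one Ins. For
0 < shift_value < 32, the intended arithmetic is:

    #include <cstdint>

    // Long shift-left across a 32-bit register pair, 0 < n < 32.
    uint64_t Shl64Via32(uint32_t lo, uint32_t hi, unsigned n) {
      uint32_t dst_lo = lo << n;
      uint32_t dst_hi = (hi << n) | (lo >> (32 - n));
      return (static_cast<uint64_t>(dst_hi) << 32) | dst_lo;
    }

    // Arithmetic shift-right: the high word shifts in sign bits, the low
    // word picks up the bits falling out of the high word.
    uint64_t Shr64Via32(uint32_t lo, uint32_t hi, unsigned n) {
      uint32_t dst_hi = static_cast<uint32_t>(static_cast<int32_t>(hi) >> n);
      uint32_t dst_lo = (lo >> n) | (hi << (32 - n));
      return (static_cast<uint64_t>(dst_hi) << 32) | dst_lo;
    }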
@@ -2092,7 +2214,7 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->InputAt(0)->GetType()) {
     default:
@@ -2112,7 +2234,7 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -2192,8 +2314,7 @@
   Register out = locations->Out().AsRegister<Register>();
   Register dividend = locations->InAt(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (instruction->IsDiv()) {
@@ -2296,7 +2417,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -3244,6 +3365,18 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
@@ -4792,83 +4925,83 @@
 }
 
 void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 1ee6bde..38302ad 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -185,6 +185,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -220,6 +221,7 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 1e428a0..7682ca7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1852,7 +1852,7 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->InputAt(0)->GetType()) {
     default:
@@ -1872,7 +1872,7 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -1955,8 +1955,7 @@
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
   int64_t imm = Int64FromConstant(second.GetConstant());
-  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (instruction->IsDiv()) {
@@ -2138,7 +2137,7 @@
       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (IsPowerOfTwo(std::abs(imm))) {
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
       DivRemByPowerOfTwo(instruction);
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -2745,6 +2744,18 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
                                             const FieldInfo& field_info ATTRIBUTE_UNUSED) {
   LocationSummary* locations =
@@ -4075,83 +4086,83 @@
 }
 
 void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 1593cec..60ff96d 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -189,6 +189,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -224,6 +225,7 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7a5b8db..4a0c2f4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1335,9 +1335,10 @@
 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
-                                                  Label* true_label,
-                                                  Label* false_label) {
+                                                  LabelType* true_label,
+                                                  LabelType* false_label) {
   if (cond->IsFPConditionTrueIfNaN()) {
     __ j(kUnordered, true_label);
   } else if (cond->IsFPConditionFalseIfNaN()) {
@@ -1346,9 +1347,10 @@
   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
-                                                               Label* true_label,
-                                                               Label* false_label) {
+                                                               LabelType* true_label,
+                                                               LabelType* false_label) {
   LocationSummary* locations = cond->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
@@ -1437,14 +1439,15 @@
   __ j(final_condition, true_label);
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
-                                                               Label* true_target_in,
-                                                               Label* false_target_in) {
+                                                               LabelType* true_target_in,
+                                                               LabelType* false_target_in) {
   // Generated branching requires both targets to be explicit. If either of the
   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
-  Label fallthrough_target;
-  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
-  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
@@ -1486,10 +1489,11 @@
          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
                                                         size_t condition_input_index,
-                                                        Label* true_target,
-                                                        Label* false_target) {
+                                                        LabelType* true_target,
+                                                        LabelType* false_target) {
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -1554,7 +1558,7 @@
 
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
-    // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
     } else if (rhs.IsConstant()) {
@@ -1613,7 +1617,19 @@
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
-                        /* false_target */ nullptr);
+                        /* false_target */ static_cast<Label*>(nullptr));
+}
+
+void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
 }
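The static_cast<Label*>(nullptr) above is not cosmetic: GenerateTestAndBranch
is now a template over the label type, and a bare nullptr argument gives the
compiler nothing from which to deduce LabelType. The cast pins the
instantiation:

    // template <class LabelType>
    // void GenerateTestAndBranch(HInstruction*, size_t,
    //                            LabelType* true_target, LabelType* false_target);
    //
    // GetEntryLabel() returns Label*, while nullptr has type std::nullptr_t,
    // so deduction fails without the explicit Label* cast on false_target.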
 
 void LocationsBuilderX86::VisitLocal(HLocal* local) {
@@ -1659,7 +1675,7 @@
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1692,7 +1708,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1701,7 +1717,7 @@
   Location lhs = locations->InAt(0);
   Location rhs = locations->InAt(1);
   Register reg = locations->Out().AsRegister<Register>();
-  Label true_label, false_label;
+  NearLabel true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
@@ -1753,83 +1769,83 @@
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -3211,11 +3227,12 @@
   Register out_register = locations->Out().AsRegister<Register>();
   Register input_register = locations->InAt(0).AsRegister<Register>();
   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
 
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
   Register num = locations->GetTemp(0).AsRegister<Register>();
 
-  __ leal(num, Address(input_register, std::abs(imm) - 1));
+  __ leal(num, Address(input_register, abs_imm - 1));
   __ testl(input_register, input_register);
   __ cmovl(kGreaterEqual, num, input_register);
   int shift = CTZ(imm);
@@ -3328,7 +3345,7 @@
          // Do not generate anything for 0. DivZeroCheck prevents any code from being generated.
         } else if (imm == 1 || imm == -1) {
           DivRemOneOrMinusOne(instruction);
-        } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+        } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
           DivByPowerOfTwo(instruction->AsDiv());
         } else {
           DCHECK(imm <= -2 || imm >= 2);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f0ead03..df73476 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -167,6 +167,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -265,15 +267,22 @@
 
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
+  template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
-                             Label* true_target,
-                             Label* false_target);
+                             LabelType* true_target,
+                             LabelType* false_target);
+  template<class LabelType>
   void GenerateCompareTestAndBranch(HCondition* condition,
-                                    Label* true_target,
-                                    Label* false_target);
-  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
-  void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
+                                    LabelType* true_target,
+                                    LabelType* false_target);
+  template<class LabelType>
+  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
+  template<class LabelType>
+  void GenerateLongComparesAndJumps(HCondition* cond,
+                                    LabelType* true_label,
+                                    LabelType* false_label);
+
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
   void GenPackedSwitchWithCompares(Register value_reg,
                                    int32_t lower_bound,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1e6d506..ec62d84 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -786,7 +786,7 @@
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
-      __ gs()->movl(temp.AsRegister<CpuRegister>(),
+      __ gs()->movq(temp.AsRegister<CpuRegister>(),
                     Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
@@ -1370,9 +1370,10 @@
 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
-                                                     Label* true_label,
-                                                     Label* false_label) {
+                                                     LabelType* true_label,
+                                                     LabelType* false_label) {
   if (cond->IsFPConditionTrueIfNaN()) {
     __ j(kUnordered, true_label);
   } else if (cond->IsFPConditionFalseIfNaN()) {
@@ -1381,14 +1382,15 @@
   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
-                                                                  Label* true_target_in,
-                                                                  Label* false_target_in) {
+                                                                  LabelType* true_target_in,
+                                                                  LabelType* false_target_in) {
   // Generated branching requires both targets to be explicit. If either of the
   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
-  Label fallthrough_target;
-  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
-  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+  LabelType fallthrough_target;
+  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
@@ -1470,10 +1472,11 @@
          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
 }
 
+template<class LabelType>
 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
                                                            size_t condition_input_index,
-                                                           Label* true_target,
-                                                           Label* false_target) {
+                                                           LabelType* true_target,
+                                                           LabelType* false_target) {
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -1597,7 +1600,19 @@
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
-                        /* false_target */ nullptr);
+                        /* false_target */ static_cast<Label*>(nullptr));
+}
+
+void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
 }
 
 void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
@@ -1643,7 +1658,7 @@
 void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1667,7 +1682,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1676,7 +1691,7 @@
   Location lhs = locations->InAt(0);
   Location rhs = locations->InAt(1);
   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
-  Label true_label, false_label;
+  NearLabel true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default:
@@ -1765,83 +1780,83 @@
 }
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
@@ -3339,13 +3354,13 @@
   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
 
   int64_t imm = Int64FromConstant(second.GetConstant());
-
-  DCHECK(IsPowerOfTwo(std::abs(imm)));
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint64_t abs_imm = AbsOrMin(imm);
 
   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+    __ leal(tmp, Address(numerator, abs_imm - 1));
     __ testl(numerator, numerator);
     __ cmov(kGreaterEqual, tmp, numerator);
     int shift = CTZ(imm);
@@ -3360,7 +3375,7 @@
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
 
-    codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
+    codegen_->Load64BitValue(rdx, abs_imm - 1);
     __ addq(rdx, numerator);
     __ testq(numerator, numerator);
     __ cmov(kGreaterEqual, rdx, numerator);
@@ -3518,7 +3533,7 @@
      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
       DivByPowerOfTwo(instruction->AsDiv());
     } else {
       DCHECK(imm <= -2 || imm >= 2);
@@ -5739,7 +5754,7 @@
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  Label done;
+  NearLabel done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
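Label -> NearLabel, here and in HandleCondition above, is a code-size
optimization: a NearLabel promises its binding site is close enough for the
short jump encodings (two-byte jcc rel8 / jmp rel8 instead of the five- or
six-byte rel32 forms). The assumed contract is that binding a NearLabel out of
rel8 range would trip an assembler check, so it is only used where the
bracketed sequence is provably short:

    NearLabel done;  // bound within ~127 bytes -> 2-byte branch encodings
    Label far;       // unconstrained distance  -> rel32 encodings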
@@ -6377,7 +6392,7 @@
     if (index != num_entries) {
      // There is an odd number of entries. Handle the last one.
       DCHECK_EQ(index + 1, num_entries);
-      __ cmpl(value_reg_in, Immediate(lower_bound + index));
+      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
     }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e5a487c..c5e8a04 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -172,6 +172,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction);
@@ -213,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
 
   void HandleFieldSet(HInstruction* instruction,
@@ -256,14 +258,18 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
+  template<class LabelType>
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
-                             Label* true_target,
-                             Label* false_target);
+                             LabelType* true_target,
+                             LabelType* false_target);
+  template<class LabelType>
   void GenerateCompareTestAndBranch(HCondition* condition,
-                                    Label* true_target,
-                                    Label* false_target);
-  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
+                                    LabelType* true_target,
+                                    LabelType* false_target);
+  template<class LabelType>
+  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
+
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   X86_64Assembler* const assembler_;
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index e469c8d..a8f65bf 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -32,7 +32,7 @@
 /**
  * Fixture class for the constant folding and dce tests.
  */
-class ConstantFoldingTest : public testing::Test {
+class ConstantFoldingTest : public CommonCompilerTest {
  public:
   ConstantFoldingTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -56,7 +56,7 @@
                             const std::string& expected_after_dce,
                             std::function<void(HGraph*)> check_after_cf) {
     ASSERT_NE(graph_, nullptr);
-    graph_->TryBuildingSsa();
+    TransformToSsa(graph_);
 
     StringPrettyPrinter printer_before(graph_);
     printer_before.VisitInsertionOrder();
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 02e5dab..67ff87a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -165,6 +165,7 @@
       if (!inst->HasSideEffects()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
+          && !inst->IsNativeDebugInfo()
           // If we added an explicit barrier, then we should keep it.
           && !inst->IsMemoryBarrier()
           && !inst->IsParameterValue()
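
The dead-code-elimination hunk adds HNativeDebugInfo to the set of instructions that must never be removed. As a self-contained restatement of the predicate's shape (plain C++ with boolean stand-ins, not ART's HInstruction API):

    #include <cstdio>

    struct Instr {
      bool has_side_effects;
      bool can_throw;
      bool is_suspend_check;
      bool is_native_debug_info;  // newly kept: anchors info for native debuggers
      bool is_memory_barrier;
      bool is_parameter_value;
    };

    // An unused instruction is removable only if none of the "must keep"
    // properties hold.
    bool IsRemovable(const Instr& i) {
      return !i.has_side_effects && !i.can_throw && !i.is_suspend_check &&
             !i.is_native_debug_info && !i.is_memory_barrier &&
             !i.is_parameter_value;
    }

    int main() {
      Instr debug_info{};  // all false: would have been removable before the patch
      debug_info.is_native_debug_info = true;
      std::printf("removable: %d\n", IsRemovable(debug_info));  // prints 0
    }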
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 2c6a1ef..f0f98ef 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -26,6 +26,8 @@
 
 namespace art {
 
+class DeadCodeEliminationTest : public CommonCompilerTest {};
+
 static void TestCode(const uint16_t* data,
                      const std::string& expected_before,
                      const std::string& expected_after) {
@@ -34,7 +36,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   StringPrettyPrinter printer_before(graph);
   printer_before.VisitInsertionOrder();
@@ -55,7 +57,6 @@
   ASSERT_EQ(actual_after, expected_after);
 }
 
-
 /**
  * Small three-register program.
  *
@@ -69,7 +70,7 @@
  * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
  *     return-void              7.      return
  */
-TEST(DeadCodeElimination, AdditionAndConditionalJump) {
+TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -131,7 +132,7 @@
  * L3: v2 <- v1 + 4             11.     add-int/lit16 v2, v1, #+4
  *     return                   13.     return-void
  */
-TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) {
+TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index dfc363f..6d0bdbe 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -24,6 +24,7 @@
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -594,6 +595,17 @@
       }
     }
   }
+
+  // Ensure that reference type instructions have reference type info.
+  if (instruction->GetType() == Primitive::kPrimNot) {
+    ScopedObjectAccess soa(Thread::Current());
+    if (!instruction->GetReferenceTypeInfo().IsValid()) {
+      AddError(StringPrintf("Reference type instruction %s:%d does not have "
+                            "valid reference type information.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
@@ -751,6 +763,14 @@
                                 phi->GetId(),
                                 phi->GetRegNumber(),
                                 type_str.str().c_str()));
+        } else if (phi->GetType() == Primitive::kPrimNot) {
+          std::stringstream type_str;
+          type_str << other_phi->GetType();
+          AddError(StringPrintf(
+              "Equivalent non-reference phi (%d) found for VReg %d with type: %s.",
+              phi->GetId(),
+              phi->GetRegNumber(),
+              type_str.str().c_str()));
         } else {
           ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
           if (!IsConstantEquivalent(phi, other_phi, &visited)) {
@@ -901,4 +921,16 @@
   }
 }
 
+void SSAChecker::VisitBoundType(HBoundType* instruction) {
+  VisitInstruction(instruction);
+
+  ScopedObjectAccess soa(Thread::Current());
+  if (!instruction->GetUpperBound().IsValid()) {
+    AddError(StringPrintf(
+        "%s %d does not have a valid upper bound RTI.",
+        instruction->DebugName(),
+        instruction->GetId()));
+  }
+}
+
 }  // namespace art
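
Both checker additions enforce the same new invariant: every reference-typed value (and every HBoundType's upper bound) must carry valid reference type info. A standalone model of the per-instruction check (illustrative field names, not ART's API):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct FakeInstr {
      std::string name;
      bool is_reference;   // models GetType() == Primitive::kPrimNot
      bool rti_is_valid;   // models GetReferenceTypeInfo().IsValid()
    };

    std::vector<std::string> CheckRti(const std::vector<FakeInstr>& instrs) {
      std::vector<std::string> errors;
      for (const FakeInstr& i : instrs) {
        if (i.is_reference && !i.rti_is_valid) {
          errors.push_back(i.name + " lacks valid reference type info");
        }
      }
      return errors;
    }

    int main() {
      for (const std::string& e : CheckRti({{"NewInstance", true, true},
                                            {"BoundType", true, false}})) {
        std::printf("error: %s\n", e.c_str());  // reports only BoundType
      }
    }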
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index d5ddbab..2e16bfe 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -128,6 +128,7 @@
   void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
   void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
   void VisitConstant(HConstant* instruction) OVERRIDE;
+  void VisitBoundType(HBoundType* instruction) OVERRIDE;
 
   void HandleBooleanInput(HInstruction* instruction, size_t input_index);
 
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index fee56c7..d10df4c 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -17,8 +17,6 @@
 #include "graph_checker.h"
 #include "optimizing_unit_test.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
 /**
@@ -43,7 +41,6 @@
   return graph;
 }
 
-
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -61,8 +58,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  graph->BuildDominatorTree();
-  graph->TransformToSsa();
+  TransformToSsa(graph);
 
   SSAChecker ssa_checker(graph);
   ssa_checker.Run();
@@ -145,7 +141,9 @@
   ASSERT_FALSE(graph_checker.IsValid());
 }
 
-TEST(SSAChecker, SSAPhi) {
+class SSACheckerTest : public CommonCompilerTest {};
+
+TEST_F(SSACheckerTest, SSAPhi) {
   // This code creates one Phi function during the conversion to SSA form.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e9fdb84..5f1328f 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,6 +30,7 @@
 #include "optimization.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -505,7 +506,7 @@
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
       }
-    } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
+    } else if ((IsPass(SsaBuilder::kSsaBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
@@ -519,21 +520,15 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
-      } else if (instruction->IsNullConstant()) {
+      } else {
         // The NullConstant may be added to the graph during other passes that happen between
         // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
         // doesn't run or doesn't inline anything, the NullConstant remains untyped.
         // So we should check NullConstants for validity only after reference type propagation.
-        //
-        // Note: The infrastructure to properly type NullConstants everywhere is to complex to add
-        // for the benefits.
-        StartAttributeStream("klass") << "not_set";
-        DCHECK(!is_after_pass_
-            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
-            << " Expected a valid rti after reference type propagation";
-      } else {
-        DCHECK(!is_after_pass_)
-            << "Expected a valid rti after reference type propagation";
+        DCHECK(graph_in_bad_state_ ||
+               (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName)))
+            << instruction->DebugName() << instruction->GetId() << " has invalid rti "
+            << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
       }
     }
     if (disasm_info_ != nullptr) {
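
The visualizer's assertion now keys on the SsaBuilder pass instead of ReferenceTypePropagation: an untyped reference is tolerated only before SSA building, or when the graph is already known to be in a bad state. The condition in isolation (illustrative sketch of the boolean logic only):

    // Returns true when invalid RTI should be tolerated rather than asserted on.
    bool InvalidRtiTolerated(bool graph_in_bad_state,
                             bool is_after_pass,
                             bool is_ssa_builder_pass) {
      return graph_in_bad_state || (!is_after_pass && is_ssa_builder_pass);
    }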
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 78cb7d4..1f4eaf3 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -21,11 +21,11 @@
 #include "optimizing_unit_test.h"
 #include "side_effects_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
-TEST(GVNTest, LocalFieldElimination) {
+class GVNTest : public CommonCompilerTest {};
+
+TEST_F(GVNTest, LocalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   ScopedNullHandle<mirror::DexCache> dex_cache;
@@ -100,7 +100,7 @@
   ASSERT_EQ(different_offset->GetBlock(), block);
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -110,7 +110,7 @@
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 }
 
-TEST(GVNTest, GlobalFieldElimination) {
+TEST_F(GVNTest, GlobalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   ScopedNullHandle<mirror::DexCache> dex_cache;
@@ -182,7 +182,7 @@
                                                           0));
   join->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -193,7 +193,7 @@
   ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
 }
 
-TEST(GVNTest, LoopFieldElimination) {
+TEST_F(GVNTest, LoopFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   ScopedNullHandle<mirror::DexCache> dex_cache;
@@ -288,7 +288,7 @@
   ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
   ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   {
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
@@ -316,7 +316,7 @@
 }
 
 // Test that inner loops affect the side effects of the outer loop.
-TEST(GVNTest, LoopSideEffects) {
+TEST_F(GVNTest, LoopSideEffects) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   ScopedNullHandle<mirror::DexCache> dex_cache;
@@ -364,7 +364,7 @@
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
   outer_loop_exit->AddInstruction(new (&allocator) HExit());
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
       *outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 19e6cbd..eef6cef 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -706,7 +706,6 @@
     }
   }
   if (loop->IsDefinedOutOfTheLoop(instruction)) {
-    DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader()));
     InductionInfo* info = CreateInvariantFetch(instruction);
     AssignInfo(loop, instruction, info);
     return info;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 5de94f4..29a1845 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -18,7 +18,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -28,7 +27,7 @@
 /**
  * Fixture class for the InductionVarAnalysis tests.
  */
-class InductionVarAnalysisTest : public testing::Test {
+class InductionVarAnalysisTest : public CommonCompilerTest {
  public:
   InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -86,6 +85,7 @@
     constant0_ = graph_->GetIntConstant(0);
     constant1_ = graph_->GetIntConstant(1);
     constant100_ = graph_->GetIntConstant(100);
+    float_constant0_ = graph_->GetFloatConstant(0.0f);
     induc_ = new (&allocator_) HLocal(n);
     entry_->AddInstruction(induc_);
     entry_->AddInstruction(new (&allocator_) HStoreLocal(induc_, constant0_));
@@ -102,6 +102,7 @@
       basic_[d] = new (&allocator_) HLocal(d);
       entry_->AddInstruction(basic_[d]);
       loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_));
+      loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto());
       HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
       loop_header_[d]->AddInstruction(load);
       HInstruction* compare = new (&allocator_) HLessThan(load, constant100_);
@@ -156,8 +157,10 @@
   HInstruction* InsertArrayStore(HLocal* subscript, int d) {
     HInstruction* load = InsertInstruction(
         new (&allocator_) HLoadLocal(subscript, Primitive::kPrimInt), d);
+    // ArraySet is given a float value in order to avoid SsaBuilder typing
+    // it from the array's non-existent reference type info.
     return InsertInstruction(new (&allocator_) HArraySet(
-        parameter_, load, constant0_, Primitive::kPrimInt, 0), d);
+        parameter_, load, float_constant0_, Primitive::kPrimFloat, 0), d);
   }
 
   // Returns induction information of instruction in loop at depth d.
@@ -168,7 +171,7 @@
 
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
     iva_->Run();
   }
@@ -187,6 +190,7 @@
   HInstruction* constant0_;
   HInstruction* constant1_;
   HInstruction* constant100_;
+  HInstruction* float_constant0_;
   HLocal* induc_;  // "vreg_n", the "k"
   HLocal* tmp_;    // "vreg_n+1"
   HLocal* dum_;    // "vreg_n+2"
@@ -212,7 +216,7 @@
   //   ..
   // }
   BuildLoopNest(10);
-  ASSERT_TRUE(graph_->TryBuildingSsa());
+  TransformToSsa(graph_);
   ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
   for (int d = 0; d < 1; d++) {
     ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 5c0bdd7..eda9c01 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "induction_var_range.h"
 #include "nodes.h"
@@ -29,7 +28,7 @@
 /**
  * Fixture class for the InductionVarRange tests.
  */
-class InductionVarRangeTest : public testing::Test {
+class InductionVarRangeTest : public CommonCompilerTest {
  public:
   InductionVarRangeTest()
       : pool_(),
@@ -113,7 +112,7 @@
 
   /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_->Run();
   }
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a4dcb3a..48d3299 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -33,6 +33,7 @@
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
 #include "sharpening.h"
+#include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -41,7 +42,14 @@
 
 namespace art {
 
-static constexpr size_t kMaximumNumberOfHInstructions = 12;
+static constexpr size_t kMaximumNumberOfHInstructions = 32;
+
+// Limit the number of dex registers that we accumulate while inlining
+// to avoid creating large amounts of nested environments.
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+
+// Avoid inlining within a huge method due to memory pressure.
+static constexpr size_t kMaximumCodeUnitSize = 4096;
 
 void HInliner::Run() {
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
@@ -49,6 +57,9 @@
       || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
     return;
   }
+  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
+    return;
+  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
@@ -215,6 +226,7 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
+  ArtMethod* actual_method = nullptr;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
     if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
       VLOG(compiler) << "Not inlining a String.<init> method";
@@ -226,9 +238,15 @@
         : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
     resolved_method = dex_cache->GetResolvedMethod(
         ref.dex_method_index, class_linker->GetImagePointerSize());
+    // actual_method == resolved_method for direct or static calls.
+    actual_method = resolved_method;
   } else {
     resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod(
         method_index, class_linker->GetImagePointerSize());
+    if (resolved_method != nullptr) {
+      // Check if we can statically find the method.
+      actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+    }
   }
 
   if (resolved_method == nullptr) {
@@ -238,15 +256,10 @@
     return false;
   }
 
-  if (invoke_instruction->IsInvokeStaticOrDirect()) {
-    return TryInline(invoke_instruction, resolved_method);
-  }
-
-  // Check if we can statically find the method.
-  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
   if (actual_method != nullptr) {
     return TryInline(invoke_instruction, actual_method);
   }
+  DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
   // Check if we can use an inline cache.
   ArtMethod* caller = graph_->GetArtMethod();
@@ -372,6 +385,18 @@
 
 bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  // Check whether we're allowed to inline. The outermost compilation unit is the relevant
+  // dex file here (though the transitivity of an inline chain would allow checking the caller).
+  if (!compiler_driver_->MayInline(method->GetDexFile(),
+                                   outer_compilation_unit_.GetDexFile())) {
+    VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in "
+                   << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+                   << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+                   << method->GetDexFile()->GetLocation();
+    return false;
+  }
+
   uint32_t method_index = FindMethodIndexIn(
       method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
   if (method_index == DexFile::kDexNoIndex) {
@@ -514,7 +539,7 @@
     return false;
   }
 
-  if (!callee_graph->TryBuildingSsa()) {
+  if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     return false;
@@ -549,14 +574,12 @@
   // Run simple optimizations on the graph.
   HDeadCodeElimination dce(callee_graph, stats_);
   HConstantFolding fold(callee_graph);
-  ReferenceTypePropagation type_propagation(callee_graph, handles_);
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
-    &type_propagation,
     &sharpening,
     &simplify,
     &fold,
@@ -578,6 +601,7 @@
                      compiler_driver_,
                      handles_,
                      stats_,
+                     total_number_of_dex_registers_ + code_item->registers_size_,
                      depth_ + 1);
     inliner.Run();
     number_of_instructions_budget += inliner.number_of_inlined_instructions_;
@@ -609,6 +633,10 @@
   HReversePostOrderIterator it(*callee_graph);
   it.Advance();  // Past the entry block, it does not contain instructions that prevent inlining.
   size_t number_of_instructions = 0;
+
+  bool can_inline_environment =
+      total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
+
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
@@ -622,10 +650,17 @@
          instr_it.Advance()) {
       if (number_of_instructions++ ==  number_of_instructions_budget) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                       << " could not be inlined because it is too big.";
+                       << " is not inlined because its caller has reached"
+                       << " its instruction budget limit.";
         return false;
       }
       HInstruction* current = instr_it.Current();
+      if (!can_inline_environment && current->NeedsEnvironment()) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " is not inlined because its caller has reached"
+                       << " its environment budget limit.";
+        return false;
+      }
 
       if (current->IsInvokeInterface()) {
         // Disable inlining of interface calls. The cost in case of entering the
@@ -677,42 +712,36 @@
     DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
   }
 
-  // When merging the graph we might create a new NullConstant in the caller graph which does
-  // not have the chance to be typed. We assign the correct type here so that we can keep the
-  // assertion that every reference has a valid type. This also simplifies checks along the way.
-  HNullConstant* null_constant = graph_->GetNullConstant();
-  if (!null_constant->GetReferenceTypeInfo().IsValid()) {
-    ReferenceTypeInfo::TypeHandle obj_handle =
-        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
-    null_constant->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
-  }
-
   // Check the integrity of reference types and run another type propagation if needed.
-  if ((return_replacement != nullptr)
-      && (return_replacement->GetType() == Primitive::kPrimNot)) {
-    if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
-      // Make sure that we have a valid type for the return. We may get an invalid one when
-      // we inline invokes with multiple branches and create a Phi for the result.
-      // TODO: we could be more precise by merging the phi inputs but that requires
-      // some functionality from the reference type propagation.
-      DCHECK(return_replacement->IsPhi());
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-      ReferenceTypeInfo::TypeHandle return_handle =
-          handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
-      return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-         return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
-    }
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
+      if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
+        // Make sure that we have a valid type for the return. We may get an invalid one when
+        // we inline invokes with multiple branches and create a Phi for the result.
+        // TODO: we could be more precise by merging the phi inputs but that requires
+        // some functionality from the reference type propagation.
+        DCHECK(return_replacement->IsPhi());
+        size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+        ReferenceTypeInfo::TypeHandle return_handle =
+            handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
+        return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
+            return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
+      }
 
-    if (do_rtp) {
-      // If the return type is a refinement of the declared type run the type propagation again.
-      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-      if (invoke_rti.IsStrictSupertypeOf(return_rti)
-          || (return_rti.IsExact() && !invoke_rti.IsExact())
-          || !return_replacement->CanBeNull()) {
-        ReferenceTypePropagation rtp_fixup(graph_, handles_);
-        rtp_fixup.Run();
+      if (do_rtp) {
+        // If the return type is a refinement of the declared type run the type propagation again.
+        ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+        ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+        if (invoke_rti.IsStrictSupertypeOf(return_rti)
+            || (return_rti.IsExact() && !invoke_rti.IsExact())
+            || !return_replacement->CanBeNull()) {
+          ReferenceTypePropagation(graph_, handles_).Run();
+        }
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      if (do_rtp) {
+        // Inlining InstanceOf into an If may put a tighter bound on reference types.
+        ReferenceTypePropagation(graph_, handles_).Run();
       }
     }
   }
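
The inliner hunks bundle several policy changes: the per-callee instruction budget grows from 12 to 32, two new chain-wide cut-offs appear (cumulated dex registers and caller code-unit size), virtual/interface target resolution is folded into the initial lookup, and type propagation reruns after inlining an InstanceOf that feeds the caller. A standalone sketch of just the budget logic (constants copied from the patch; the function shapes are illustrative, not ART's):

    #include <cstddef>
    #include <cstdio>

    constexpr size_t kMaximumNumberOfHInstructions = 32;          // per callee
    constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;  // whole inline chain
    constexpr size_t kMaximumCodeUnitSize = 4096;                 // caller method size

    // Early bail-out: huge callers are never inlined into, to cap memory pressure.
    bool CallerSmallEnough(size_t caller_code_units) {
      return caller_code_units <= kMaximumCodeUnitSize;
    }

    // Once the chain has accumulated too many dex registers, any callee
    // instruction that needs an environment blocks the inline.
    bool CanInlineEnvironment(size_t total_dex_registers) {
      return total_dex_registers < kMaximumNumberOfCumulatedDexRegisters;
    }

    int main() {
      std::printf("%d %d\n", CallerSmallEnough(5000),   // 0: caller too big
                  CanInlineEnvironment(48));            // 1: still within budget
    }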
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7b9fb73..8de510e 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -40,13 +40,15 @@
            CompilerDriver* compiler_driver,
            StackHandleScopeCollection* handles,
            OptimizingCompilerStats* stats,
-           size_t depth = 0)
+           size_t total_number_of_dex_registers,
+           size_t depth)
       : HOptimization(outer_graph, kInlinerPassName, stats),
         outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
         compiler_driver_(compiler_driver),
+        total_number_of_dex_registers_(total_number_of_dex_registers),
         depth_(depth),
         number_of_inlined_instructions_(0),
         handles_(handles) {}
@@ -88,6 +90,7 @@
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
   CompilerDriver* const compiler_driver_;
+  const size_t total_number_of_dex_registers_;
   const size_t depth_;
   size_t number_of_inlined_instructions_;
   StackHandleScopeCollection* const handles_;
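
With the default arguments gone, every construction site must now pass the cumulated register count and depth explicitly; the recursive call in inliner.cc above passes total_number_of_dex_registers_ + code_item->registers_size_ and depth_ + 1. A toy model of how the two values thread through nested inliners (plain C++, not the real constructor):

    #include <cstddef>
    #include <cstdio>

    // Each nesting level adds the callee's register count and bumps the depth,
    // mirroring the two values the real constructor now requires.
    struct MiniInliner {
      size_t total_dex_registers;
      size_t depth;

      MiniInliner ForCallee(size_t callee_registers) const {
        return MiniInliner{total_dex_registers + callee_registers, depth + 1};
      }
    };

    int main() {
      MiniInliner top{16, 0};  // outermost method: 16 registers, depth 0
      MiniInliner nested = top.ForCallee(8);
      std::printf("depth=%zu registers=%zu\n",
                  nested.depth, nested.total_dex_registers);  // depth=1 registers=24
    }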
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6a34b13..6bbc751 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -49,6 +49,7 @@
       GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
   HArm64IntermediateAddress* address =
       new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
+  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
   access->GetBlock()->InsertInstructionBefore(address, access);
   access->ReplaceInput(address, 0);
   // Both instructions must depend on GC to prevent any instruction that can
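
Copying the array's reference type info onto the new HArm64IntermediateAddress keeps the graph checker's new every-reference-has-valid-RTI rule (see the graph_checker.cc hunk above) satisfied. In miniature (illustrative types, not ART's):

    struct Rti { bool valid = false; };
    struct Node { Rti rti; };

    // A derived address node inherits its base's reference type info so
    // downstream checks still see a properly typed reference.
    Node MakeIntermediateAddress(const Node& array) {
      Node address;
      address.rti = array.rti;
      return address;
    }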
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 7127215..c6da9a3 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -36,8 +36,8 @@
   switch (i) {
     case Intrinsics::kNone:
       return kInterface;  // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
-    case Intrinsics::k ## Name:               \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
       return IsStatic;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -52,8 +52,8 @@
   switch (i) {
     case Intrinsics::kNone:
       return kNeedsEnvironmentOrCache;  // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
-    case Intrinsics::k ## Name:               \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
       return NeedsEnvironmentOrCache;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -63,6 +63,38 @@
   return kNeedsEnvironmentOrCache;
 }
 
+// Function that returns whether an intrinsic has side effects.
+static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kAllSideEffects;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
+      return SideEffects;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kAllSideEffects;
+}
+
+// Function that returns whether an intrinsic can throw exceptions.
+static inline IntrinsicExceptions GetExceptions(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kCanThrow;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+    case Intrinsics::k ## Name: \
+      return Exceptions;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kCanThrow;
+}
+
 static Primitive::Type GetType(uint64_t data, bool is_op_size) {
   if (is_op_size) {
     switch (static_cast<OpSize>(data)) {
@@ -248,7 +280,7 @@
 
     // Thread.currentThread.
     case kIntrinsicCurrentThread:
-      return  Intrinsics::kThreadCurrentThread;
+      return Intrinsics::kThreadCurrentThread;
 
     // Memory.peek.
     case kIntrinsicPeek:
@@ -473,7 +505,10 @@
                   << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile())
                   << invoke->DebugName();
             } else {
-              invoke->SetIntrinsic(intrinsic, NeedsEnvironmentOrCache(intrinsic));
+              invoke->SetIntrinsic(intrinsic,
+                                   NeedsEnvironmentOrCache(intrinsic),
+                                   GetSideEffects(intrinsic),
+                                   GetExceptions(intrinsic));
             }
           }
         }
@@ -487,7 +522,7 @@
     case Intrinsics::kNone:
       os << "None";
       break;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
     case Intrinsics::k ## Name: \
       os << # Name; \
       break;
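
GetSideEffects() and GetExceptions() follow the same X-macro pattern as the existing helpers: the single INTRINSICS_LIST table expands into one switch case per intrinsic, with a conservative default for kNone. A self-contained demo of the pattern (the list and enums below are stand-ins, not ART's table):

    #include <cstdio>

    enum Effects { kNoSideEffects, kAllSideEffects };

    #define DEMO_LIST(V)         \
      V(MathAbs, kNoSideEffects) \
      V(ArrayCopy, kAllSideEffects)

    enum class Demo {
    #define ENTRY(Name, SideEffects) k##Name,
      DEMO_LIST(ENTRY)
    #undef ENTRY
    };

    Effects GetSideEffects(Demo d) {
      switch (d) {
    #define ENTRY(Name, SideEffects) case Demo::k##Name: return SideEffects;
        DEMO_LIST(ENTRY)
    #undef ENTRY
      }
      return kAllSideEffects;  // conservative default, as in the patch
    }

    int main() {
      std::printf("%d %d\n", GetSideEffects(Demo::kMathAbs),    // 0
                  GetSideEffects(Demo::kArrayCopy));            // 1
    }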
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index e459516..9f50d18 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,6 +27,9 @@
 class CompilerDriver;
 class DexFile;
 
+// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
+static constexpr bool kRoundIsPlusPointFive = false;
+
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
@@ -54,9 +57,9 @@
     switch (invoke->GetIntrinsic()) {
       case Intrinsics::kNone:
         return;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \
-      case Intrinsics::k ## Name:             \
-        Visit ## Name(invoke);                \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
+      case Intrinsics::k ## Name: \
+        Visit ## Name(invoke);    \
         return;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -69,7 +72,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment)                    \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
   virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
   }
 #include "intrinsics_list.h"
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 4683aee..1e6b3a1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -502,9 +502,6 @@
                          bool is_volatile,
                          CodeGeneratorARM* codegen) {
   LocationSummary* locations = invoke->GetLocations();
-  DCHECK((type == Primitive::kPrimInt) ||
-         (type == Primitive::kPrimLong) ||
-         (type == Primitive::kPrimNot));
   ArmAssembler* assembler = codegen->GetAssembler();
   Location base_loc = locations->InAt(1);
   Register base = base_loc.AsRegister<Register>();             // Object pointer.
@@ -512,30 +509,67 @@
   Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
   Location trg_loc = locations->Out();
 
-  if (type == Primitive::kPrimLong) {
-    Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
-    __ add(IP, base, ShifterOperand(offset));
-    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
-      __ ldrexd(trg_lo, trg_hi, IP);
-    } else {
-      __ ldrd(trg_lo, Address(IP));
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register trg = trg_loc.AsRegister<Register>();
+      __ ldr(trg, Address(base, offset));
+      if (is_volatile) {
+        __ dmb(ISH);
+      }
+      break;
     }
-  } else {
-    Register trg = trg_loc.AsRegister<Register>();
-    __ ldr(trg, Address(base, offset));
-  }
 
-  if (is_volatile) {
-    __ dmb(ISH);
-  }
+    case Primitive::kPrimNot: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          if (is_volatile) {
+            __ dmb(ISH);
+          }
+        } else {
+          __ ldr(trg, Address(base, offset));
+          if (is_volatile) {
+            __ dmb(ISH);
+          }
+          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ ldr(trg, Address(base, offset));
+        if (is_volatile) {
+          __ dmb(ISH);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
 
-  if (type == Primitive::kPrimNot) {
-    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    case Primitive::kPrimLong: {
+      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+      __ add(IP, base, ShifterOperand(offset));
+      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+        Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+        __ ldrexd(trg_lo, trg_hi, IP);
+      } else {
+        __ ldrd(trg_lo, Address(IP));
+      }
+      if (is_volatile) {
+        __ dmb(ISH);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -548,25 +582,30 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
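
GenUnsafeGet is restructured into a per-type switch, and the locations builder now learns the primitive type so it can reserve a temp register only when the Baker read-barrier path will actually need one. The temp decision in isolation (illustrative; in ART the two barrier flags are build-time constants):

    // Only reference loads under the Baker read-barrier scheme need the extra
    // temp for the marking slow path.
    bool NeedsBakerTemp(bool is_reference_get,
                        bool emit_read_barrier,
                        bool use_baker_read_barrier) {
      return is_reference_get && emit_read_barrier && use_baker_read_barrier;
    }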
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 127e9a4..e01b6ff 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -40,7 +40,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -67,7 +67,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 9f6863c..f723940 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -614,7 +614,10 @@
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  CreateFPToIntPlusTempLocations(arena_, invoke);
+  // See intrinsics.h.
+  if (kRoundIsPlusPointFive) {
+    CreateFPToIntPlusTempLocations(arena_, invoke);
+  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
@@ -622,7 +625,10 @@
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  CreateFPToIntPlusTempLocations(arena_, invoke);
+  // See intrinsics.h.
+  if (kRoundIsPlusPointFive) {
+    CreateFPToIntPlusTempLocations(arena_, invoke);
+  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 4250ecf..d47448a 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -41,7 +41,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -65,7 +65,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 96f43a0..2e87546 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -22,97 +22,97 @@
 // environment.
 
 #define INTRINSICS_LIST(V) \
-  V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache) \
-  V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache) \
-  V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache) \
-  V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongReverse, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache) \
-  V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
-  V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathCos, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathSin, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathExp, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathLog, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathTan, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRint, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \
-  V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \
-  V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \
-  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \
-  V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringCharAt, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringEquals, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache) \
-  V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePut, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache) \
-  V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache) \
-  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache)
+  V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+  V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
+  V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
+  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow)
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
 #undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_   // #define is only for lint.
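
Every row of the table now carries five columns; the two new ones are forwarded to HInvoke::SetIntrinsic (see the intrinsics.cc hunk above) so later passes can reason about intrinsic calls. A sketch of the kind of question those flags let an optimization answer (plain C++ stand-ins for the real enums):

    enum SideEffects { kNoSideEffects, kReadSideEffects,
                       kWriteSideEffects, kAllSideEffects };
    enum Exceptions { kNoThrow, kCanThrow };

    // A GVN/LICM-style question: may this intrinsic call be deduplicated or
    // hoisted with no observable difference?
    bool IsPureCall(SideEffects s, Exceptions e) {
      return s == kNoSideEffects && e == kNoThrow;
    }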
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 19ad525..f86b0ef 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -60,7 +60,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 1481d24..4137fbd 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -60,7 +60,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 8019062..677f2e9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -720,6 +720,11 @@
 // Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
 // as it needs 64 bit instructions.
 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
+  // See intrinsics.h.
+  if (!kRoundIsPlusPointFive) {
+    return;
+  }
+
   // Do we have instruction support?
   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
     LocationSummary* locations = new (arena_) LocationSummary(invoke,
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index fefe9c6..08bd197 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index aa1c109..690cf3d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -610,7 +610,10 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
-  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  // See intrinsics.h.
+  if (kRoundIsPlusPointFive) {
+    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  }
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -657,7 +660,10 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
-  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  // See intrinsics.h.
+  if (kRoundIsPlusPointFive) {
+    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+  }
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 6894e1b..155ff65 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -36,7 +36,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@
 
   // Define visitor methods.
 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
   void Visit ## Name(HInvoke* invoke) OVERRIDE;
 #include "intrinsics_list.h"
 INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 9ad003c..2b63ec8 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -16,7 +16,6 @@
 
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -27,7 +26,7 @@
 /**
  * Fixture class for the LICM tests.
  */
-class LICMTest : public testing::Test {
+class LICMTest : public CommonCompilerTest {
  public:
   LICMTest() : pool_(), allocator_(&pool_) {
     graph_ = CreateGraph(&allocator_);
@@ -66,20 +65,21 @@
     // Provide boiler-plate instructions.
     parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
-    constant_ = graph_->GetIntConstant(42);
+    int_constant_ = graph_->GetIntConstant(42);
+    float_constant_ = graph_->GetFloatConstant(42.0f);
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     loop_header_->AddInstruction(new (&allocator_) HIf(parameter_));
     loop_body_->AddInstruction(new (&allocator_) HGoto());
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
   }
 
   // Performs LICM optimizations (after proper set up).
   void PerformLICM() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
-    LICM licm(graph_, side_effects);
-    licm.Run();
+    LICM(graph_, side_effects).Run();
   }
 
   // General building fields.
@@ -96,7 +96,8 @@
   HBasicBlock* exit_;
 
   HInstruction* parameter_;  // "this"
-  HInstruction* constant_;
+  HInstruction* int_constant_;
+  HInstruction* float_constant_;
 };
 
 //
@@ -119,7 +120,7 @@
                                                                 0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
   HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
-      parameter_, constant_, Primitive::kPrimInt, MemberOffset(20),
+      parameter_, int_constant_, Primitive::kPrimInt, MemberOffset(20),
       false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
@@ -168,11 +169,13 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with different types.
+  // ArrayGet is typed as kPrimByte and ArraySet is given a float value in
+  // order to avoid SsaBuilder's typing of ambiguous array operations from
+  // reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, int_constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, constant_, constant_, Primitive::kPrimInt, 0);
+      parameter_, int_constant_, float_constant_, Primitive::kPrimShort, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -186,11 +189,13 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with same types.
+  // ArrayGet is typed as kPrimByte and ArraySet is given a float value in
+  // order to avoid SsaBuilder's typing of ambiguous array operations from
+  // reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong, 0);
+      parameter_, int_constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, get_array, constant_, Primitive::kPrimLong, 0);
+      parameter_, get_array, float_constant_, Primitive::kPrimByte, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index a059766..ed275b1 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -29,13 +29,12 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "pretty_printer.h"
-#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LinearizeTest : public CommonCompilerTest {};
+
 template <size_t number_of_blocks>
 static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) {
   ArenaPool pool;
@@ -46,7 +45,7 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
 
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
@@ -60,7 +59,7 @@
   }
 }
 
-TEST(LinearizeTest, CFG1) {
+TEST_F(LinearizeTest, CFG1) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -85,7 +84,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG2) {
+TEST_F(LinearizeTest, CFG2) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -110,7 +109,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG3) {
+TEST_F(LinearizeTest, CFG3) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -137,7 +136,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG4) {
+TEST_F(LinearizeTest, CFG4) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -167,7 +166,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG5) {
+TEST_F(LinearizeTest, CFG5) {
   /* Structure of this graph (+ are back edges)
   //            Block0
   //              |
@@ -197,7 +196,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG6) {
+TEST_F(LinearizeTest, CFG6) {
   //            Block0
   //              |
   //            Block1
@@ -223,7 +222,7 @@
   TestCode(data, blocks);
 }
 
-TEST(LinearizeTest, CFG7) {
+TEST_F(LinearizeTest, CFG7) {
   // Structure of this graph (+ are back edges)
   //            Block0
   //              |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 7f67560..926f939 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -27,10 +27,10 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LiveRangesTest : public CommonCompilerTest {};
+
 static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
   HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph);
@@ -39,13 +39,13 @@
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   return graph;
 }
 
-TEST(LiveRangesTest, CFG1) {
+TEST_F(LiveRangesTest, CFG1) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -83,7 +83,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG2) {
+TEST_F(LiveRangesTest, CFG2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -131,7 +131,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG3) {
+TEST_F(LiveRangesTest, CFG3) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -204,7 +204,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop1) {
+TEST_F(LiveRangesTest, Loop1) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -284,7 +284,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, Loop2) {
+TEST_F(LiveRangesTest, Loop2) {
   /*
    * Test the following snippet:
    *  var a = 0;
@@ -360,7 +360,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
-TEST(LiveRangesTest, CFG4) {
+TEST_F(LiveRangesTest, CFG4) {
   /*
    * Test the following snippet:
    *  var a = 0;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 9d7d0b6..7736eed 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -27,10 +27,10 @@
 #include "prepare_for_register_allocation.h"
 #include "ssa_liveness_analysis.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
+class LivenessTest : public CommonCompilerTest {};
+
 static void DumpBitVector(BitVector* vector,
                           std::ostream& buffer,
                           size_t count,
@@ -51,7 +51,7 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
@@ -75,7 +75,7 @@
   ASSERT_STREQ(expected, buffer.str().c_str());
 }
 
-TEST(LivenessTest, CFG1) {
+TEST_F(LivenessTest, CFG1) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -98,7 +98,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG2) {
+TEST_F(LivenessTest, CFG2) {
   const char* expected =
     "Block 0\n"
     "  live in: (0)\n"
@@ -120,7 +120,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG3) {
+TEST_F(LivenessTest, CFG3) {
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (000)\n"
@@ -149,7 +149,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG4) {
+TEST_F(LivenessTest, CFG4) {
   // var a;
   // if (0 == 0) {
   //   a = 5;
@@ -197,7 +197,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, CFG5) {
+TEST_F(LivenessTest, CFG5) {
   // var a = 0;
   // if (0 == 0) {
   // } else {
@@ -242,7 +242,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop1) {
+TEST_F(LivenessTest, Loop1) {
   // Simple loop with one preheader and one back edge.
   // var a = 0;
   // while (a == a) {
@@ -288,7 +288,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop3) {
+TEST_F(LivenessTest, Loop3) {
   // Test that the returned value stays live in a preceding loop.
   // var a = 0;
   // while (a == a) {
@@ -335,7 +335,7 @@
 }
 
 
-TEST(LivenessTest, Loop4) {
+TEST_F(LivenessTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   // var a = 0;
@@ -387,7 +387,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop5) {
+TEST_F(LivenessTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
@@ -443,7 +443,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop6) {
+TEST_F(LivenessTest, Loop6) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2)
   const char* expected =
@@ -494,7 +494,7 @@
 }
 
 
-TEST(LivenessTest, Loop7) {
+TEST_F(LivenessTest, Loop7) {
   // Bitsets are made of:
   // (constant0, constant4, constant5, phi in block 2, phi in block 6)
   const char* expected =
@@ -548,7 +548,7 @@
   TestCode(data, expected);
 }
 
-TEST(LivenessTest, Loop8) {
+TEST_F(LivenessTest, Loop8) {
   // var a = 0;
   // while (a == a) {
   //   a = a + a;
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 727f2bb..2b313f6 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -678,16 +678,6 @@
     }
   }
 
-  static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) {
-    return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) ||
-           (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt);
-  }
-
-  static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) {
-    return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) ||
-           (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong);
-  }
-
   void VisitGetLocation(HInstruction* instruction,
                         HInstruction* ref,
                         size_t offset,
@@ -716,22 +706,14 @@
       // Get the real heap value of the store.
       heap_value = store->InputAt(1);
     }
-    if ((heap_value != kUnknownHeapValue) &&
-        // Keep the load due to possible I/F, J/D array aliasing.
-        // See b/22538329 for details.
-        !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) &&
-        !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) {
+    if (heap_value == kUnknownHeapValue) {
+      // Load isn't eliminated. Put the load as the value into the HeapLocation.
+      // This acts like GVN but with better aliasing analysis.
+      heap_values[idx] = instruction;
+    } else {
       removed_loads_.push_back(instruction);
       substitute_instructions_for_loads_.push_back(heap_value);
       TryRemovingNullCheck(instruction);
-      return;
-    }
-
-    // Load isn't eliminated.
-    if (heap_value == kUnknownHeapValue) {
-      // Put the load as the value into the HeapLocation.
-      // This acts like GVN but with better aliasing analysis.
-      heap_values[idx] = instruction;
     }
   }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 926bc156..6d4275d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -198,10 +198,38 @@
   }
 }
 
-void HGraph::TransformToSsa() {
-  DCHECK(!reverse_post_order_.empty());
-  SsaBuilder ssa_builder(this);
-  ssa_builder.BuildSsa();
+BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+  BuildDominatorTree();
+
+  // The SSA builder requires loops to all be natural. Specifically, the dead phi
+  // elimination phase checks the consistency of the graph when doing a post-order
+  // visit for eliminating dead phis: a dead phi can only have loop header phi
+  // users remaining when being visited.
+  BuildSsaResult result = AnalyzeNaturalLoops();
+  if (result != kBuildSsaSuccess) {
+    return result;
+  }
+
+  // Precompute per-block try membership before entering the SSA builder,
+  // which needs the information to build catch block phis from values of
+  // locals at throwing instructions inside try blocks.
+  ComputeTryBlockInformation();
+
+  // Create the inexact Object reference type and store it in the HGraph.
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  inexact_object_rti_ = ReferenceTypeInfo::Create(
+      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+      /* is_exact */ false);
+
+  // Transforms the graph to SSA form.
+  result = SsaBuilder(this, handles).BuildSsa();
+  if (result != kBuildSsaSuccess) {
+    return result;
+  }
+
+  in_ssa_form_ = true;
+  return kBuildSsaSuccess;
 }
 
 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -410,7 +438,7 @@
   }
 }
 
-bool HGraph::AnalyzeNaturalLoops() const {
+BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
   // Order does not matter.
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
@@ -418,16 +446,16 @@
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
-        return false;
+        return kBuildSsaFailThrowCatchLoop;
       }
       HLoopInformation* info = block->GetLoopInformation();
       if (!info->Populate()) {
         // Abort if the loop is non natural. We currently bailout in such cases.
-        return false;
+        return kBuildSsaFailNonNaturalLoop;
       }
     }
   }
-  return true;
+  return kBuildSsaSuccess;
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
@@ -446,8 +474,13 @@
   // id and/or any invariants the graph is assuming when adding new instructions.
   if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) {
     cached_null_constant_ = new (arena_) HNullConstant(dex_pc);
+    cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_);
     InsertConstant(cached_null_constant_);
   }
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid());
+  }
   return cached_null_constant_;
 }
 
@@ -777,6 +810,10 @@
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
 }
 
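+// Defined out-of-line so that the declaration in nodes.h can carry the PURE
+// attribute; see the TODO next to GetKind() there.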
+HInstruction::InstructionKind HInstruction::GetKind() const {
+  return GetKindInternal();
+}
+
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
   HInstruction* next = GetNext();
   while (next != nullptr && next->IsParallelMove()) {
@@ -960,7 +997,7 @@
   visitor->Visit##name(this);                                                  \
 }
 
-FOR_EACH_INSTRUCTION(DEFINE_ACCEPT)
+FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT)
 
 #undef DEFINE_ACCEPT
 
@@ -2023,6 +2060,16 @@
   new_pre_header->SetTryCatchInformation(try_catch_info);
 }
 
+static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (rti.IsValid()) {
+    DCHECK(upper_bound_rti.IsSupertypeOf(rti))
+        << " upper_bound_rti: " << upper_bound_rti
+        << " rti: " << rti;
+    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
+  }
+}
+
 void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
   if (kIsDebugBuild) {
     DCHECK_EQ(GetType(), Primitive::kPrimNot);
@@ -2031,16 +2078,23 @@
     if (IsBoundType()) {
       // Having the test here spares us from making the method virtual just for
       // the sake of a DCHECK.
-      ReferenceTypeInfo upper_bound_rti = AsBoundType()->GetUpperBound();
-      DCHECK(upper_bound_rti.IsSupertypeOf(rti))
-          << " upper_bound_rti: " << upper_bound_rti
-          << " rti: " << rti;
-      DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
+      CheckAgainstUpperBound(rti, AsBoundType()->GetUpperBound());
     }
   }
   reference_type_info_ = rti;
 }
 
+void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) {
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(upper_bound.IsValid());
+    DCHECK(!upper_bound_.IsValid()) << "Upper bound should only be set once.";
+    CheckAgainstUpperBound(GetReferenceTypeInfo(), upper_bound);
+  }
+  upper_bound_ = upper_bound;
+  upper_can_be_null_ = can_be_null;
+}
+
 ReferenceTypeInfo::ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {}
 
 ReferenceTypeInfo::ReferenceTypeInfo(TypeHandle type_handle, bool is_exact)
@@ -2087,13 +2141,27 @@
 }
 
 void HInvoke::SetIntrinsic(Intrinsics intrinsic,
-                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
+                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache,
+                           IntrinsicSideEffects side_effects,
+                           IntrinsicExceptions exceptions) {
   intrinsic_ = intrinsic;
   IntrinsicOptimizations opt(this);
   if (needs_env_or_cache == kNoEnvironmentOrCache) {
     opt.SetDoesNotNeedDexCache();
     opt.SetDoesNotNeedEnvironment();
   }
+  // Adjust method's side effects from intrinsic table.
+  switch (side_effects) {
+    case kNoSideEffects: SetSideEffects(SideEffects::None()); break;
+    case kReadSideEffects: SetSideEffects(SideEffects::AllReads()); break;
+    case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break;
+    case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break;
+  }
+  // Adjust method's exception status from intrinsic table.
+  switch (exceptions) {
+    case kNoThrow: SetCanThrow(false); break;
+    case kCanThrow: SetCanThrow(true); break;
+  }
 }
 
 bool HInvoke::NeedsEnvironment() const {
@@ -2220,4 +2288,19 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) {
+  os << "["
+     << " source=" << rhs.GetSource()
+     << " destination=" << rhs.GetDestination()
+     << " type=" << rhs.GetType()
+     << " instruction=";
+  if (rhs.GetInstruction() != nullptr) {
+    os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId();
+  } else {
+    os << "null";
+  }
+  os << " ]";
+  return os;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 1f8ef47..c06d164 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -98,6 +98,13 @@
   kCondAE,  // >=
 };
 
+enum BuildSsaResult {
+  kBuildSsaFailNonNaturalLoop,
+  kBuildSsaFailThrowCatchLoop,
+  kBuildSsaFailAmbiguousArrayOp,
+  kBuildSsaSuccess,
+};
+
 class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -143,6 +150,122 @@
   DISALLOW_COPY_AND_ASSIGN(HInstructionList);
 };
 
+class ReferenceTypeInfo : ValueObject {
+ public:
+  typedef Handle<mirror::Class> TypeHandle;
+
+  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
+    // The constructor will check that the type_handle is valid.
+    return ReferenceTypeInfo(type_handle, is_exact);
+  }
+
+  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
+
+  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return handle.GetReference() != nullptr;
+  }
+
+  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return IsValidHandle(type_handle_);
+  }
+
+  bool IsExact() const { return is_exact_; }
+
+  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsObjectClass();
+  }
+
+  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsStringClass();
+  }
+
+  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+  }
+
+  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsInterface();
+  }
+
+  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass();
+  }
+
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    if (!rti.IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());
+  }
+
+  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
+
+  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
+  // Returns true if the type information provides the same amount of detail.
+  // Note that it does not mean that the instructions have the same actual type
+  // (because the type can be the result of a merge).
+  bool IsEqual(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsValid() && !rti.IsValid()) {
+      // Invalid types are equal.
+      return true;
+    }
+    if (!IsValid() || !rti.IsValid()) {
+      // One is valid, the other not.
+      return false;
+    }
+    return IsExact() == rti.IsExact()
+        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+  }
+
+ private:
+  ReferenceTypeInfo();
+  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
+
+  // The class of the object.
+  TypeHandle type_handle_;
+  // Whether or not the type is exact or a superclass of the actual type.
+  // Whether or not we have any information about this type.
+  bool is_exact_;
+};
+
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
 // Control-flow graph of a method. Contains a list of basic blocks.
 class HGraph : public ArenaObject<kArenaAllocGraph> {
  public:
@@ -179,7 +302,8 @@
         cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
-        cached_current_method_(nullptr) {
+        cached_current_method_(nullptr),
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -197,36 +321,23 @@
 
   void AddBlock(HBasicBlock* block);
 
-  // Try building the SSA form of this graph, with dominance computation and loop
-  // recognition. Returns whether it was successful in doing all these steps.
-  bool TryBuildingSsa() {
-    BuildDominatorTree();
-    // The SSA builder requires loops to all be natural. Specifically, the dead phi
-    // elimination phase checks the consistency of the graph when doing a post-order
-    // visit for eliminating dead phis: a dead phi can only have loop header phi
-    // users remaining when being visited.
-    if (!AnalyzeNaturalLoops()) return false;
-    // Precompute per-block try membership before entering the SSA builder,
-    // which needs the information to build catch block phis from values of
-    // locals at throwing instructions inside try blocks.
-    ComputeTryBlockInformation();
-    TransformToSsa();
-    in_ssa_form_ = true;
-    return true;
-  }
+  // Try building the SSA form of this graph, with dominance computation and
+  // loop recognition. Returns a code indicating success or the reason for
+  // failure.
+  BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles);
 
   void ComputeDominanceInformation();
   void ClearDominanceInformation();
 
   void BuildDominatorTree();
-  void TransformToSsa();
   void SimplifyCFG();
   void SimplifyCatchBlocks();
 
-  // Analyze all natural loops in this graph. Returns false if one
-  // loop is not natural, that is the header does not dominate the
-  // back edge.
-  bool AnalyzeNaturalLoops() const;
+  // Analyze all natural loops in this graph. Returns a code indicating success
+  // or the reason for failure. The method will fail if a loop is not natural,
+  // that is, if the header does not dominate a back edge, or if it is a
+  // throw-catch loop, i.e. the header is a catch block.
+  BuildSsaResult AnalyzeNaturalLoops() const;
 
   // Iterate over blocks to compute try block membership. Needs reverse post
   // order and loop information.
@@ -487,6 +598,10 @@
   // (such as when the superclass could not be found).
   ArtMethod* art_method_;
 
+  // Keep the RTI of inexact Object to avoid having to pass a stack handle
+  // collection pointer to passes which may create NullConstant.
+  ReferenceTypeInfo inexact_object_rti_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1034,7 +1149,6 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
-  M(Condition, BinaryOperation)                                         \
   M(CurrentMethod, Instruction)                                         \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
@@ -1067,6 +1181,7 @@
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
   M(Mul, BinaryOperation)                                               \
+  M(NativeDebugInfo, Instruction)                                       \
   M(Neg, UnaryOperation)                                                \
   M(NewArray, Instruction)                                              \
   M(NewInstance, Instruction)                                           \
@@ -1141,27 +1256,34 @@
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
-#define FOR_EACH_INSTRUCTION(M)                                         \
-  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+#define FOR_EACH_ABSTRACT_INSTRUCTION(M)                                \
+  M(Condition, BinaryOperation)                                         \
   M(Constant, Instruction)                                              \
   M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
   M(Invoke, Instruction)
 
+#define FOR_EACH_INSTRUCTION(M)                                         \
+  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+  FOR_EACH_ABSTRACT_INSTRUCTION(M)
+
 #define FORWARD_DECLARATION(type, super) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
 #define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKind() const OVERRIDE { return k##type; }          \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }  \
   const char* DebugName() const OVERRIDE { return #type; }              \
-  const H##type* As##type() const OVERRIDE { return this; }             \
-  H##type* As##type() OVERRIDE { return this; }                         \
   bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
     return other->Is##type();                                           \
   }                                                                     \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
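+// In an abstract instruction class these override HInstruction's
+// null-returning defaults, so As##type() can simply return `this`.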
+#define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
+  bool Is##type() const { return As##type() != nullptr; }               \
+  const H##type* As##type() const { return this; }                      \
+  H##type* As##type() { return this; }
+
 template <typename T> class HUseList;
 
 template <typename T>
@@ -1674,122 +1796,6 @@
   DISALLOW_COPY_AND_ASSIGN(HEnvironment);
 };
 
-class ReferenceTypeInfo : ValueObject {
- public:
-  typedef Handle<mirror::Class> TypeHandle;
-
-  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
-    // The constructor will check that the type_handle is valid.
-    return ReferenceTypeInfo(type_handle, is_exact);
-  }
-
-  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
-
-  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
-    return handle.GetReference() != nullptr;
-  }
-
-  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return IsValidHandle(type_handle_);
-  }
-
-  bool IsExact() const { return is_exact_; }
-
-  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsObjectClass();
-  }
-
-  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsStringClass();
-  }
-
-  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
-  }
-
-  bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsInterface();
-  }
-
-  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass();
-  }
-
-  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
-  }
-
-  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    if (!IsExact()) return false;
-    if (!IsArrayClass()) return false;
-    if (!rti.IsArrayClass()) return false;
-    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
-        rti.GetTypeHandle()->GetComponentType());
-  }
-
-  Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
-
-  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    DCHECK(rti.IsValid());
-    return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(IsValid());
-    DCHECK(rti.IsValid());
-    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
-        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
-  }
-
-  // Returns true if the type information provide the same amount of details.
-  // Note that it does not mean that the instructions have the same actual type
-  // (because the type can be the result of a merge).
-  bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!IsValid() && !rti.IsValid()) {
-      // Invalid types are equal.
-      return true;
-    }
-    if (!IsValid() || !rti.IsValid()) {
-      // One is valid, the other not.
-      return false;
-    }
-    return IsExact() == rti.IsExact()
-        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
-  }
-
- private:
-  ReferenceTypeInfo();
-  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
-
-  // The class of the object.
-  TypeHandle type_handle_;
-  // Whether or not the type is exact or a superclass of the actual type.
-  // Whether or not we have any information about this type.
-  bool is_exact_;
-};
-
-std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
-
 class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
   HInstruction(SideEffects side_effects, uint32_t dex_pc)
@@ -1972,11 +1978,18 @@
   void MoveBeforeFirstUserAndOutOfLoops();
 
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  bool Is##type() const;                                                       \
+  const H##type* As##type() const;                                             \
+  H##type* As##type();
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
   virtual H##type* As##type() { return nullptr; }
-
-  FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+  FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
@@ -1999,7 +2012,12 @@
   // 2) Their inputs are identical.
   bool Equals(HInstruction* other) const;
 
-  virtual InstructionKind GetKind() const = 0;
+  // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
+  // is adopted and implemented by our C++ compiler(s). For now, we need to hide
+  // the virtual function because __attribute__((__pure__)) doesn't really
+  // apply its strong requirement to virtual functions, preventing
+  // optimizations.
+  virtual InstructionKind GetKindInternal() const = 0;
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
@@ -2045,6 +2063,7 @@
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
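+  // Used by HInvoke::SetIntrinsic to refine the side effects of a recognized
+  // intrinsic (see the intrinsic table in intrinsics_list.h).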
+  void SetSideEffects(SideEffects other) { side_effects_ = other; }
 
  private:
   void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
@@ -2297,7 +2316,7 @@
 
   virtual uint64_t GetValueAsUint64() const = 0;
 
-  DECLARE_INSTRUCTION(Constant);
+  DECLARE_ABSTRACT_INSTRUCTION(Constant);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HConstant);
@@ -2558,7 +2577,7 @@
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
 
-  DECLARE_INSTRUCTION(UnaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HUnaryOperation);
@@ -2651,7 +2670,7 @@
   // one. Otherwise it returns null.
   HInstruction* GetLeastConstantLeft() const;
 
-  DECLARE_INSTRUCTION(BinaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
@@ -2679,7 +2698,7 @@
   // `instruction`, and disregard moves in between.
   bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
 
-  DECLARE_INSTRUCTION(Condition);
+  DECLARE_ABSTRACT_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
 
@@ -3228,7 +3247,8 @@
 };
 
 enum class Intrinsics {
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) k ## Name,
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+  k ## Name,
 #include "intrinsics_list.h"
   kNone,
   INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -3242,6 +3262,18 @@
   kNeedsEnvironmentOrCache      // Intrinsic requires an environment or requires a dex cache.
 };
 
+enum IntrinsicSideEffects {
+  kNoSideEffects,     // Intrinsic does not have any heap memory side effects.
+  kReadSideEffects,   // Intrinsic may read heap memory.
+  kWriteSideEffects,  // Intrinsic may write heap memory.
+  kAllSideEffects     // Intrinsic may read or write heap memory, or trigger GC.
+};
+
+enum IntrinsicExceptions {
+  kNoThrow,  // Intrinsic does not throw any exceptions.
+  kCanThrow  // Intrinsic may throw exceptions.
+};
+
 class HInvoke : public HInstruction {
  public:
   size_t InputCount() const OVERRIDE { return inputs_.size(); }
@@ -3260,7 +3292,6 @@
 
   Primitive::Type GetType() const OVERRIDE { return return_type_; }
 
-
   uint32_t GetDexMethodIndex() const { return dex_method_index_; }
   const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); }
 
@@ -3270,13 +3301,22 @@
     return intrinsic_;
   }
 
-  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
+  void SetIntrinsic(Intrinsics intrinsic,
+                    IntrinsicNeedsEnvironmentOrCache needs_env_or_cache,
+                    IntrinsicSideEffects side_effects,
+                    IntrinsicExceptions exceptions);
 
   bool IsFromInlinedInvoke() const {
     return GetEnvironment()->IsFromInlinedInvoke();
   }
 
-  bool CanThrow() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return can_throw_; }
+
+  bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
+  }
 
   uint32_t* GetIntrinsicOptimizations() {
     return &intrinsic_optimizations_;
@@ -3288,7 +3328,7 @@
 
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
-  DECLARE_INSTRUCTION(Invoke);
+  DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
  protected:
   HInvoke(ArenaAllocator* arena,
@@ -3306,6 +3346,7 @@
       return_type_(return_type),
       dex_method_index_(dex_method_index),
       original_invoke_type_(original_invoke_type),
+      can_throw_(true),
       intrinsic_(Intrinsics::kNone),
       intrinsic_optimizations_(0) {
   }
@@ -3318,11 +3359,14 @@
     inputs_[index] = input;
   }
 
+  void SetCanThrow(bool can_throw) { can_throw_ = can_throw; }
+
   uint32_t number_of_arguments_;
   ArenaVector<HUserRecord<HInstruction*>> inputs_;
   const Primitive::Type return_type_;
   const uint32_t dex_method_index_;
   const InvokeType original_invoke_type_;
+  bool can_throw_;
   Intrinsics intrinsic_;
 
   // A magic word holding optimizations for intrinsics. See intrinsics.h.
@@ -4417,7 +4461,16 @@
   void RemoveInputAt(size_t index);
 
   Primitive::Type GetType() const OVERRIDE { return type_; }
-  void SetType(Primitive::Type type) { type_ = type; }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(type_ == new_type ||
+           (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (type_ == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    type_ = new_type;
+  }
 
   bool CanBeNull() const OVERRIDE { return can_be_null_; }
   void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
@@ -4657,7 +4710,21 @@
     return false;
   }
 
-  void SetType(Primitive::Type type) { type_ = type; }
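+  // Returns true if `other` is the int/long vs. float/double equivalent of
+  // this ArrayGet at the same dex pc, as created by the SSA builder for
+  // ambiguous array operations. Checked thoroughly in debug builds only.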
+  bool IsEquivalentOf(HArrayGet* other) const {
+    bool result = (GetDexPc() == other->GetDexPc());
+    if (kIsDebugBuild && result) {
+      DCHECK_EQ(GetBlock(), other->GetBlock());
+      DCHECK_EQ(GetArray(), other->GetArray());
+      DCHECK_EQ(GetIndex(), other->GetIndex());
+      if (Primitive::IsIntOrLongType(GetType())) {
+        DCHECK(Primitive::IsFloatingPointType(other->GetType()));
+      } else {
+        DCHECK(Primitive::IsFloatingPointType(GetType()));
+        DCHECK(Primitive::IsIntOrLongType(other->GetType()));
+      }
+    }
+    return result;
+  }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4854,6 +4921,23 @@
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
+// Pseudo-instruction which provides the native debugger with mapping information.
+// It ensures that we can generate line numbers and local variables at this point.
+class HNativeDebugInfo : public HTemplateInstruction<0> {
+ public:
+  explicit HNativeDebugInfo(uint32_t dex_pc)
+      : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {}
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(NativeDebugInfo);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo);
+};
+
 /**
  * Instruction to load a Class object.
  */
@@ -5347,24 +5431,19 @@
 
 class HBoundType : public HExpression<1> {
  public:
-  // Constructs an HBoundType with the given upper_bound.
-  // Ensures that the upper_bound is valid.
-  HBoundType(HInstruction* input,
-             ReferenceTypeInfo upper_bound,
-             bool upper_can_be_null,
-             uint32_t dex_pc = kNoDexPc)
+  HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
-        upper_bound_(upper_bound),
-        upper_can_be_null_(upper_can_be_null),
-        can_be_null_(upper_can_be_null) {
+        upper_bound_(ReferenceTypeInfo::CreateInvalid()),
+        upper_can_be_null_(true),
+        can_be_null_(true) {
     DCHECK_EQ(input->GetType(), Primitive::kPrimNot);
     SetRawInputAt(0, input);
-    SetReferenceTypeInfo(upper_bound_);
   }
 
-  // GetUpper* should only be used in reference type propagation.
+  // {Get,Set}Upper* should only be used in reference type propagation.
   const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; }
   bool GetUpperCanBeNull() const { return upper_can_be_null_; }
+  void SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null);
 
   void SetCanBeNull(bool can_be_null) {
     DCHECK(upper_can_be_null_ || !can_be_null);
@@ -5382,10 +5461,10 @@
   //   if (x instanceof ClassX) {
   //     // upper_bound_ will be ClassX
   //   }
-  const ReferenceTypeInfo upper_bound_;
+  ReferenceTypeInfo upper_bound_;
   // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this
   // is false then can_be_null_ cannot be true).
-  const bool upper_can_be_null_;
+  bool upper_can_be_null_;
   bool can_be_null_;
 
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
@@ -5534,8 +5613,8 @@
   }
 
   bool IsPending() const {
-    DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
-    return destination_.IsInvalid() && !source_.IsInvalid();
+    DCHECK(source_.IsValid() || destination_.IsInvalid());
+    return destination_.IsInvalid() && source_.IsValid();
   }
 
   // True if this blocks a move from the given location.
@@ -5579,6 +5658,8 @@
   HInstruction* instruction_;
 };
 
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs);
+
 static constexpr size_t kDefaultNumberOfMoves = 4;
 
 class HParallelMove : public HTemplateInstruction<0> {
@@ -5869,6 +5950,18 @@
   return &lhs == &rhs;
 }
 
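+// Implement the type checks for concrete instructions with a single
+// non-virtual GetKind() comparison instead of a virtual As##type() call per
+// instruction type.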
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
+  inline const H##type* HInstruction::As##type() const {                       \
+    return Is##type() ? down_cast<const H##type*>(this) : nullptr;             \
+  }                                                                            \
+  inline H##type* HInstruction::As##type() {                                   \
+    return Is##type() ? static_cast<H##type*>(this) : nullptr;                 \
+  }
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 831b626..3eb7274 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -426,8 +426,18 @@
   if (!should_inline) {
     return;
   }
+  size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
   HInliner* inliner = new (graph->GetArena()) HInliner(
-      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph,
+      graph,
+      codegen,
+      dex_compilation_unit,
+      dex_compilation_unit,
+      driver,
+      handles,
+      stats,
+      number_of_dex_registers,
+      /* depth */ 0);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -501,11 +511,8 @@
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
                              const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  ScopedThreadSuspension sts(soa.Self(), kNative);
-
+                             PassObserver* pass_observer,
+                             StackHandleScopeCollection* handles) {
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -522,29 +529,23 @@
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
-  ReferenceTypePropagation* type_propagation =
-      new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier_after_types");
-  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_bce");
-  InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
+  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations1[] = {
     intrinsics,
+    sharpening,
     fold1,
     simplify1,
-    type_propagation,
-    sharpening,
     dce1,
-    simplify2
   };
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
+  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
     // BooleanSimplifier depends on the InstructionSimplifier removing
@@ -557,13 +558,13 @@
     induction,
     bce,
     fold3,  // evaluates code generated by dynamic bce
-    simplify3,
+    simplify2,
     lse,
     dce2,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
-    simplify4,
+    simplify3,
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
@@ -768,14 +769,29 @@
   }
 
   VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
+
   if (run_optimizations_) {
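+    // Keep one handle collection alive for the whole compilation; both SSA
+    // building and the optimization passes below create handles in it.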
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScopeCollection handles(soa.Self());
+    ScopedThreadSuspension sts(soa.Self(), kNative);
+
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-      if (!graph->TryBuildingSsa()) {
-        // We could not transform the graph to SSA, bailout.
-        LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
-            << ": it contains a non natural loop";
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+      BuildSsaResult result = graph->TryBuildingSsa(&handles);
+      if (result != kBuildSsaSuccess) {
+        switch (result) {
+          case kBuildSsaFailNonNaturalLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop);
+            break;
+          case kBuildSsaFailThrowCatchLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+            break;
+          case kBuildSsaFailAmbiguousArrayOp:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+            break;
+          case kBuildSsaSuccess:
+            UNREACHABLE();
+        }
         pass_observer.SetGraphInBadState();
         return nullptr;
       }
@@ -786,7 +802,8 @@
                      compiler_driver,
                      compilation_stats_.get(),
                      dex_compilation_unit,
-                     &pass_observer);
+                     &pass_observer,
+                     &handles);
     codegen->CompileOptimized(code_allocator);
   } else {
     codegen->CompileBaseline(code_allocator);
@@ -880,7 +897,11 @@
 
 bool IsCompilingWithCoreImage() {
   const std::string& image = Runtime::Current()->GetImageLocation();
-  return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
+  // TODO: This is under-approximating...
+  if (EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art")) {
+    return true;
+  }
+  return false;
 }
 
 bool OptimizingCompiler::JitCompile(Thread* self,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6296eed..bca1632 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,9 @@
   kRemovedDeadInstruction,
   kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
-  kNotCompiledCannotBuildSSA,
+  kNotCompiledNonNaturalLoop,
+  kNotCompiledThrowCatchLoop,
+  kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -104,7 +106,9 @@
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
       case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
-      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break;
+      case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
+      case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
       case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
       case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 350f0b1..af3a005 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -19,9 +19,13 @@
 
 #include "nodes.h"
 #include "builder.h"
+#include "common_compiler_test.h"
 #include "compiler/dex/pass_manager.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 
 #include "gtest/gtest.h"
@@ -42,7 +46,6 @@
 #define FIVE_REGISTERS_CODE_ITEM(...)  N_REGISTERS_CODE_ITEM(5, __VA_ARGS__)
 #define SIX_REGISTERS_CODE_ITEM(...)   N_REGISTERS_CODE_ITEM(6, __VA_ARGS__)
 
-
 LiveInterval* BuildInterval(const size_t ranges[][2],
                             size_t number_of_ranges,
                             ArenaAllocator* allocator,
@@ -111,6 +114,12 @@
   return instruction->GetBlock() == nullptr;
 }
 
+inline void TransformToSsa(HGraph* graph) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
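The TransformToSsa helper above opens a ScopedObjectAccess, so a test calling it needs a runtime, which is why the register-allocator tests below move from TEST to TEST_F with a CommonCompilerTest fixture. A sketch of typical usage; the fixture name is hypothetical and the bytecode mirrors the register-allocator tests in this change:

class MySsaTest : public CommonCompilerTest {};  // hypothetical fixture

TEST_F(MySsaTest, BuildsSsaSuccessfully) {
  ArenaPool pool;
  ArenaAllocator allocator(&pool);
  HGraph* graph = CreateGraph(&allocator);
  HGraphBuilder builder(graph);
  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
      Instruction::CONST_4 | 0 | 0,
      Instruction::RETURN);
  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
  builder.BuildGraph(*item);
  TransformToSsa(graph);  // replaces the old graph->TryBuildingSsa() call
}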
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 176c50c..9d136f3 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <iostream>
 
 #include "parallel_move_resolver.h"
 
@@ -172,7 +171,7 @@
         i = -1;
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
-        // move by just returning from this `PerforrmMove`.
+        // move by just returning from this `PerformMove`.
         moves_[index]->ClearPending(destination);
         return required_swap;
       }
@@ -201,7 +200,7 @@
   } else {
     for (MoveOperands* other_move : moves_) {
       if (other_move->Blocks(destination)) {
-        DCHECK(other_move->IsPending());
+        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
         if (!move->Is64BitMove() && other_move->Is64BitMove()) {
           // We swap 64bits moves before swapping 32bits moves. Go back from the
           // cycle by returning the move that must be swapped.
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index d1770b7..63ef600 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -96,7 +96,7 @@
     if (can_merge_with_load_class && !load_class->HasUses()) {
       load_class->GetBlock()->RemoveInstruction(load_class);
     }
-  } else if (can_merge_with_load_class) {
+  } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
     // Pass the initialization duty to the `HLoadClass` instruction,
     // and remove the instruction from the graph.
     load_class->SetMustGenerateClinitCheck(true);
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
deleted file mode 100644
index bde54ee..0000000
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "primitive_type_propagation.h"
-
-#include "nodes.h"
-#include "ssa_builder.h"
-
-namespace art {
-
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
-  // We trust the verifier has already done the necessary checking.
-  switch (existing) {
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimNot:
-      return existing;
-    default:
-      // Phis are initialized with a void type, so if we are asked
-      // to merge with a void type, we should use the existing one.
-      return new_type == Primitive::kPrimVoid
-          ? existing
-          : HPhi::ToPhiType(new_type);
-  }
-}
-
-// Re-compute and update the type of the instruction. Returns
-// whether or not the type was changed.
-bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
-  DCHECK(phi->IsLive());
-  Primitive::Type existing = phi->GetType();
-
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    Primitive::Type input_type = phi->InputAt(i)->GetType();
-    new_type = MergeTypes(new_type, input_type);
-  }
-  phi->SetType(new_type);
-
-  if (new_type == Primitive::kPrimDouble
-      || new_type == Primitive::kPrimFloat
-      || new_type == Primitive::kPrimNot) {
-    // If the phi is of floating point type, we need to update its inputs to that
-    // type. For inputs that are phis, we need to recompute their types.
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
-      if (input->GetType() != new_type) {
-        HInstruction* equivalent = (new_type == Primitive::kPrimNot)
-            ? SsaBuilder::GetReferenceTypeEquivalent(input)
-            : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
-        phi->ReplaceInput(equivalent, i);
-        if (equivalent->IsPhi()) {
-          AddToWorklist(equivalent->AsPhi());
-        } else if (equivalent == input) {
-          // The input has changed its type. It can be an input of other phis,
-          // so we need to put phi users in the work list.
-          AddDependentInstructionsToWorklist(equivalent);
-        }
-      }
-    }
-  }
-
-  return existing != new_type;
-}
-
-void PrimitiveTypePropagation::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
-void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  if (block->IsLoopHeader()) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        AddToWorklist(phi);
-      }
-    }
-  } else {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      // Eagerly compute the type of the phi, for quicker convergence. Note
-      // that we don't need to add users to the worklist because we are
-      // doing a reverse post-order visit, therefore either the phi users are
-      // non-loop phi and will be visited later in the visit, or are loop-phis,
-      // and they are already in the work list.
-      HPhi* phi = it.Current()->AsPhi();
-      if (phi->IsLive()) {
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void PrimitiveTypePropagation::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    if (UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
deleted file mode 100644
index 212fcfc..0000000
--- a/compiler/optimizing/primitive_type_propagation.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-
-#include "base/arena_containers.h"
-#include "nodes.h"
-
-namespace art {
-
-// Compute and propagate primitive types of phis in the graph.
-class PrimitiveTypePropagation : public ValueObject {
- public:
-  explicit PrimitiveTypePropagation(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HInstruction* instruction);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index fea903d..1c25e48 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -40,7 +40,6 @@
       throwable_class_handle_(throwable_class_handle),
       worklist_(worklist) {}
 
-  void VisitNullConstant(HNullConstant* null_constant) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
@@ -57,6 +56,7 @@
   void VisitInvoke(HInvoke* instr) OVERRIDE;
   void VisitArrayGet(HArrayGet* instr) OVERRIDE;
   void VisitCheckCast(HCheckCast* instr) OVERRIDE;
+  void VisitBoundType(HBoundType* instr) OVERRIDE;
   void VisitNullCheck(HNullCheck* instr) OVERRIDE;
   void VisitFakeString(HFakeString* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
@@ -71,8 +71,6 @@
   ReferenceTypeInfo::TypeHandle string_class_handle_;
   ReferenceTypeInfo::TypeHandle throwable_class_handle_;
   ArenaVector<HInstruction*>* worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
@@ -127,87 +125,6 @@
   }
 }
 
-static void CheckHasNoTypedInputs(HInstruction* root_instr) {
-  ArenaAllocatorAdapter<void> adapter =
-      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
-
-  ArenaVector<HPhi*> visited_phis(adapter);
-  ArenaVector<HInstruction*> worklist(adapter);
-  worklist.push_back(root_instr);
-
-  while (!worklist.empty()) {
-    HInstruction* instr = worklist.back();
-    worklist.pop_back();
-
-    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
-      // Expect that both `root_instr` and its inputs have invalid RTI.
-      ScopedObjectAccess soa(Thread::Current());
-      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
-
-      // Insert all unvisited inputs to the worklist.
-      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
-        HInstruction* input = it.Current();
-        if (input->IsPhi()) {
-          if (ContainsElement(visited_phis, input->AsPhi())) {
-            continue;
-          } else {
-            visited_phis.push_back(input->AsPhi());
-          }
-        }
-        worklist.push_back(input);
-      }
-    } else if (instr->IsNullConstant()) {
-      // The only input of `root_instr` allowed to have valid RTI because it is ignored.
-    } else {
-      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
-          << instr->GetReferenceTypeInfo();
-      UNREACHABLE();
-    }
-  }
-}
-
-template<typename Functor>
-static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
-  ScopedObjectAccess soa(Thread::Current());
-  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
-    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-        fn(instr);
-      }
-    }
-    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-        fn(instr);
-      }
-    }
-  }
-}
-
-void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
-  // In some cases, the fix-point iteration will leave kPrimNot instructions with
-  // invalid RTI because bytecode does not provide enough typing information.
-  // Set the RTI of such instructions to Object.
-  // Example:
-  //   MyClass a = null, b = null;
-  //   while (a == null) {
-  //     if (cond) { a = b; } else { b = a; }
-  //   }
-
-  if (kIsDebugBuild) {
-    // Test that if we are going to set RTI from invalid to Object, that
-    // instruction did not have any typed instructions in its def-use chain
-    // and therefore its type could not be inferred.
-    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
-  }
-
-  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
-  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
-    instr->SetReferenceTypeInfo(obj_rti);
-  });
-}
-
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -217,7 +134,6 @@
   }
 
   ProcessWorklist();
-  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -245,34 +161,6 @@
   BoundTypeForIfInstanceOf(block);
 }
 
-// Create a bound type for the given object narrowing the type as much as possible.
-// The BoundType upper values for the super type and can_be_null will be taken from
-// load_class.GetLoadedClassRTI() and upper_can_be_null.
-static HBoundType* CreateBoundType(ArenaAllocator* arena,
-                                   HInstruction* obj,
-                                   HLoadClass* load_class,
-                                   bool upper_can_be_null)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-  ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
-  DCHECK(class_rti.IsValid());
-  HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null);
-  // Narrow the type as much as possible.
-  if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
-    bound_type->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
-  } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) {
-    bound_type->SetReferenceTypeInfo(obj_rti);
-  } else {
-    bound_type->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
-  }
-  if (upper_can_be_null) {
-    bound_type->SetCanBeNull(obj->CanBeNull());
-  }
-  return bound_type;
-}
-
 // Check if we should create a bound type for the given object at the specified
 // position. Because of inlining, and because we run RTP more than once, we
 // might already have an HBoundType. If we do, we should not create a new one.
@@ -358,8 +246,8 @@
         ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create(
             object_class_handle_, /* is_exact */ true);
         if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) {
-          bound_type = new (graph_->GetArena()) HBoundType(
-              obj, object_rti, /* bound_can_be_null */ false);
+          bound_type = new (graph_->GetArena()) HBoundType(obj);
+          bound_type->SetUpperBound(object_rti, /* bound_can_be_null */ false);
           if (obj->GetReferenceTypeInfo().IsValid()) {
             bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo());
           }
@@ -376,6 +264,75 @@
   }
 }
 
+// Returns true if one of the patterns below has been recognized. If so, the
+// InstanceOf instruction together with the true branch of `ifInstruction` will
+// be returned using the out parameters.
+// Recognized patterns:
+//   (1) patterns equivalent to `if (obj instanceof X)`
+//     (a) InstanceOf -> Equal to 1 -> If
+//     (b) InstanceOf -> NotEqual to 0 -> If
+//     (c) InstanceOf -> If
+//   (2) patterns equivalent to `if (!(obj instanceof X))`
+//     (a) InstanceOf -> Equal to 0 -> If
+//     (b) InstanceOf -> NotEqual to 1 -> If
+//     (c) InstanceOf -> BooleanNot -> If
+static bool MatchIfInstanceOf(HIf* ifInstruction,
+                              /* out */ HInstanceOf** instanceOf,
+                              /* out */ HBasicBlock** trueBranch) {
+  HInstruction* input = ifInstruction->InputAt(0);
+
+  if (input->IsEqual()) {
+    HInstruction* rhs = input->AsEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsOne()) {
+          // Case (1a)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2a)
+          DCHECK(rhs->AsIntConstant()->IsZero());
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsNotEqual()) {
+    HInstruction* rhs = input->AsNotEqual()->GetConstantRight();
+    if (rhs != nullptr) {
+      HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft();
+      if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+        if (rhs->AsIntConstant()->IsZero()) {
+          // Case (1b)
+          *trueBranch = ifInstruction->IfTrueSuccessor();
+        } else {
+          // Case (2b)
+          DCHECK(rhs->AsIntConstant()->IsOne());
+          *trueBranch = ifInstruction->IfFalseSuccessor();
+        }
+        *instanceOf = lhs->AsInstanceOf();
+        return true;
+      }
+    }
+  } else if (input->IsInstanceOf()) {
+    // Case (1c)
+    *instanceOf = input->AsInstanceOf();
+    *trueBranch = ifInstruction->IfTrueSuccessor();
+    return true;
+  } else if (input->IsBooleanNot()) {
+    HInstruction* not_input = input->InputAt(0);
+    if (not_input->IsInstanceOf()) {
+      // Case (2c)
+      *instanceOf = not_input->AsInstanceOf();
+      *trueBranch = ifInstruction->IfFalseSuccessor();
+      return true;
+    }
+  }
+
+  return false;
+}
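For intuition, a tiny standalone model (not ART code) of the case analysis above: the HIf input is classified, and the matcher reports which successor corresponds to the instanceof holding.

#include <cassert>

// Toy classification of the HIf input; `constant` is the integer the
// InstanceOf result is compared against (only used for kEqual/kNotEqual).
enum InputKind { kInstanceOf, kEqual, kNotEqual, kBooleanNot };

// Reports which branch is the "obj instanceof X" branch, mirroring the
// cases (1a)-(2c) documented above.
static bool Match(InputKind kind, int constant, bool* true_successor_taken) {
  switch (kind) {
    case kInstanceOf: *true_successor_taken = true;  return true;            // (1c)
    case kBooleanNot: *true_successor_taken = false; return true;            // (2c)
    case kEqual:      *true_successor_taken = (constant == 1); return true;  // (1a)/(2a)
    case kNotEqual:   *true_successor_taken = (constant == 0); return true;  // (1b)/(2b)
  }
  return false;
}

int main() {
  bool taken = false;
  assert(Match(kNotEqual, 0, &taken) && taken);   // InstanceOf != 0: case (1b)
  assert(Match(kEqual, 0, &taken) && !taken);     // InstanceOf == 0: case (2a)
  return 0;
}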
+
 // Detects if `block` is the True block for the pattern
 // `if (x instanceof ClassX) { }`
 // If that's the case insert an HBoundType instruction to bound the type of `x`
@@ -385,22 +342,11 @@
   if (ifInstruction == nullptr) {
     return;
   }
-  HInstruction* ifInput = ifInstruction->InputAt(0);
-  HInstruction* instanceOf = nullptr;
-  HBasicBlock* instanceOfTrueBlock = nullptr;
 
-  // The instruction simplifier has transformed:
-  //   - `if (a instanceof A)` into an HIf with an HInstanceOf input
-  //   - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn
-  //     has an HInstanceOf input)
-  // So we should not see the usual HEqual here.
-  if (ifInput->IsInstanceOf()) {
-    instanceOf = ifInput;
-    instanceOfTrueBlock = ifInstruction->IfTrueSuccessor();
-  } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) {
-    instanceOf = ifInput->InputAt(0);
-    instanceOfTrueBlock = ifInstruction->IfFalseSuccessor();
-  } else {
+  // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns.
+  HInstanceOf* instanceOf = nullptr;
+  HBasicBlock* instanceOfTrueBlock = nullptr;
+  if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) {
     return;
   }
 
@@ -435,11 +381,8 @@
         ScopedObjectAccess soa(Thread::Current());
         HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction();
         if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) {
-          bound_type = CreateBoundType(
-              graph_->GetArena(),
-              obj,
-              load_class,
-              false /* InstanceOf ensures the object is not null. */);
+          bound_type = new (graph_->GetArena()) HBoundType(obj);
+          bound_type->SetUpperBound(class_rti, /* InstanceOf fails for null. */ false);
           instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point);
         } else {
           // We already have a bound type on the position we would need to insert
@@ -505,13 +448,6 @@
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
-void RTPVisitor::VisitNullConstant(HNullConstant* instr) {
-  // TODO: The null constant could be bound contextually (e.g. based on return statements)
-  // to a more precise type.
-  instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
-}
-
 void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
@@ -523,7 +459,11 @@
 static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true);
+  if (dex_cache == nullptr) {
+    // Dex cache could not be found. This should only happen during gtests.
+    return nullptr;
+  }
   // Get type from dex cache assuming it was populated by the verifier.
   return dex_cache->GetResolvedType(type_idx);
 }
@@ -540,17 +480,24 @@
 
 void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
                                            const FieldInfo& info) {
-  // The field index is unknown only during tests.
-  if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) {
+  if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
 
   ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
-  // TODO: There are certain cases where we can't resolve the field.
-  // b/21914925 is open to keep track of a repro case for this issue.
-  mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>();
+  mirror::Class* klass = nullptr;
+
+  // The field index is unknown only during tests.
+  if (info.GetFieldIndex() != kUnknownFieldIndex) {
+    ClassLinker* cl = Runtime::Current()->GetClassLinker();
+    ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+    // TODO: There are certain cases where we can't resolve the field.
+    // b/21914925 is open to keep track of a repro case for this issue.
+    if (field != nullptr) {
+      klass = field->GetType<false>();
+    }
+  }
+
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
@@ -625,48 +572,66 @@
   instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
 }
 
+void RTPVisitor::VisitBoundType(HBoundType* instr) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  ReferenceTypeInfo class_rti = instr->GetUpperBound();
+  if (class_rti.IsValid()) {
+    // Narrow the type as much as possible.
+    HInstruction* obj = instr->InputAt(0);
+    ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+    if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
+      instr->SetReferenceTypeInfo(
+          ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
+    } else if (obj_rti.IsValid()) {
+      if (class_rti.IsSupertypeOf(obj_rti)) {
+        // Object type is more specific.
+        instr->SetReferenceTypeInfo(obj_rti);
+      } else {
+        // Upper bound is more specific.
+        instr->SetReferenceTypeInfo(
+            ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+      }
+    } else {
+      // Object not typed yet. Leave BoundType untyped for now rather than
+      // assign the type conservatively.
+    }
+    instr->SetCanBeNull(obj->CanBeNull() && instr->GetUpperCanBeNull());
+  } else {
+    // The owner of the BoundType was already visited. If the class is unresolved,
+    // the BoundType should have been removed from the data flow and this method
+    // should remove it from the graph.
+    DCHECK(!instr->HasUses());
+    instr->GetBlock()->RemoveInstruction(instr);
+  }
+}
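A standalone toy of the narrowing order VisitBoundType applies, where `bound_is_final` stands in for CannotBeAssignedFromOtherTypes() and `obj_is_subtype` for class_rti.IsSupertypeOf(obj_rti); illustrative only, not ART code:

#include <cstdio>
#include <string>

struct Rti {  // stand-in for ReferenceTypeInfo
  bool valid;
  bool is_exact;
  std::string klass;
};

// Mirrors the decision order above: a final upper bound is exact; otherwise a
// more specific object type wins; otherwise the bound is used inexactly; and
// an untyped object leaves the BoundType untyped for now.
static Rti Narrow(const Rti& bound, bool bound_is_final,
                  const Rti& obj, bool obj_is_subtype) {
  if (bound_is_final) return Rti{true, true, bound.klass};
  if (obj.valid && obj_is_subtype) return obj;
  if (obj.valid) return Rti{true, false, bound.klass};
  return Rti{false, false, ""};
}

int main() {
  Rti bound{true, false, "java.lang.Object"};
  Rti obj{true, false, "MyClass"};  // hypothetical class
  Rti r = Narrow(bound, /* bound_is_final */ false, obj, /* obj_is_subtype */ true);
  std::printf("%s exact=%d\n", r.klass.c_str(), r.is_exact);  // MyClass exact=0
  return 0;
}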
+
 void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
+  ScopedObjectAccess soa(Thread::Current());
+
   HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    if (!class_rti.IsValid()) {
-      // He have loaded an unresolved class. Don't bother bounding the type.
-      return;
-    }
+  HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
+  if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
+    // The next instruction is not an uninitialized BoundType. This must be
+    // a later RTP pass, run after SsaBuilder, so there is nothing to do here.
+    return;
   }
-  HInstruction* obj = check_cast->InputAt(0);
-  HBoundType* bound_type = nullptr;
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
-    if (check_cast->StrictlyDominates(user)) {
-      if (bound_type == nullptr) {
-        ScopedObjectAccess soa(Thread::Current());
-        if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) {
-          bound_type = CreateBoundType(
-              GetGraph()->GetArena(),
-              obj,
-              load_class,
-              true /* CheckCast succeeds for nulls. */);
-          check_cast->GetBlock()->InsertInstructionAfter(bound_type, check_cast);
-        } else {
-          // Update nullability of the existing bound type, which may not have known
-          // that its input was not null when it was being created.
-          bound_type = check_cast->GetNext()->AsBoundType();
-          bound_type->SetCanBeNull(obj->CanBeNull());
-          // We already have a bound type on the position we would need to insert
-          // the new one. The existing bound type should dominate all the users
-          // (dchecked) so there's no need to continue.
-          break;
-        }
-      }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
-    }
+  DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
+
+  if (class_rti.IsValid()) {
+    // This is the first run of RTP and class is resolved.
+    bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true);
+  } else {
+    // This is the first run of RTP and class is unresolved. Remove the binding.
+    // The instruction itself is removed in VisitBoundType so as to not
+    // invalidate HInstructionIterator.
+    bound_type->ReplaceWith(bound_type->InputAt(0));
   }
 }
 
 void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
-  if (phi->GetType() != Primitive::kPrimNot) {
+  if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) {
     return;
   }
 
@@ -824,6 +789,8 @@
 // NullConstant inputs are ignored during merging as they do not provide any useful information.
 // If all the inputs are NullConstants then the type of the phi will be set to Object.
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
+  DCHECK(instr->IsLive());
+
   size_t input_count = instr->InputCount();
   size_t first_input_index_not_null = 0;
   while (first_input_index_not_null < input_count &&
@@ -868,7 +835,7 @@
 // Re-computes and updates the nullability of the instruction. Returns whether or
 // not the nullability was changed.
 bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
-  DCHECK(instr->IsPhi()
+  DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive())
       || instr->IsBoundType()
       || instr->IsNullCheck()
       || instr->IsArrayGet());
@@ -916,7 +883,7 @@
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
   for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
-    if (user->IsPhi()
+    if ((user->IsPhi() && user->AsPhi()->IsLive())
        || user->IsBoundType()
        || user->IsNullCheck()
        || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 21789e1..5c05592 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,7 +57,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
-  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 8706854..306a457 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -28,13 +28,13 @@
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 
-#include "gtest/gtest.h"
-
 namespace art {
 
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
+class RegisterAllocatorTest : public CommonCompilerTest {};
+
 static bool Check(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -42,7 +42,7 @@
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -57,7 +57,7 @@
  * Unit testing of RegisterAllocator::ValidateIntervals. Register allocator
  * tests are based on this validation method.
  */
-TEST(RegisterAllocatorTest, ValidateIntervals) {
+TEST_F(RegisterAllocatorTest, ValidateIntervals) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -146,7 +146,7 @@
   }
 }
 
-TEST(RegisterAllocatorTest, CFG1) {
+TEST_F(RegisterAllocatorTest, CFG1) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -166,7 +166,7 @@
   ASSERT_TRUE(Check(data));
 }
 
-TEST(RegisterAllocatorTest, Loop1) {
+TEST_F(RegisterAllocatorTest, Loop1) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -205,7 +205,7 @@
   ASSERT_TRUE(Check(data));
 }
 
-TEST(RegisterAllocatorTest, Loop2) {
+TEST_F(RegisterAllocatorTest, Loop2) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -259,11 +259,11 @@
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
-  graph->TryBuildingSsa();
+  TransformToSsa(graph);
   return graph;
 }
 
-TEST(RegisterAllocatorTest, Loop3) {
+TEST_F(RegisterAllocatorTest, Loop3) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -326,7 +326,7 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
-TEST(RegisterAllocatorTest, FirstRegisterUse) {
+TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
@@ -366,7 +366,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST(RegisterAllocatorTest, DeadPhi) {
+TEST_F(RegisterAllocatorTest, DeadPhi) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -407,7 +407,7 @@
  * that share the same register. It should split the interval it is currently
  * allocating for at the minimum lifetime position between the two inactive intervals.
  */
-TEST(RegisterAllocatorTest, FreeUntil) {
+TEST_F(RegisterAllocatorTest, FreeUntil) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
@@ -539,7 +539,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, PhiHint) {
+TEST_F(RegisterAllocatorTest, PhiHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -658,7 +658,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -726,7 +726,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -795,7 +795,7 @@
   return graph;
 }
 
-TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -819,7 +819,7 @@
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
-TEST(RegisterAllocatorTest, SpillInactive) {
+TEST_F(RegisterAllocatorTest, SpillInactive) {
   ArenaPool pool;
 
   // Create a synthesized graph to please the register_allocator and
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 9e6cfbe..f6bab8e 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -17,214 +17,11 @@
 #include "ssa_builder.h"
 
 #include "nodes.h"
-#include "primitive_type_propagation.h"
+#include "reference_type_propagation.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-// Returns whether this is a loop header phi which was eagerly created but later
-// found inconsistent due to the vreg being undefined in one of its predecessors.
-// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination.
-static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
-  return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
-}
-
-/**
- * A debuggable application may require to reviving phis, to ensure their
- * associated DEX register is available to a debugger. This class implements
- * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It
- * also makes sure that phis with incompatible input types are not revived
- * (statement (b) of the SsaBuilder).
- *
- * This phase must be run after detecting dead phis through the
- * DeadPhiElimination phase, and before deleting the dead phis.
- */
-class DeadPhiHandling : public ValueObject {
- public:
-  explicit DeadPhiHandling(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    worklist_.reserve(kDefaultWorklistSize);
-  }
-
-  void Run();
-
- private:
-  void VisitBasicBlock(HBasicBlock* block);
-  void ProcessWorklist();
-  void AddToWorklist(HPhi* phi);
-  void AddDependentInstructionsToWorklist(HPhi* phi);
-  bool UpdateType(HPhi* phi);
-
-  HGraph* const graph_;
-  ArenaVector<HPhi*> worklist_;
-
-  static constexpr size_t kDefaultWorklistSize = 8;
-
-  DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
-};
-
-static bool HasConflictingEquivalent(HPhi* phi) {
-  if (phi->GetNext() == nullptr) {
-    return false;
-  }
-  HPhi* next = phi->GetNext()->AsPhi();
-  if (next->GetRegNumber() == phi->GetRegNumber()) {
-    if (next->GetType() == Primitive::kPrimVoid) {
-      // We only get a void type for an equivalent phi we processed and found out
-      // it was conflicting.
-      return true;
-    } else {
-      // Go to the next phi, in case it is also an equivalent.
-      return HasConflictingEquivalent(next);
-    }
-  }
-  return false;
-}
-
-bool DeadPhiHandling::UpdateType(HPhi* phi) {
-  if (phi->IsDead()) {
-    // Phi was rendered dead while waiting in the worklist because it was replaced
-    // with an equivalent.
-    return false;
-  }
-
-  Primitive::Type existing = phi->GetType();
-
-  bool conflict = false;
-  Primitive::Type new_type = existing;
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
-    if (input->IsPhi() && input->AsPhi()->IsDead()) {
-      // We are doing a reverse post order visit of the graph, reviving
-      // phis that have environment uses and updating their types. If an
-      // input is a phi, and it is dead (because its input types are
-      // conflicting), this phi must be marked dead as well.
-      conflict = true;
-      break;
-    }
-    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
-
-    // The only acceptable transitions are:
-    // - From void to typed: first time we update the type of this phi.
-    // - From int to reference (or reference to int): the phi has to change
-    //   to reference type. If the integer input cannot be converted to a
-    //   reference input, the phi will remain dead.
-    if (new_type == Primitive::kPrimVoid) {
-      new_type = input_type;
-    } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
-      if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
-        // If we already asked for an equivalent of the input phi, but that equivalent
-        // ended up conflicting, make this phi conflicting too.
-        conflict = true;
-        break;
-      }
-      HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
-      if (equivalent == nullptr) {
-        conflict = true;
-        break;
-      }
-      phi->ReplaceInput(equivalent, i);
-      if (equivalent->IsPhi()) {
-        DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
-        // We created a new phi, but that phi has the same inputs as the old phi. We
-        // add it to the worklist to ensure its inputs can also be converted to reference.
-        // If not, it will remain dead, and the algorithm will make the current phi dead
-        // as well.
-        equivalent->AsPhi()->SetLive();
-        AddToWorklist(equivalent->AsPhi());
-      }
-    } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
-      new_type = Primitive::kPrimNot;
-      // Start over, we may request reference equivalents for the inputs of the phi.
-      i = -1;
-    } else if (new_type != input_type) {
-      conflict = true;
-      break;
-    }
-  }
-
-  if (conflict) {
-    phi->SetType(Primitive::kPrimVoid);
-    phi->SetDead();
-    return true;
-  } else if (existing == new_type) {
-    return false;
-  }
-
-  DCHECK(phi->IsLive());
-  phi->SetType(new_type);
-
-  // There might exist a `new_type` equivalent of `phi` already. In that case,
-  // we replace the equivalent with the, now live, `phi`.
-  HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType();
-  if (equivalent != nullptr) {
-    // There cannot be more than two equivalents with the same type.
-    DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr);
-    // If doing fix-point iteration, the equivalent might be in `worklist_`.
-    // Setting it dead will make UpdateType skip it.
-    equivalent->SetDead();
-    equivalent->ReplaceWith(phi);
-  }
-
-  return true;
-}
-
-void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
-  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->AsPhi();
-    if (IsUndefinedLoopHeaderPhi(phi)) {
-      DCHECK(phi->IsDead());
-      continue;
-    }
-    if (phi->IsDead() && phi->HasEnvironmentUses()) {
-      phi->SetLive();
-      if (block->IsLoopHeader()) {
-        // Loop phis must have a type to guarantee convergence of the algorithm.
-        DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
-        AddToWorklist(phi);
-      } else {
-        // Because we are doing a reverse post order visit, all inputs of
-        // this phi have been visited and therefore had their (initial) type set.
-        UpdateType(phi);
-      }
-    }
-  }
-}
-
-void DeadPhiHandling::ProcessWorklist() {
-  while (!worklist_.empty()) {
-    HPhi* instruction = worklist_.back();
-    worklist_.pop_back();
-    // Note that the same equivalent phi can be added multiple times in the work list, if
-    // used by multiple phis. The first call to `UpdateType` will know whether the phi is
-    // dead or live.
-    if (instruction->IsLive() && UpdateType(instruction)) {
-      AddDependentInstructionsToWorklist(instruction);
-    }
-  }
-}
-
-void DeadPhiHandling::AddToWorklist(HPhi* instruction) {
-  DCHECK(instruction->IsLive());
-  worklist_.push_back(instruction);
-}
-
-void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr && !phi->IsDead()) {
-      AddToWorklist(phi);
-    }
-  }
-}
-
-void DeadPhiHandling::Run() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-}
-
 void SsaBuilder::SetLoopHeaderPhiInputs() {
   for (size_t i = loop_headers_.size(); i > 0; --i) {
     HBasicBlock* block = loop_headers_[i - 1];
@@ -285,10 +82,11 @@
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
       if (next != nullptr) {
-        // Make sure we do not replace a live phi with a dead phi. A live phi has been
-        // handled by the type propagation phase, unlike a dead phi.
+        // Make sure we do not replace a live phi with a dead phi. A live phi
+        // has been handled by the type propagation phase, unlike a dead phi.
         if (next->IsLive()) {
           phi->ReplaceWith(next);
+          phi->SetDead();
         } else {
           next->ReplaceWith(phi);
         }
@@ -300,64 +98,7 @@
   }
 }
 
-void SsaBuilder::BuildSsa() {
-  // 1) Visit in reverse post order. We need to have all predecessors of a block visited
-  // (with the exception of loops) in order to create the right environment for that
-  // block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop phis.
-  SetLoopHeaderPhiInputs();
-
-  // 3) Mark dead phis. This will mark phis that are only used by environments:
-  // at the DEX level, the type of these phis does not need to be consistent, but
-  // our code generator will complain if the inputs of a phi do not have the same
-  // type. The marking allows the type propagation to know which phis it needs
-  // to handle. We mark but do not eliminate: the elimination will be done in
-  // step 9).
-  SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph());
-  dead_phis_for_type_propagation.MarkDeadPhis();
-
-  // 4) Propagate types of phis. At this point, phis are typed void in the general
-  // case, or float/double/reference when we created an equivalent phi. So we
-  // need to propagate the types across phis to give them a correct type.
-  PrimitiveTypePropagation type_propagation(GetGraph());
-  type_propagation.Run();
-
-  // 5) When creating equivalent phis we copy the inputs of the original phi which
-  // may be improperly typed. This was fixed during the type propagation in 4) but
-  // as a result we may end up with two equivalent phis with the same type for
-  // the same dex register. This pass cleans them up.
-  EquivalentPhisCleanup();
-
-  // 6) Mark dead phis again. Step 4) may have introduced new phis.
-  // Step 5) might enable the death of new phis.
-  SsaDeadPhiElimination dead_phis(GetGraph());
-  dead_phis.MarkDeadPhis();
-
-  // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
-  // Note that we cannot do this phase before type propagation, otherwise
-  // we could get rid of phi equivalents, whose presence is a requirement for the
-  // type propagation phase. Note that this is to satisfy statement (a) of the
-  // SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination redundant_phi(GetGraph());
-  redundant_phi.Run();
-
-  // 8) Fix the type for null constants which are part of an equality comparison.
-  // We need to do this after redundant phi elimination, to ensure the only cases
-  // that we can see are reference comparison against 0. The redundant phi
-  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
-  // or HNotEqual.
-  FixNullConstantType();
-
-  // 9) Make sure environments use the right phi "equivalent": a phi marked dead
-  // can have a phi equivalent that is not dead. We must therefore update
-  // all environment uses of the dead phi to use its equivalent. Note that there
-  // can be multiple phis for the same Dex register that are live (for example
-  // when merging constants), in which case it is OK for the environments
-  // to just reference one.
+void SsaBuilder::FixEnvironmentPhis() {
   for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
@@ -378,24 +119,375 @@
       phi->ReplaceWith(next);
     }
   }
+}
 
-  // 10) Deal with phis to guarantee liveness of phis in case of a debuggable
-  // application. This is for satisfying statement (c) of the SsaBuilder
-  // (see ssa_builder.h).
-  if (GetGraph()->IsDebuggable()) {
-    DeadPhiHandling dead_phi_handler(GetGraph());
-    dead_phi_handler.Run();
+static void AddDependentInstructionsToWorklist(HInstruction* instruction,
+                                               ArenaVector<HPhi*>* worklist) {
+  // If `instruction` is a dead phi, a type conflict was just identified. All its
+  // live phi users, and transitively the users of those users, therefore need to
+  // be marked dead/conflicting too, so we add them to the worklist. Otherwise we
+  // add users whose types do not match and need to be updated.
+  bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead();
+  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+    HInstruction* user = it.Current()->GetUser();
+    if (user->IsPhi() && user->AsPhi()->IsLive()) {
+      if (add_all_live_phis || user->GetType() != instruction->GetType()) {
+        worklist->push_back(user->AsPhi());
+      }
+    }
+  }
+}
+
+// Find a candidate primitive type for `phi` by merging the type of its inputs.
+// Return false if conflict is identified.
+static bool TypePhiFromInputs(HPhi* phi) {
+  Primitive::Type common_type = phi->GetType();
+
+  for (HInputIterator it(phi); !it.Done(); it.Advance()) {
+    HInstruction* input = it.Current();
+    if (input->IsPhi() && input->AsPhi()->IsDead()) {
+      // Phis are constructed live, so if an input is a dead phi, it must have
+      // been made dead due to a type conflict. Mark this phi as conflicting too.
+      return false;
+    }
+
+    Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
+    if (common_type == input_type) {
+      // No change in type.
+    } else if (Primitive::Is64BitType(common_type) != Primitive::Is64BitType(input_type)) {
+      // Types are of different sizes, e.g. int vs. long. Must be a conflict.
+      return false;
+    } else if (Primitive::IsIntegralType(common_type)) {
+      // Previous inputs were integral, this one is not but is of the same size.
+      // This does not imply conflict since some bytecode instruction types are
+      // ambiguous. TypeInputsOfPhi will either type them or detect a conflict.
+      DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot);
+      common_type = input_type;
+    } else if (Primitive::IsIntegralType(input_type)) {
+      // Input is integral, common type is not. Same as in the previous case, if
+      // there is a conflict, it will be detected during TypeInputsOfPhi.
+      DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot);
+    } else {
+      // Combining float and reference types. Clearly a conflict.
+      DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) ||
+             (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat));
+      return false;
+    }
   }
 
-  // 11) Now that the right phis are used for the environments, and we
-  // have potentially revive dead phis in case of a debuggable application,
-  // we can eliminate phis we do not need. Regardless of the debuggable status,
-  // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
-  // as well as for the code generation, which does not deal with phis of conflicting
-  // input types.
-  dead_phis.EliminateDeadPhis();
+  // We have found a candidate type for the phi. Set it and return true. We may
+  // still discover a conflict while typing the individual inputs in TypeInputsOfPhi.
+  phi->SetType(common_type);
+  return true;
+}
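A compact standalone model of the merge above, loosely mirroring Primitive::Type with kVoid in the "no type yet" role; simplified and not ART code:

#include <cassert>

enum Type { kVoid, kInt, kLong, kFloat, kDouble, kRef };

static bool Is64Bit(Type t) { return t == kLong || t == kDouble; }
static bool IsIntegral(Type t) { return t == kInt || t == kLong; }

// Returns false on a definite conflict, otherwise folds `input` into `*common`.
static bool Merge(Type* common, Type input) {
  if (*common == kVoid) { *common = input; return true; }     // first typed input
  if (*common == input) return true;                          // no change in type
  if (Is64Bit(*common) != Is64Bit(input)) return false;       // size mismatch: conflict
  if (IsIntegral(*common)) { *common = input; return true; }  // ambiguous int: retype later
  if (IsIntegral(input)) return true;                         // keep float/double/ref candidate
  return false;                                               // float vs. reference: conflict
}

int main() {
  Type t = kVoid;
  assert(Merge(&t, kInt) && Merge(&t, kFloat) && t == kFloat);  // int input may be retyped
  assert(!Merge(&t, kDouble));                                  // 32- vs. 64-bit: conflict
  return 0;
}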
 
-  // 12) Clear locals.
+// Replace inputs of `phi` to match its type. Return false if conflict is identified.
+bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  Primitive::Type common_type = phi->GetType();
+  if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) {
+    // Phi either contains only other untyped phis (common_type == kPrimVoid),
+    // or `common_type` is integral and we do not need to retype ambiguous inputs
+    // because they are always constructed with the integral type candidate.
+    if (kIsDebugBuild) {
+      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+        HInstruction* input = phi->InputAt(i);
+        if (common_type == Primitive::kPrimVoid) {
+          DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
+        } else {
+          DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) ||
+                 HPhi::ToPhiType(input->GetType()) == common_type);
+        }
+      }
+    }
+    // Inputs did not need to be replaced, hence no conflict. Report success.
+    return true;
+  } else {
+    DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      HInstruction* input = phi->InputAt(i);
+      if (input->GetType() != common_type) {
+        // Input type does not match phi's type. Try to retype the input or
+        // generate a suitably typed equivalent.
+        HInstruction* equivalent = (common_type == Primitive::kPrimNot)
+            ? GetReferenceTypeEquivalent(input)
+            : GetFloatOrDoubleEquivalent(input, common_type);
+        if (equivalent == nullptr) {
+          // Input could not be typed. Report conflict.
+          return false;
+        }
+        // Make sure the input did not change its type and we do not need to
+        // update its users.
+        DCHECK_NE(input, equivalent);
+
+        phi->ReplaceInput(equivalent, i);
+        if (equivalent->IsPhi()) {
+          worklist->push_back(equivalent->AsPhi());
+        }
+      }
+    }
+    // All inputs either matched the type of the phi or we successfully replaced
+    // them with a suitable equivalent. Report success.
+    return true;
+  }
+}
+
+// Attempt to set the primitive type of `phi` to match its inputs. Return whether
+// it was changed by the algorithm or not.
+bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+  DCHECK(phi->IsLive());
+  Primitive::Type original_type = phi->GetType();
+
+  // Try to type the phi in two stages:
+  // (1) find a candidate type for the phi by merging types of all its inputs,
+  // (2) try to type the phi's inputs to that candidate type.
+  // Either of these stages may detect a type conflict and fail, in which case
+  // we immediately abort.
+  if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) {
+    // Conflict detected. Mark the phi dead and return true because it changed.
+    phi->SetDead();
+    return true;
+  }
+
+  // Return true if the type of the phi has changed.
+  return phi->GetType() != original_type;
+}
+
+void SsaBuilder::RunPrimitiveTypePropagation() {
+  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsLoopHeader()) {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          worklist.push_back(phi);
+        }
+      }
+    } else {
+      for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        // Eagerly compute the type of the phi, for quicker convergence. Note
+        // that we don't need to add users to the worklist because we are
+        // doing a reverse post-order visit: the phi's users are either
+        // non-loop phis that will be visited later in this visit, or loop
+        // phis that are already in the worklist.
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          UpdatePrimitiveType(phi, &worklist);
+        }
+      }
+    }
+  }
+
+  ProcessPrimitiveTypePropagationWorklist(&worklist);
+  EquivalentPhisCleanup();
+}
+
+void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) {
+  // Process worklist
+  while (!worklist->empty()) {
+    HPhi* phi = worklist->back();
+    worklist->pop_back();
+    // The phi could have been made dead as a result of conflicts while in the
+    // worklist. If it is now dead, there is no point in updating its type.
+    if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) {
+      AddDependentInstructionsToWorklist(phi, worklist);
+    }
+  }
+}
+
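For illustration, the worklist loop above is a classic fixed-point iteration:
retype a phi and, if its type changed, requeue everything that depends on it.
A minimal stand-alone sketch, where Node and the integer "type" are invented
stand-ins for HPhi and Primitive::Type:

  #include <algorithm>
  #include <vector>

  struct Node {
    int type = 0;                 // 0 plays the role of kPrimVoid.
    std::vector<Node*> inputs;
    std::vector<Node*> users;
  };

  // Stand-in for UpdatePrimitiveType(): joins the input types and reports
  // whether anything changed.
  bool UpdateType(Node* n) {
    int joined = n->type;
    for (Node* in : n->inputs) joined = std::max(joined, in->type);
    if (joined == n->type) return false;
    n->type = joined;
    return true;
  }

  void RunToFixedPoint(std::vector<Node*>* worklist) {
    while (!worklist->empty()) {
      Node* n = worklist->back();
      worklist->pop_back();
      if (UpdateType(n)) {
        // A changed type may invalidate everything that depends on n.
        for (Node* user : n->users) worklist->push_back(user);
      }
    }
  }

Termination follows because a phi's type can only be promoted a bounded number
of times before it either settles or the phi is marked dead.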
+static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  HArrayGet* next = aget->GetNext()->AsArrayGet();
+  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+}
+
+static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr);
+
+  HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet(
+      aget->GetArray(),
+      aget->GetIndex(),
+      type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble,
+      aget->GetDexPc());
+  aget->GetBlock()->InsertInstructionAfter(equivalent, aget);
+  return equivalent;
+}
+
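For background, a dex aget only says that the slot holds a 32-bit primitive;
whether those bits are an int or a float is known only from the array's type
(likewise aget-wide for long/double), which is why an int/long ArrayGet and its
float/double equivalent can legitimately coexist until the array is typed. A
tiny sketch of the underlying bit-pattern ambiguity (AsFloat is an illustrative
helper, not ART code):

  #include <cstdint>
  #include <cstring>

  // The same 32 bits from an array slot, viewed as two different primitives.
  float AsFloat(int32_t bits) {
    float f;
    std::memcpy(&f, &bits, sizeof(f));  // Reinterpret the bits, do not convert.
    return f;
  }
  // AsFloat(0x3f800000) == 1.0f, while the same bits as an int are 1065353216.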
+static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ReferenceTypeInfo array_type = array->GetReferenceTypeInfo();
+  DCHECK(array_type.IsPrimitiveArrayClass());
+  return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
+}
+
+bool SsaBuilder::FixAmbiguousArrayOps() {
+  if (ambiguous_agets_.empty() && ambiguous_asets_.empty()) {
+    return true;
+  }
+
+  // The wrong ArrayGet equivalent may still have Phi uses coming from ArraySet
+  // uses (because they are untyped) and environment uses (if --debuggable).
+  // After resolving all ambiguous ArrayGets, we will re-run primitive type
+  // propagation on the Phis which need to be updated.
+  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    for (HArrayGet* aget_int : ambiguous_agets_) {
+      HInstruction* array = aget_int->GetArray();
+      if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
+        return false;
+      }
+
+      HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int);
+      Primitive::Type array_type = GetPrimitiveArrayComponentType(array);
+      DCHECK_EQ(Primitive::Is64BitType(aget_int->GetType()), Primitive::Is64BitType(array_type));
+
+      if (Primitive::IsIntOrLongType(array_type)) {
+        if (aget_float != nullptr) {
+          // There is a float/double equivalent. We must replace it and re-run
+          // primitive type propagation on all dependent instructions.
+          aget_float->ReplaceWith(aget_int);
+          aget_float->GetBlock()->RemoveInstruction(aget_float);
+          AddDependentInstructionsToWorklist(aget_int, &worklist);
+        }
+      } else {
+        DCHECK(Primitive::IsFloatingPointType(array_type));
+        if (aget_float == nullptr) {
+          // This is a float/double ArrayGet but there were no typed uses which
+          // would create the typed equivalent. Create it now.
+          aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int);
+        }
+        // Replace the original int/long instruction. Note that it may have phi
+        // uses, environment uses, as well as real uses (from untyped ArraySets).
+        // We need to re-run primitive type propagation on its dependent instructions.
+        aget_int->ReplaceWith(aget_float);
+        aget_int->GetBlock()->RemoveInstruction(aget_int);
+        AddDependentInstructionsToWorklist(aget_float, &worklist);
+      }
+    }
+
+    // Set a flag stating that types of ArrayGets have been resolved. Requesting
+    // an equivalent of the wrong type with GetFloatOrDoubleEquivalentOfArrayGet
+    // will fail from now on.
+    agets_fixed_ = true;
+
+    for (HArraySet* aset : ambiguous_asets_) {
+      HInstruction* array = aset->GetArray();
+      if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
+        return false;
+      }
+
+      HInstruction* value = aset->GetValue();
+      Primitive::Type value_type = value->GetType();
+      Primitive::Type array_type = GetPrimitiveArrayComponentType(array);
+      DCHECK_EQ(Primitive::Is64BitType(value_type), Primitive::Is64BitType(array_type));
+
+      if (Primitive::IsFloatingPointType(array_type)) {
+        if (!Primitive::IsFloatingPointType(value_type)) {
+          DCHECK(Primitive::IsIntegralType(value_type));
+          // Array elements are floating-point but the value has not been replaced
+          // with its floating-point equivalent. The replacement must always
+          // succeed in code validated by the verifier.
+          HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type);
+          DCHECK(equivalent != nullptr);
+          aset->ReplaceInput(equivalent, /* input_index */ 2);
+          if (equivalent->IsPhi()) {
+            // Returned equivalent is a phi which may not have had its inputs
+            // replaced yet. We need to run primitive type propagation on it.
+            worklist.push_back(equivalent->AsPhi());
+          }
+        }
+      } else {
+        // Array elements are integral and the value assigned to them initially
+        // was integral too. Nothing to do.
+        DCHECK(Primitive::IsIntegralType(array_type));
+        DCHECK(Primitive::IsIntegralType(value_type));
+      }
+    }
+  }
+
+  if (!worklist.empty()) {
+    ProcessPrimitiveTypePropagationWorklist(&worklist);
+    EquivalentPhisCleanup();
+  }
+
+  return true;
+}
+
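The resolution above boils down to one decision per ambiguous ArrayGet, driven
solely by the array's now-known component type; whether the floating-point
equivalent already exists only determines if it must be created before
switching. A sketch of the choice (the enum is illustrative, not ART code):

  enum class AgetResolution { kKeepIntegral, kSwitchToFloatingPoint };

  AgetResolution Resolve(bool component_is_floating_point) {
    return component_is_floating_point ? AgetResolution::kSwitchToFloatingPoint
                                       : AgetResolution::kKeepIntegral;
  }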
+BuildSsaResult SsaBuilder::BuildSsa() {
+  // 1) Visit in reverse post order. We need to have all predecessors of a block
+  // visited (with the exception of loops) in order to create the right environment
+  // for that block. For loops, we create phis whose inputs will be set in 2).
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+
+  // 2) Set inputs of loop header phis.
+  SetLoopHeaderPhiInputs();
+
+  // 3) Propagate types of phis. At this point, phis are typed void in the general
+  // case, or float/double/reference if we created an equivalent phi. So we need
+  // to propagate the types across phis to give them a correct type. If a type
+  // conflict is detected in this stage, the phi is marked dead.
+  RunPrimitiveTypePropagation();
+
+  // 4) Now that the correct primitive types have been assigned, we can get rid
+  // of redundant phis. Note that we cannot do this phase before type propagation,
+  // otherwise we could get rid of phi equivalents, whose presence is a requirement
+  // for the type propagation phase. Note that this is to satisfy statement (a)
+  // of the SsaBuilder (see ssa_builder.h).
+  SsaRedundantPhiElimination(GetGraph()).Run();
+
+  // 5) Fix the type for null constants which are part of an equality comparison.
+  // We need to do this after redundant phi elimination, to ensure the only cases
+  // that we can see are reference comparison against 0. The redundant phi
+  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
+  // or HNotEqual.
+  FixNullConstantType();
+
+  // 6) Compute type of reference type instructions. The pass assumes that
+  // NullConstant has been fixed up.
+  ReferenceTypePropagation(GetGraph(), handles_).Run();
+
+  // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
+  // or long/double) and marked ArraySets with ambiguous input type. Now that RTP
+  // has computed the type of the array input, the ambiguity can be resolved and the
+  // correct equivalents kept.
+  if (!FixAmbiguousArrayOps()) {
+    return kBuildSsaFailAmbiguousArrayOp;
+  }
+
+  // 8) Mark dead phis. This will mark phis which are not used by instructions
+  // or other live phis. If compiling as debuggable code, phis will also be kept
+  // live if they have an environment use.
+  SsaDeadPhiElimination dead_phi_elimination(GetGraph());
+  dead_phi_elimination.MarkDeadPhis();
+
+  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // can have a phi equivalent that is not dead. In that case we have to replace
+  // it with the live equivalent because deoptimization and try/catch rely on
+  // environments containing values of all live vregs at that point. Note that
+  // there can be multiple phis for the same Dex register that are live
+  // (for example when merging constants), in which case it is okay for the
+  // environments to just reference one.
+  FixEnvironmentPhis();
+
+  // 10) Now that the right phis are used for the environments, we can eliminate
+  // phis we do not need. Regardless of the debuggable status, this phase is
+  // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
+  // as for the code generation, which does not deal with phis of conflicting
+  // input types.
+  dead_phi_elimination.EliminateDeadPhis();
+
+  // 11) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -404,6 +496,8 @@
       current->GetBlock()->RemoveInstruction(current);
     }
   }
+
+  return kBuildSsaSuccess;
 }
 
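A hypothetical caller of the now-fallible interface, showing how BuildSsaResult
is meant to be consumed (the real call site lives in the optimizing compiler's
graph-building path, and further error codes beyond the two visible here may
exist):

  bool TryBuildSsa(HGraph* graph, StackHandleScopeCollection* handles) {
    SsaBuilder builder(graph, handles);
    switch (builder.BuildSsa()) {
      case kBuildSsaSuccess:
        return true;  // The graph is in SSA form; proceed with optimizations.
      case kBuildSsaFailAmbiguousArrayOp:
        // RTP could not type an array input, so the aget/aput ambiguity is
        // unresolvable; bail out to a less optimized path rather than guess.
        return false;
      default:
        return false;
    }
  }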
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
@@ -591,6 +685,8 @@
  * phi with a floating point / reference type.
  */
 HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one.";
+
   // We place the floating point / reference phi next to this phi.
   HInstruction* next = phi->GetNext();
   if (next != nullptr
@@ -606,35 +702,50 @@
     ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
-      // but the type propagation phase will fix it.
+      // Copy the inputs. Note that the graph may not be correctly typed
+      // by doing this copy, but the type propagation phase will fix it.
       new_phi->SetRawInputAt(i, phi->InputAt(i));
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+    DCHECK(new_phi->IsLive());
     return new_phi;
   } else {
+    // An existing equivalent was found. If it is dead, a conflict was
+    // previously identified and we return nullptr instead.
     HPhi* next_phi = next->AsPhi();
     DCHECK_EQ(next_phi->GetType(), type);
-    if (next_phi->IsDead()) {
-      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
-      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
-      // cannot revive undefined loop header phis because they cannot have uses.
-      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
-      next_phi->SetLive();
-    }
-    return next_phi;
+    return next_phi->IsLive() ? next_phi : nullptr;
   }
 }
 
-HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                     HInstruction* value,
-                                                     Primitive::Type type) {
+HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  DCHECK(Primitive::IsIntegralType(aget->GetType()));
+
+  if (!Primitive::IsIntOrLongType(aget->GetType())) {
+    // Cannot type boolean, char, byte, short to float/double.
+    return nullptr;
+  }
+
+  DCHECK(ContainsElement(ambiguous_agets_, aget));
+  if (agets_fixed_) {
+    // This used to be an ambiguous ArrayGet but its type has been resolved to
+    // int/long. Requesting a float/double equivalent should lead to a conflict.
+    if (kIsDebugBuild) {
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(Primitive::IsIntOrLongType(GetPrimitiveArrayComponentType(aget->GetArray())));
+    }
+    return nullptr;
+  } else {
+    // This is an ambiguous ArrayGet which has not been resolved yet. Return an
+    // equivalent float/double instruction to use until it is resolved.
+    HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget);
+    return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent;
+  }
+}
+
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) {
   if (value->IsArrayGet()) {
-    // The verifier has checked that values in arrays cannot be used for both
-    // floating point and non-floating point operations. It is therefore safe to just
-    // change the type of the operation.
-    value->AsArrayGet()->SetType(type);
-    return value;
+    return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet());
   } else if (value->IsLongConstant()) {
     return GetDoubleEquivalent(value->AsLongConstant());
   } else if (value->IsIntConstant()) {
@@ -642,12 +753,7 @@
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
   } else {
-    // For other instructions, we assume the verifier has checked that the dex format is correctly
-    // typed and the value in a dex register will not be used for both floating point and
-    // non-floating point operations. So the only reason an instruction would want a floating
-    // point equivalent is for an unused phi that will be removed by the dead phi elimination phase.
-    DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")";
-    return value;
+    return nullptr;
   }
 }
 
@@ -662,15 +768,17 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
+  Primitive::Type load_type = load->GetType();
   HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
   // If the operation requests a specific type, we make sure its input is of that type.
-  if (load->GetType() != value->GetType()) {
-    if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
-    } else if (load->GetType() == Primitive::kPrimNot) {
+  if (load_type != value->GetType()) {
+    if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
+      value = GetFloatOrDoubleEquivalent(value, load_type);
+    } else if (load_type == Primitive::kPrimNot) {
       value = GetReferenceTypeEquivalent(value);
     }
   }
+
   load->ReplaceWith(value);
   load->GetBlock()->RemoveInstruction(load);
 }
@@ -760,4 +868,21 @@
   temp->GetBlock()->RemoveInstruction(temp);
 }
 
+void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(!Primitive::IsFloatingPointType(type));
+  if (Primitive::IsIntOrLongType(type)) {
+    ambiguous_agets_.push_back(aget);
+  }
+  VisitInstruction(aget);
+}
+
+void SsaBuilder::VisitArraySet(HArraySet* aset) {
+  Primitive::Type type = aset->GetValue()->GetType();
+  if (Primitive::IsIntOrLongType(type)) {
+    ambiguous_asets_.push_back(aset);
+  }
+  VisitInstruction(aset);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index dcce5e4..0fcc3a1 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,17 +49,21 @@
  */
 class SsaBuilder : public HGraphVisitor {
  public:
-  explicit SsaBuilder(HGraph* graph)
+  explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
       : HGraphVisitor(graph),
+        handles_(handles),
+        agets_fixed_(false),
         current_locals_(nullptr),
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
     loop_headers_.reserve(kDefaultNumberOfLoops);
   }
 
-  void BuildSsa();
+  BuildSsaResult BuildSsa();
 
   // Returns locals vector for `block`. If it is a catch block, the vector will be
   // prepopulated with catch phis for vregs which are defined in `current_locals_`.
@@ -71,23 +75,39 @@
   void VisitStoreLocal(HStoreLocal* store);
   void VisitInstruction(HInstruction* instruction);
   void VisitTemporary(HTemporary* instruction);
-
-  static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
-                                                  HInstruction* instruction,
-                                                  Primitive::Type type);
-
-  static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+  void VisitArrayGet(HArrayGet* aget);
+  void VisitArraySet(HArraySet* aset);
 
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
   void SetLoopHeaderPhiInputs();
+  void FixEnvironmentPhis();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
+  void RunPrimitiveTypePropagation();
 
-  static HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
-  static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
-  static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  // Attempts to resolve the types of aget(-wide) instructions, and to type the
+  // values passed to aput(-wide) instructions, using the reference type
+  // information on the array input. Returns false if the type of an array is unknown.
+  bool FixAmbiguousArrayOps();
+
+  bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
+  void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
+
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
+  HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
+  HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
+  HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+  HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
+
+  StackHandleScopeCollection* const handles_;
+
+  // True if types of ambiguous ArrayGets have been resolved.
+  bool agets_fixed_;
 
   // Locals for the current block being visited.
   ArenaVector<HInstruction*>* current_locals_;
@@ -96,6 +116,9 @@
   // over these blocks to set the inputs of their phis.
   ArenaVector<HBasicBlock*> loop_headers_;
 
+  ArenaVector<HArrayGet*> ambiguous_agets_;
+  ArenaVector<HArraySet*> ambiguous_asets_;
+
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index a3219dc..2eef307 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -17,6 +17,7 @@
 #include "ssa_phi_elimination.h"
 
 #include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
 
 namespace art {
 
@@ -40,15 +41,17 @@
         continue;
       }
 
-      bool has_non_phi_use = false;
-      for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        if (!use_it.Current()->GetUser()->IsPhi()) {
-          has_non_phi_use = true;
-          break;
+      bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses());
+      if (!keep_alive) {
+        for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
+          if (!use_it.Current()->GetUser()->IsPhi()) {
+            keep_alive = true;
+            break;
+          }
         }
       }
 
-      if (has_non_phi_use) {
+      if (keep_alive) {
         worklist_.push_back(phi);
       } else {
         phi->SetDead();
@@ -94,8 +97,8 @@
           for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
                use_it.Advance()) {
             HInstruction* user = use_it.Current()->GetUser();
-            DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
-            DCHECK(user->AsPhi()->IsDead()) << user->GetId();
+            DCHECK(user->IsLoopHeaderPhi());
+            DCHECK(user->AsPhi()->IsDead());
           }
         }
         // Remove the phi from use lists of its inputs.
@@ -127,6 +130,9 @@
     }
   }
 
+  ArenaSet<uint32_t> visited_phis_in_cycle(graph_->GetArena()->Adapter());
+  ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter());
+
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
@@ -141,46 +147,92 @@
       continue;
     }
 
-    // Find if the inputs of the phi are the same instruction.
-    HInstruction* candidate = phi->InputAt(0);
-    // A loop phi cannot have itself as the first phi. Note that this
-    // check relies on our simplification pass ensuring the pre-header
-    // block is first in the list of predecessors of the loop header.
-    DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor());
-    DCHECK_NE(phi, candidate);
+    HInstruction* candidate = nullptr;
+    visited_phis_in_cycle.clear();
+    cycle_worklist.clear();
 
-    for (size_t i = 1; i < phi->InputCount(); ++i) {
-      HInstruction* input = phi->InputAt(i);
-      // For a loop phi, if the input is the phi, the phi is still candidate for
-      // elimination.
-      if (input != candidate && input != phi) {
+    cycle_worklist.push_back(phi);
+    visited_phis_in_cycle.insert(phi->GetId());
+    bool catch_phi_in_cycle = phi->IsCatchPhi();
+
+    // First do a simple loop over inputs and check if they are all the same.
+    for (size_t j = 0; j < phi->InputCount(); ++j) {
+      HInstruction* input = phi->InputAt(j);
+      if (input == phi) {
+        continue;
+      } else if (candidate == nullptr) {
+        candidate = input;
+      } else if (candidate != input) {
         candidate = nullptr;
         break;
       }
     }
 
-    // If the inputs are not the same, continue.
+    // If we haven't found a candidate, check for a phi cycle. Note that we need to detect
+    // such cycles to avoid having reference and non-reference equivalents. We check this
+    // invariant in the graph checker.
+    if (candidate == nullptr) {
+      // We iterate over the array as long as it grows.
+      for (size_t i = 0; i < cycle_worklist.size(); ++i) {
+        HPhi* current = cycle_worklist[i];
+        DCHECK(!current->IsLoopHeaderPhi() ||
+               current->GetBlock()->IsLoopPreHeaderFirstPredecessor());
+
+        for (size_t j = 0; j < current->InputCount(); ++j) {
+          HInstruction* input = current->InputAt(j);
+          if (input == current) {
+            continue;
+          } else if (input->IsPhi()) {
+            if (!ContainsElement(visited_phis_in_cycle, input->GetId())) {
+              cycle_worklist.push_back(input->AsPhi());
+              visited_phis_in_cycle.insert(input->GetId());
+              catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
+            } else {
+              // Already visited, nothing to do.
+            }
+          } else if (candidate == nullptr) {
+            candidate = input;
+          } else if (candidate != input) {
+            candidate = nullptr;
+            // Clear the cycle worklist to break out of the outer loop.
+            cycle_worklist.clear();
+            break;
+          }
+        }
+      }
+    }
+
     if (candidate == nullptr) {
       continue;
     }
 
-    // The candidate may not dominate a phi in a catch block.
-    if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) {
-      continue;
-    }
-
-    // Because we're updating the users of this phi, we may have new candidates
-    // for elimination. Add phis that use this phi to the worklist.
-    for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) {
-      HUseListNode<HInstruction*>* current = it.Current();
-      HInstruction* user = current->GetUser();
-      if (user->IsPhi()) {
-        worklist_.push_back(user->AsPhi());
+    for (HPhi* current : cycle_worklist) {
+      // The candidate may not dominate a phi in a catch block: there may be
+      // non-throwing instructions at the beginning of a try range that end up
+      // as the first inputs of catch phis.
+      // TODO(dbrazdil): Remove this situation by moving those non-throwing instructions
+      // before the try entry.
+      if (catch_phi_in_cycle) {
+        if (!candidate->StrictlyDominates(current)) {
+          continue;
+        }
+      } else {
+        DCHECK(candidate->StrictlyDominates(current));
       }
-    }
 
-    phi->ReplaceWith(candidate);
-    phi->GetBlock()->RemovePhi(phi);
+      // Because we're updating the users of this phi, we may have new candidates
+      // for elimination. Add phis that use this phi to the worklist.
+      for (HUseIterator<HInstruction*> it(current->GetUses()); !it.Done(); it.Advance()) {
+        HUseListNode<HInstruction*>* use = it.Current();
+        HInstruction* user = use->GetUser();
+        if (user->IsPhi() && !ContainsElement(visited_phis_in_cycle, user->GetId())) {
+          worklist_.push_back(user->AsPhi());
+        }
+      }
+      current->ReplaceWith(candidate);
+      current->GetBlock()->RemovePhi(current);
+    }
   }
 }
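The cycle walk above generalizes the old single-phi check: a whole group of
phis that only ever feed each other and ultimately see a single real value can
be replaced by that value. A minimal stand-alone analogue, with Value and Phi
as invented stand-ins for HInstruction and HPhi:

  #include <cstddef>
  #include <unordered_set>
  #include <vector>

  struct Value { virtual ~Value() {} };
  struct Phi : Value { std::vector<Value*> inputs; };

  // Returns the unique non-phi value feeding the phi cycle reachable from
  // `root`, or nullptr if there are two or more distinct real inputs.
  Value* FindUniqueNonPhiInput(Phi* root) {
    std::vector<Phi*> work = {root};
    std::unordered_set<Phi*> seen = {root};
    Value* candidate = nullptr;
    for (size_t i = 0; i < work.size(); ++i) {  // Grows as phis are discovered.
      for (Value* in : work[i]->inputs) {
        if (Phi* p = dynamic_cast<Phi*>(in)) {
          if (seen.insert(p).second) work.push_back(p);
        } else if (candidate == nullptr) {
          candidate = in;
        } else if (candidate != in) {
          return nullptr;  // Two distinct real inputs: nothing to eliminate.
        }
      }
    }
    return candidate;
  }

The real pass additionally checks dominance before rewriting catch phis, since
the candidate may not dominate a phi at the start of a try range.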
 
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 024278f..d2885a8 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+class SsaTest : public CommonCompilerTest {};
+
 class SsaPrettyPrinter : public HPrettyPrinter {
  public:
   explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
@@ -83,11 +85,10 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  graph->BuildDominatorTree();
+  TransformToSsa(graph);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  graph->TransformToSsa();
   ReNumberInstructions(graph);
 
   // Test that phis had their type set.
@@ -103,7 +104,7 @@
   ASSERT_STREQ(expected, printer.str().c_str());
 }
 
-TEST(SsaTest, CFG1) {
+TEST_F(SsaTest, CFG1) {
   // Test that we get rid of loads and stores.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -131,7 +132,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG2) {
+TEST_F(SsaTest, CFG2) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when there is only code in the else branch.
   const char* expected =
@@ -162,7 +163,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, CFG3) {
+TEST_F(SsaTest, CFG3) {
   // Test that we create a phi for the join block of an if control flow instruction
   // when both branches update a local.
   const char* expected =
@@ -195,7 +196,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop1) {
+TEST_F(SsaTest, Loop1) {
   // Test that we create a phi for an initialized local at entry of a loop.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -228,7 +229,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop2) {
+TEST_F(SsaTest, Loop2) {
   // Simple loop with one preheader and one back edge.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -258,7 +259,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop3) {
+TEST_F(SsaTest, Loop3) {
   // Test that a local not yet defined at the entry of a loop is handled properly.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -290,7 +291,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop4) {
+TEST_F(SsaTest, Loop4) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   const char* expected =
@@ -325,7 +326,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop5) {
+TEST_F(SsaTest, Loop5) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   const char* expected =
@@ -367,7 +368,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop6) {
+TEST_F(SsaTest, Loop6) {
   // Test a loop with one preheader and two back edges (e.g. continue).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -406,7 +407,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, Loop7) {
+TEST_F(SsaTest, Loop7) {
   // Test a loop with one preheader, one back edge, and two exit edges (e.g. break).
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -448,7 +449,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, DeadLocal) {
+TEST_F(SsaTest, DeadLocal) {
   // Test that we correctly handle a local not being used.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
@@ -466,7 +467,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, LocalInIf) {
+TEST_F(SsaTest, LocalInIf) {
   // Test that we do not create a phi in the join block when one predecessor
   // does not update the local.
   const char* expected =
@@ -496,7 +497,7 @@
   TestCode(data, expected);
 }
 
-TEST(SsaTest, MultiplePredecessors) {
+TEST_F(SsaTest, MultiplePredecessors) {
   // Test that we do not create a phi when one predecessor
   // does not update the local.
   const char* expected =
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
new file mode 100644
index 0000000..81f2a56
--- /dev/null
+++ b/compiler/profile_assistant.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_assistant.h"
+
+namespace art {
+
+// Minimum number of new methods that profiles must contain to enable recompilation.
+static constexpr uint32_t kMinNewMethodsForCompilation = 10;
+
+bool ProfileAssistant::ProcessProfiles(
+      const std::vector<std::string>& profile_files,
+      const std::vector<std::string>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  DCHECK(!profile_files.empty());
+  DCHECK(reference_profile_files.empty() ||
+      (profile_files.size() == reference_profile_files.size()));
+
+  std::vector<ProfileCompilationInfo> new_info(profile_files.size());
+  bool should_compile = false;
+  // Read the main profile files.
+  for (size_t i = 0; i < profile_files.size(); i++) {
+    if (!new_info[i].Load(profile_files[i])) {
+      LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+      return false;
+    }
+    // Do we have enough new profiled methods to make the compilation worthwhile?
+    should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
+  }
+  if (!should_compile) {
+    *profile_compilation_info = nullptr;
+    return true;
+  }
+
+  std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+  for (size_t i = 0; i < new_info.size(); i++) {
+    // Merge all data into a single object.
+    result->Load(new_info[i]);
+    // If we have any reference profile information merge their information with
+    // the current profiles and save them back to disk.
+    if (!reference_profile_files.empty()) {
+      if (!new_info[i].Load(reference_profile_files[i])) {
+        LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+        return false;
+      }
+      if (!new_info[i].Save(reference_profile_files[i])) {
+        LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+        return false;
+      }
+    }
+  }
+  *profile_compilation_info = result.release();
+  return true;
+}
+
+}  // namespace art
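A hypothetical call site for the new API (the file names are invented, and
ownership of the out-parameter passes to the caller):

  std::vector<std::string> profiles = { "/data/misc/profiles/app.prof" };
  std::vector<std::string> references = { "/data/misc/profiles/app.prof.ref" };
  ProfileCompilationInfo* info = nullptr;
  if (!ProfileAssistant::ProcessProfiles(profiles, references, &info)) {
    LOG(WARNING) << "Profile analysis failed";
  } else if (info == nullptr) {
    // Too few new methods: skip recompilation; the references are unchanged.
  } else {
    // Significant new data: app.prof has been merged into app.prof.ref on
    // disk, and `info` can now drive a profile-guided compilation.
    delete info;
  }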
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
new file mode 100644
index 0000000..088c8bd
--- /dev/null
+++ b/compiler/profile_assistant.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_
+#define ART_COMPILER_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "jit/offline_profiling_info.cc"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+  // Process the profile information present in the given files. Returns true
+  // if the analysis completed successfully (i.e. no errors during reading,
+  // merging or writing of profile files).
+  //
+  // If the returned value is true and there is a significant difference between
+  // profile_files and reference_profile_files:
+  //   - profile_compilation_info is set to a non-null object that
+  //     can be used to drive compilation. It will be the merge of all the data
+  //     found in profile_files and reference_profile_files.
+  //   - the data from profile_files[i] is merged into
+  //     reference_profile_files[i] and the corresponding backing file is
+  //     updated.
+  //
+  // If the returned value is false or the difference is insignificant,
+  // profile_compilation_info will be set to null.
+  //
+  // Additional notes:
+  //   - as mentioned above, this function may update the content of the files
+  //     passed with the reference_profile_files.
+  //   - if reference_profile_files is not empty it must be the same size as
+  //     profile_files.
+  static bool ProcessProfiles(
+      const std::vector<std::string>& profile_files,
+      const std::vector<std::string>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_PROFILE_ASSISTANT_H_
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index afca8ad..0dc307c 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -400,6 +400,20 @@
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
+void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
+  EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00);
+}
+
+void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) {
+  CHECK(IsUint<5>(pos)) << pos;
+  CHECK(0 < size && size <= 32) << size;
+  CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
+  EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04);
+}
+
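Both instructions are MIPS32R2 SPECIAL3 encodings (major opcode 0x1f): for EXT
the rd field carries size - 1 and the shamt field carries pos, while for INS
the rd field carries pos + size - 1. Assuming EmitR packs the standard MIPS
R-format, a concrete encoding works out as follows:

  #include <cstdint>

  // Standard R-format packing: opcode, rs, rt, rd, shamt, funct.
  uint32_t PackR(uint32_t op, uint32_t rs, uint32_t rt,
                 uint32_t rd, uint32_t shamt, uint32_t funct) {
    return (op << 26) | (rs << 21) | (rt << 16) | (rd << 11) | (shamt << 6) | funct;
  }

  // ext $t0, $t1, 3, 5 (dest $t0 = 8, source $t1 = 9):
  //   PackR(0x1f, /* rs = src */ 9, /* rt = dst */ 8, /* msbd */ 5 - 1,
  //         /* lsb */ 3, 0x00) == 0x7d2820c0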
 void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x20, rs, rt, imm16);
 }
@@ -1121,8 +1135,14 @@
 }
 
 void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) {
-  LoadConst32(reg_lo, Low32Bits(value));
-  LoadConst32(reg_hi, High32Bits(value));
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  LoadConst32(reg_lo, low);
+  if (high != low) {
+    LoadConst32(reg_hi, high);
+  } else {
+    Move(reg_hi, reg_lo);
+  }
 }
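The new check pays off whenever both halves of the 64-bit constant are equal:
materializing a 32-bit constant generally costs up to two instructions
(lui + ori), while a register move costs one. The helper semantics, shown here
as stand-ins for clarity:

  #include <cstdint>

  uint32_t Low32(int64_t v)  { return static_cast<uint32_t>(v); }
  uint32_t High32(int64_t v) { return static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32); }

  // For v = 0x1234567812345678, Low32(v) == High32(v) == 0x12345678, so the
  // second lui + ori pair collapses into a single move.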
 
 void MipsAssembler::StoreConst32ToOffset(int32_t value,
@@ -1136,7 +1156,11 @@
     base = AT;
     offset = 0;
   }
-  LoadConst32(temp, value);
+  if (value == 0) {
+    temp = ZERO;
+  } else {
+    LoadConst32(temp, value);
+  }
   Sw(temp, base, offset);
 }
 
@@ -1152,22 +1176,48 @@
     base = AT;
     offset = 0;
   }
-  LoadConst32(temp, Low32Bits(value));
-  Sw(temp, base, offset);
-  LoadConst32(temp, High32Bits(value));
-  Sw(temp, base, offset + kMipsWordSize);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {
+    Sw(ZERO, base, offset);
+  } else {
+    LoadConst32(temp, low);
+    Sw(temp, base, offset);
+  }
+  if (high == 0) {
+    Sw(ZERO, base, offset + kMipsWordSize);
+  } else {
+    if (high != low) {
+      LoadConst32(temp, high);
+    }
+    Sw(temp, base, offset + kMipsWordSize);
+  }
 }
 
 void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) {
-  LoadConst32(temp, value);
+  if (value == 0) {
+    temp = ZERO;
+  } else {
+    LoadConst32(temp, value);
+  }
   Mtc1(temp, r);
 }
 
 void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
-  LoadConst32(temp, Low32Bits(value));
-  Mtc1(temp, rd);
-  LoadConst32(temp, High32Bits(value));
-  Mthc1(temp, rd);
+  uint32_t low = Low32Bits(value);
+  uint32_t high = High32Bits(value);
+  if (low == 0) {
+    Mtc1(ZERO, rd);
+  } else {
+    LoadConst32(temp, low);
+    Mtc1(temp, rd);
+  }
+  if (high == 0) {
+    Mthc1(ZERO, rd);
+  } else {
+    LoadConst32(temp, high);
+    Mthc1(temp, rd);
+  }
 }
 
 void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index f569aa8..066e7b0 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -156,6 +156,8 @@
   void Srlv(Register rd, Register rt, Register rs);
   void Rotrv(Register rd, Register rt, Register rs);  // R2+
   void Srav(Register rd, Register rt, Register rs);
+  void Ext(Register rd, Register rt, int pos, int size);  // R2+
+  void Ins(Register rd, Register rt, int pos, int size);  // R2+
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 6f8b3e8..4361843 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -367,6 +367,44 @@
   DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
 }
 
+TEST_F(AssemblerMIPSTest, Ins) {
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {
+          __ Ins(*reg1, *reg2, pos, size);
+          std::ostringstream instr;
+          instr << "ins $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ins");
+}
+
+TEST_F(AssemblerMIPSTest, Ext) {
+  std::vector<mips::Register*> regs = GetRegisters();
+  WarnOnCombinations(regs.size() * regs.size() * 33 * 16);
+  std::string expected;
+  for (mips::Register* reg1 : regs) {
+    for (mips::Register* reg2 : regs) {
+      for (int32_t pos = 0; pos < 32; pos++) {
+        for (int32_t size = 1; pos + size <= 32; size++) {
+          __ Ext(*reg1, *reg2, pos, size);
+          std::ostringstream instr;
+          instr << "ext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n";
+          expected += instr.str();
+        }
+      }
+    }
+  }
+  DriverStr(expected, "Ext");
+}
+
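The 33 * 16 passed to WarnOnCombinations is the number of valid (pos, size)
pairs per register pair: for each pos in [0, 31] there are 32 - pos legal
sizes, and the sum telescopes:

  // 32 + 31 + ... + 1 = 32 * 33 / 2 = 528 = 33 * 16.
  static_assert(32 * 33 / 2 == 33 * 16, "(pos, size) combinations per register pair");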
 TEST_F(AssemblerMIPSTest, Lb) {
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
 }
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 42ed881..244a5fe 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 #include <numeric>
+#include <sys/mman.h>
 
 #include "base/logging.h"
 #include "base/macros.h"
@@ -44,23 +45,17 @@
   }
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void RemoveChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        typename FreeBySizeSet::const_iterator free_by_size_pos) {
+void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
   auto free_by_start_pos = free_by_size_pos->second;
-  free_by_size->erase(free_by_size_pos);
-  free_by_start->erase(free_by_start_pos);
+  free_by_size_.erase(free_by_size_pos);
+  free_by_start_.erase(free_by_start_pos);
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void InsertChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        const SpaceChunk& chunk) {
+inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
   DCHECK_NE(chunk.size, 0u);
-  auto insert_result = free_by_start->insert(chunk);
+  auto insert_result = free_by_start_.insert(chunk);
   DCHECK(insert_result.second);
-  free_by_size->emplace(chunk.size, insert_result.first);
+  free_by_size_.emplace(chunk.size, insert_result.first);
 }
 
 SwapSpace::SwapSpace(int fd, size_t initial_size)
@@ -69,10 +64,18 @@
       lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
   // Assume that the file is unlinked.
 
-  InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size));
+  InsertChunk(NewFileChunk(initial_size));
 }
 
 SwapSpace::~SwapSpace() {
+  // Unmap all mmapped chunks. Nothing should be allocated anymore at
+  // this point, so there should be only full size chunks in free_by_start_.
+  for (const SpaceChunk& chunk : free_by_start_) {
+    if (munmap(chunk.ptr, chunk.size) != 0) {
+      PLOG(ERROR) << "Failed to unmap swap space chunk at "
+          << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
+    }
+  }
   // All arenas are backed by the same file. Just close the descriptor.
   close(fd_);
 }
@@ -113,7 +116,7 @@
       : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
   if (it != free_by_size_.end()) {
     old_chunk = *it->second;
-    RemoveChunk(&free_by_start_, &free_by_size_, it);
+    RemoveChunk(it);
   } else {
     // Not a big enough free chunk, need to increase file size.
     old_chunk = NewFileChunk(size);
@@ -124,13 +127,13 @@
   if (old_chunk.size != size) {
     // Insert the remainder.
     SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
-    InsertChunk(&free_by_start_, &free_by_size_, new_chunk);
+    InsertChunk(new_chunk);
   }
 
   return ret;
 }
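The allocation path above is best-fit with splitting: lower_bound on the
size-ordered set finds the smallest free chunk that fits, and any remainder is
put back on the free list. A minimal analogue using a single multimap (the real
code keeps two cross-referencing sets so that Free() can also coalesce
neighbors by address):

  #include <cstddef>
  #include <map>

  void* BestFitAlloc(std::multimap<size_t, char*>* free_by_size, size_t size) {
    auto it = free_by_size->lower_bound(size);  // Smallest chunk that fits.
    if (it == free_by_size->end()) {
      return nullptr;  // No fit: the caller grows the backing file instead.
    }
    char* ptr = it->second;
    size_t chunk_size = it->first;
    free_by_size->erase(it);
    if (chunk_size != size) {
      // Split: hand out the front, keep the tail on the free list.
      free_by_size->emplace(chunk_size - size, ptr + size);
    }
    return ptr;
  }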
 
-SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
+SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
 #if !defined(__APPLE__)
   size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
   int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
@@ -159,7 +162,7 @@
 }
 
 // TODO: Full coalescing.
-void SwapSpace::Free(void* ptrV, size_t size) {
+void SwapSpace::Free(void* ptr, size_t size) {
   MutexLock lock(Thread::Current(), lock_);
   size = RoundUp(size, 8U);
 
@@ -168,7 +171,7 @@
     free_before = CollectFree(free_by_start_, free_by_size_);
   }
 
-  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size };
+  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
   auto it = free_by_start_.lower_bound(chunk);
   if (it != free_by_start_.begin()) {
     auto prev = it;
@@ -180,7 +183,7 @@
       chunk.ptr -= prev->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "prev" is invalidated but "it" remains valid.
     }
   }
@@ -191,11 +194,11 @@
       chunk.size += it->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "it" is invalidated but we don't need it anymore.
     }
   }
-  InsertChunk(&free_by_start_, &free_by_size_, chunk);
+  InsertChunk(chunk);
 
   if (kCheckFreeMaps) {
     size_t free_after = CollectFree(free_by_start_, free_by_size_);
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9127b6b..b659f1d 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -19,42 +19,17 @@
 
 #include <cstdlib>
 #include <list>
+#include <vector>
 #include <set>
 #include <stdint.h>
 #include <stddef.h>
 
-#include "base/debug_stack.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "mem_map.h"
 
 namespace art {
 
-// Chunk of space.
-struct SpaceChunk {
-  uint8_t* ptr;
-  size_t size;
-
-  uintptr_t Start() const {
-    return reinterpret_cast<uintptr_t>(ptr);
-  }
-  uintptr_t End() const {
-    return reinterpret_cast<uintptr_t>(ptr) + size;
-  }
-};
-
-inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) {
-  return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr);
-}
-
-class SortChunkByPtr {
- public:
-  bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
-    return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
-  }
-};
-
 // An arena pool that creates arenas backed by an mmaped file.
 class SwapSpace {
  public:
@@ -68,17 +43,27 @@
   }
 
  private:
-  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+  // Chunk of space.
+  struct SpaceChunk {
+    uint8_t* ptr;
+    size_t size;
 
-  int fd_;
-  size_t size_;
-  std::list<SpaceChunk> maps_;
+    uintptr_t Start() const {
+      return reinterpret_cast<uintptr_t>(ptr);
+    }
+    uintptr_t End() const {
+      return reinterpret_cast<uintptr_t>(ptr) + size;
+    }
+  };
 
-  // NOTE: Boost.Bimap would be useful for the two following members.
+  class SortChunkByPtr {
+   public:
+    bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
+      return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
+    }
+  };
 
-  // Map start of a free chunk to its size.
   typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
-  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
 
   // Map size to an iterator to free_by_start_'s entry.
   typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
@@ -92,6 +77,21 @@
     }
   };
   typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet;
+
+  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+
+  void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
+  void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
+
+  int fd_;
+  size_t size_;
+  std::list<SpaceChunk> maps_;
+
+  // NOTE: Boost.Bimap would be useful for the two following members.
+
+  // Map start of a free chunk to its size.
+  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
+  // Free chunks ordered by size.
   FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -126,6 +126,9 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
@@ -201,9 +204,22 @@
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
+inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return lhs.swap_space_ == rhs.swap_space_;
+}
+
+template <typename T>
+inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return !(lhs == rhs);
+}
+
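These operators matter for C++ allocator semantics: containers may only
exchange or free each other's memory when their allocators compare equal,
which here means sharing the same backing SwapSpace. Assuming the usual
explicit SwapAllocator(SwapSpace*) constructor, the contract reads:

  // SwapSpace space(fd, initial_size);
  // SwapAllocator<int> a(&space);
  // SwapAllocator<int> b(&space);
  // SwapAllocator<int> c(nullptr);  // No swap file: plain heap allocation.
  // CHECK(a == b);  // Same space: memory is interchangeable between a and b.
  // CHECK(a != c);  // Different backing: must not exchange memory.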
+template <typename T>
 using SwapVector = std::vector<T, SwapAllocator<T>>;
 template <typename T, typename Comparator>
 using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 9eb5e67..db07267 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1213,6 +1213,7 @@
 
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOperandSizeOverride();
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
@@ -1221,6 +1222,7 @@
 
 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(reg);
   EmitComplex(7, Operand(reg), imm);
 }
@@ -1252,6 +1254,7 @@
 
 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
 }
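These CHECKs encode an ISA constraint: x86-64 CMP never takes a 64-bit
immediate; its widest immediate form is imm32, sign-extended to the operand
size, so wider values must first be materialized in a register. A sketch of
the range test (a stand-in for Immediate::is_int32()):

  #include <cstdint>

  bool IsInt32(int64_t value) {
    // The value must survive truncation to a sign-extended 32-bit immediate.
    return value == static_cast<int64_t>(static_cast<int32_t>(value));
  }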
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6fae8e4..32a237a 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -63,6 +63,7 @@
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
 #include "interpreter/unstarted_runtime.h"
+#include "jit/offline_profiling_info.h"
 #include "leb128.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -70,6 +71,7 @@
 #include "mirror/object_array-inl.h"
 #include "oat_writer.h"
 #include "os.h"
+#include "profile_assistant.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "ScopedLocalRef.h"
@@ -193,7 +195,7 @@
   UsageError("      corresponding to the file descriptor specified by --zip-fd.");
   UsageError("      Example: --zip-location=/system/app/Calculator.apk");
   UsageError("");
-  UsageError("  --oat-file=<file.oat>: specifies the oat output destination via a filename.");
+  UsageError("  --oat-file=<file.oat>: specifies an oat output destination via a filename.");
   UsageError("      Example: --oat-file=/system/framework/boot.oat");
   UsageError("");
   UsageError("  --oat-fd=<number>: specifies the oat output destination via a file descriptor.");
@@ -203,10 +205,10 @@
   UsageError("      to the file descriptor specified by --oat-fd.");
   UsageError("      Example: --oat-location=/data/dalvik-cache/system@app@Calculator.apk.oat");
   UsageError("");
-  UsageError("  --oat-symbols=<file.oat>: specifies the oat output destination with full symbols.");
+  UsageError("  --oat-symbols=<file.oat>: specifies an oat output destination with full symbols.");
   UsageError("      Example: --oat-symbols=/symbols/system/framework/boot.oat");
   UsageError("");
-  UsageError("  --image=<file.art>: specifies the output image filename.");
+  UsageError("  --image=<file.art>: specifies an output image filename.");
   UsageError("      Example: --image=/system/framework/boot.art");
   UsageError("");
   UsageError("  --image-format=(uncompressed|lz4):");
@@ -315,16 +317,29 @@
   UsageError("      stripped using standard command line tools such as strip or objcopy.");
   UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
   UsageError("");
-  UsageError("  --debuggable: Produce debuggable code. Implies --generate-debug-info.");
-  UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
+  UsageError("  --debuggable: Produce code debuggable with Java debugger. Implies -g.");
+  UsageError("");
+  UsageError("  --native-debuggable: Produce code debuggable with native debugger (like LLDB).");
+  UsageError("      Implies --debuggable.");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
   UsageError("      Example: --runtime-arg -Xms256m");
   UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
+  UsageError("      Can be specified multiple time, in which case the data from the different");
+  UsageError("      profiles will be aggregated.");
+  UsageError("");
+  UsageError("  --reference-profile-file=<filename>: specify a reference profile file to use when");
+  UsageError("      compiling. The data in this file will be compared with the data in the");
+  UsageError("      associated --profile-file and the compilation will proceed only if there is");
+  UsageError("      a significant difference (--reference-profile-file is paired with");
+  UsageError("      --profile-file in the natural order). If the compilation was attempted then");
+  UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
+  UsageError("      specified together with --profile-file.");
   UsageError("");
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
@@ -352,6 +367,9 @@
   UsageError("  --app-image-file=<file-name>: specify a file name for app image.");
   UsageError("      Example: --app-image-file=/data/dalvik-cache/system@app@Calculator.apk.art");
   UsageError("");
+  UsageError("  --multi-image: specify that separate oat and image files be generated for each "
+             "input dex file.");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -533,7 +551,9 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
-      oat_file_.release();
+      for (std::unique_ptr<File>& oat_file : oat_files_) {
+        oat_file.release();
+      }
       runtime_.release();
       verification_results_.release();
       key_value_store_.release();
@@ -541,7 +561,7 @@
   }
 
   struct ParserOptions {
-    std::string oat_symbols;
+    std::vector<const char*> oat_symbols;
     std::string boot_image_filename;
     bool watch_dog_enabled = true;
     bool requested_specific_compiler = false;
@@ -641,8 +661,8 @@
     }
   }
 
-  void ProcessOptions(ParserOptions* parser_options) {
-    boot_image_ = !image_filename_.empty();
+  void ProcessOptions(ParserOptions* parser_options, bool multi_image) {
+    boot_image_ = !image_filenames_.empty();
     app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
 
     if (IsAppImage() && IsBootImage()) {
@@ -654,11 +674,11 @@
       compiler_options_->debuggable_ = true;
     }
 
-    if (oat_filename_.empty() && oat_fd_ == -1) {
+    if (oat_filenames_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
 
-    if (!oat_filename_.empty() && oat_fd_ != -1) {
+    if (!oat_filenames_.empty() && oat_fd_ != -1) {
       Usage("--oat-file should not be used with --oat-fd");
     }
 
@@ -670,10 +690,19 @@
       Usage("--oat-symbols should not be used with --host");
     }
 
-    if (oat_fd_ != -1 && !image_filename_.empty()) {
+    if (oat_fd_ != -1 && !image_filenames_.empty()) {
       Usage("--oat-fd should not be used with --image");
     }
 
+    if (!parser_options->oat_symbols.empty() &&
+        parser_options->oat_symbols.size() != oat_filenames_.size()) {
+      Usage("--oat-file arguments do not match --oat-symbols arguments");
+    }
+
+    if (!image_filenames_.empty() && image_filenames_.size() != oat_filenames_.size()) {
+      Usage("--oat-file arguments do not match --image arguments");
+    }
+
     if (android_root_.empty()) {
       const char* android_root_env_var = getenv("ANDROID_ROOT");
       if (android_root_env_var == nullptr) {
@@ -734,6 +763,12 @@
       Usage("--dex-location arguments do not match --dex-file arguments");
     }
 
+    if (!dex_filenames_.empty() && !oat_filenames_.empty()) {
+      if (oat_filenames_.size() != 1 && oat_filenames_.size() != dex_filenames_.size()) {
+        Usage("--oat-file arguments must be singular or match --dex-file arguments");
+      }
+    }
+
     if (zip_fd_ != -1 && zip_location_.empty()) {
       Usage("--zip-location should be supplied with --zip-fd");
     }
@@ -744,11 +779,15 @@
       }
     }
 
-    oat_stripped_ = oat_filename_;
+    if (!profile_files_.empty()) {
+      if (!reference_profile_files_.empty() &&
+          (reference_profile_files_.size() != profile_files_.size())) {
+        Usage("If specified, --reference-profile-file should match the number of --profile-file.");
+      }
+    }
+
     if (!parser_options->oat_symbols.empty()) {
-      oat_unstripped_ = parser_options->oat_symbols;
-    } else {
-      oat_unstripped_ = oat_filename_;
+      oat_unstripped_ = std::move(parser_options->oat_symbols);
     }
 
     // If no instruction set feature was given, use the default one for the target
@@ -813,6 +852,89 @@
 
     compiler_options_->verbose_methods_ = verbose_methods_.empty() ? nullptr : &verbose_methods_;
 
+    if (!IsBootImage() && multi_image) {
+      Usage("--multi-image can only be used when creating boot images");
+    }
+    if (IsBootImage() && multi_image && image_filenames_.size() > 1) {
+      Usage("--multi-image cannot be used with multiple image names");
+    }
+
+    // For now, if we're on the host and compile the boot image, *always* use multiple image files.
+    if (!kIsTargetBuild && IsBootImage()) {
+      if (image_filenames_.size() == 1) {
+        multi_image = true;
+      }
+    }
+
+    if (IsBootImage() && multi_image) {
+      // Expand the oat and image filenames.
+      std::string base_oat = oat_filenames_[0];
+      size_t last_oat_slash = base_oat.rfind('/');
+      if (last_oat_slash == std::string::npos) {
+        Usage("--multi-image used with unusable oat filename %s", base_oat.c_str());
+      }
+      // We also need to honor path components that were encoded through '@'. Otherwise the loading
+      // code won't be able to find the images.
+      if (base_oat.find('@', last_oat_slash) != std::string::npos) {
+        last_oat_slash = base_oat.rfind('@');
+      }
+      base_oat = base_oat.substr(0, last_oat_slash + 1);
+
+      std::string base_img = image_filenames_[0];
+      size_t last_img_slash = base_img.rfind('/');
+      if (last_img_slash == std::string::npos) {
+        Usage("--multi-image used with unusable image filename %s", base_img.c_str());
+      }
+      // We also need to honor path components that were encoded through '@'. Otherwise the loading
+      // code won't be able to find the images.
+      if (base_img.find('@', last_img_slash) != std::string::npos) {
+        last_img_slash = base_img.rfind('@');
+      }
+
+      // Get the prefix, which is the primary image name (without path components). Strip the
+      // extension.
+      std::string prefix = base_img.substr(last_img_slash + 1);
+      if (prefix.rfind('.') != std::string::npos) {
+        prefix = prefix.substr(0, prefix.rfind('.'));
+      }
+      if (!prefix.empty()) {
+        prefix = prefix + "-";
+      }
+
+      base_img = base_img.substr(0, last_img_slash + 1);
+
+      // Note: we have a special case here for our testing. We have to inject the differentiating
+      //       parts for the different core images.
+      std::string infix;  // Empty infix by default.
+      {
+        // Check the first name.
+        std::string dex_file = oat_filenames_[0];
+        size_t last_dex_slash = dex_file.rfind('/');
+        if (last_dex_slash != std::string::npos) {
+          dex_file = dex_file.substr(last_dex_slash + 1);
+        }
+        size_t last_dex_dot = dex_file.rfind('.');
+        if (last_dex_dot != std::string::npos) {
+          dex_file = dex_file.substr(0, last_dex_dot);
+        }
+        if (StartsWith(dex_file, "core-")) {
+          infix = dex_file.substr(strlen("core"));
+        }
+      }
+
+      // Now create the other names. Use a counted loop to skip the first one.
+      for (size_t i = 1; i < dex_locations_.size(); ++i) {
+        // TODO: Make everything properly std::string.
+        std::string image_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".art");
+        char_backing_storage_.push_back(base_img + image_name);
+        image_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+
+        std::string oat_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".oat");
+        char_backing_storage_.push_back(base_oat + oat_name);
+        oat_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+      }
+    }
+
     // Done with usage checks, enable watchdog if requested
     if (parser_options->watch_dog_enabled) {
       watchdog_.reset(new WatchDog(true));
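
The '@' handling in the expansion above exists because dalvik-cache image paths encode directory separators as '@' (e.g. system@framework@boot.art), and the derived base prefix must keep those encoded components or the image loader will not find the secondary images. Below is a minimal self-contained sketch of just that base-path step; the helper name is hypothetical, since the real code inlines the logic separately for the oat and the image name:

    #include <iostream>
    #include <string>

    // Take everything up to and including the last '/', or the last '@' when
    // one occurs after that slash (dalvik-cache names encode '/' as '@').
    static std::string BasePath(const std::string& name) {
      size_t last_slash = name.rfind('/');
      if (last_slash == std::string::npos) {
        return "";  // The real code calls Usage() and exits in this case.
      }
      if (name.find('@', last_slash) != std::string::npos) {
        last_slash = name.rfind('@');
      }
      return name.substr(0, last_slash + 1);
    }

    int main() {
      std::cout << BasePath("/system/framework/boot.oat") << "\n";
      // -> /system/framework/
      std::cout << BasePath("/data/dalvik-cache/system@framework@boot.art") << "\n";
      // -> /data/dalvik-cache/system@framework@
    }
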
@@ -822,6 +944,37 @@
     key_value_store_.reset(new SafeMap<std::string, std::string>());
   }
 
+  // Modify the input string in the following way:
+  //   0) Assume input is /a/b/c.d
+  //   1) Strip the path  -> c.d
+  //   2) Inject prefix p -> pc.d
+  //   3) Inject infix i  -> pci.d
+  //   4) Replace suffix with s if it's "jar"  -> d == "jar" -> pci.s
+  static std::string CreateMultiImageName(std::string in,
+                                          const std::string& prefix,
+                                          const std::string& infix,
+                                          const char* replace_suffix) {
+    size_t last_dex_slash = in.rfind('/');
+    if (last_dex_slash != std::string::npos) {
+      in = in.substr(last_dex_slash + 1);
+    }
+    if (!prefix.empty()) {
+      in = prefix + in;
+    }
+    if (!infix.empty()) {
+      // Inject infix.
+      size_t last_dot = in.rfind('.');
+      if (last_dot != std::string::npos) {
+        in.insert(last_dot, infix);
+      }
+    }
+    if (EndsWith(in, ".jar")) {
+      in = in.substr(0, in.length() - strlen(".jar")) +
+          (replace_suffix != nullptr ? replace_suffix : "");
+    }
+    return in;
+  }
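
A worked example of the four-step transformation documented above may help. This restates the function in self-contained form; ART's EndsWith() helper is replaced by an explicit compare so the snippet compiles on its own, and the inputs are purely illustrative:

    #include <iostream>
    #include <string>

    static std::string CreateMultiImageName(std::string in,
                                            const std::string& prefix,
                                            const std::string& infix,
                                            const char* replace_suffix) {
      size_t last_slash = in.rfind('/');
      if (last_slash != std::string::npos) {
        in = in.substr(last_slash + 1);                  // 1) strip the path
      }
      if (!prefix.empty()) {
        in = prefix + in;                                // 2) inject the prefix
      }
      if (!infix.empty()) {
        size_t last_dot = in.rfind('.');
        if (last_dot != std::string::npos) {
          in.insert(last_dot, infix);                    // 3) inject the infix
        }
      }
      if (in.size() >= 4 && in.compare(in.size() - 4, 4, ".jar") == 0) {
        in = in.substr(0, in.size() - 4) +               // 4) swap the ".jar" suffix
            (replace_suffix != nullptr ? replace_suffix : "");
      }
      return in;
    }

    int main() {
      // Illustrative prefix/infix values, not from a real build.
      std::cout << CreateMultiImageName("/system/framework/core-libart.jar",
                                        "boot-", "-interpreter", ".art")
                << "\n";  // boot-core-libart-interpreter.art
    }
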
+
   void InsertCompileOptions(int argc, char** argv) {
     std::ostringstream oss;
     for (int i = 0; i < argc; ++i) {
@@ -862,6 +1015,8 @@
     std::unique_ptr<ParserOptions> parser_options(new ParserOptions());
     compiler_options_.reset(new CompilerOptions());
 
+    bool multi_image = false;
+
     for (int i = 0; i < argc; i++) {
       const StringPiece option(argv[i]);
       const bool log_options = false;
@@ -877,9 +1032,9 @@
       } else if (option.starts_with("--zip-location=")) {
         zip_location_ = option.substr(strlen("--zip-location=")).data();
       } else if (option.starts_with("--oat-file=")) {
-        oat_filename_ = option.substr(strlen("--oat-file=")).data();
+        oat_filenames_.push_back(option.substr(strlen("--oat-file=")).data());
       } else if (option.starts_with("--oat-symbols=")) {
-        parser_options->oat_symbols = option.substr(strlen("--oat-symbols=")).data();
+        parser_options->oat_symbols.push_back(option.substr(strlen("--oat-symbols=")).data());
       } else if (option.starts_with("--oat-fd=")) {
         ParseOatFd(option);
       } else if (option == "--watch-dog") {
@@ -891,7 +1046,7 @@
       } else if (option.starts_with("--oat-location=")) {
         oat_location_ = option.substr(strlen("--oat-location=")).data();
       } else if (option.starts_with("--image=")) {
-        image_filename_ = option.substr(strlen("--image=")).data();
+        image_filenames_.push_back(option.substr(strlen("--image=")).data());
       } else if (option.starts_with("--image-classes=")) {
         image_classes_filename_ = option.substr(strlen("--image-classes=")).data();
       } else if (option.starts_with("--image-classes-zip=")) {
@@ -921,8 +1076,10 @@
       } else if (option.starts_with("--compiler-backend=")) {
         ParseCompilerBackend(option, parser_options.get());
       } else if (option.starts_with("--profile-file=")) {
-        profile_file_ = option.substr(strlen("--profile-file=")).data();
-        VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
+        profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
+      } else if (option.starts_with("--reference-profile-file=")) {
+        reference_profile_files_.push_back(
+            option.substr(strlen("--reference-profile-file=")).ToString());
       } else if (option == "--no-profile-file") {
         // No profile
       } else if (option == "--host") {
@@ -958,41 +1115,56 @@
         //       conditional on having verbose methods.
         gLogVerbosity.compiler = false;
         Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods_);
+      } else if (option == "--multi-image") {
+        multi_image = true;
+      } else if (option.starts_with("--no-inline-from=")) {
+        no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
     }
 
-    ProcessOptions(parser_options.get());
+    ProcessOptions(parser_options.get(), multi_image);
 
     // Insert some compiler things.
     InsertCompileOptions(argc, argv);
   }
 
-  // Check whether the oat output file is writable, and open it for later. Also open a swap file,
-  // if a name is given.
+  // Check whether the oat output files are writable, and open them for later. Also open a swap
+  // file, if a name is given.
   bool OpenFile() {
-    bool create_file = !oat_unstripped_.empty();  // as opposed to using open file descriptor
+    bool create_file = oat_fd_ == -1;  // as opposed to using open file descriptor
     if (create_file) {
-      oat_file_.reset(OS::CreateEmptyFile(oat_unstripped_.c_str()));
-      if (oat_location_.empty()) {
-        oat_location_ = oat_filename_;
+      for (const char* oat_filename : oat_filenames_) {
+        std::unique_ptr<File> oat_file(OS::CreateEmptyFile(oat_filename));
+        if (oat_file.get() == nullptr) {
+          PLOG(ERROR) << "Failed to create oat file: " << oat_filename;
+          return false;
+        }
+        if (create_file && fchmod(oat_file->Fd(), 0644) != 0) {
+          PLOG(ERROR) << "Failed to make oat file world readable: " << oat_filename;
+          oat_file->Erase();
+          return false;
+        }
+        oat_files_.push_back(std::move(oat_file));
       }
     } else {
-      oat_file_.reset(new File(oat_fd_, oat_location_, true));
-      oat_file_->DisableAutoClose();
-      if (oat_file_->SetLength(0) != 0) {
+      std::unique_ptr<File> oat_file(new File(oat_fd_, oat_location_, true));
+      oat_file->DisableAutoClose();
+      if (oat_file->SetLength(0) != 0) {
         PLOG(WARNING) << "Truncating oat file " << oat_location_ << " failed.";
       }
-    }
-    if (oat_file_.get() == nullptr) {
-      PLOG(ERROR) << "Failed to create oat file: " << oat_location_;
-      return false;
-    }
-    if (create_file && fchmod(oat_file_->Fd(), 0644) != 0) {
-      PLOG(ERROR) << "Failed to make oat file world readable: " << oat_location_;
-      oat_file_->Erase();
-      return false;
+      if (oat_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to create oat file: " << oat_location_;
+        return false;
+      }
+      if (create_file && fchmod(oat_file->Fd(), 0644) != 0) {
+        PLOG(ERROR) << "Failed to make oat file world readable: " << oat_location_;
+        oat_file->Erase();
+        return false;
+      }
+      oat_filenames_.push_back(oat_location_.c_str());
+      oat_files_.push_back(std::move(oat_file));
     }
 
     // Swap file handling.
@@ -1017,10 +1189,12 @@
     return true;
   }
 
-  void EraseOatFile() {
-    DCHECK(oat_file_.get() != nullptr);
-    oat_file_->Erase();
-    oat_file_.reset();
+  void EraseOatFiles() {
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      DCHECK(oat_files_[i].get() != nullptr);
+      oat_files_[i]->Erase();
+      oat_files_[i].reset();
+    }
   }
 
   void Shutdown() {
@@ -1155,9 +1329,40 @@
       }
     }
 
+    // Organize inputs, handling multi-dex and multiple oat file outputs.
+    CreateDexOatMappings();
+
     return true;
   }
 
+  void CreateDexOatMappings() {
+    if (oat_files_.size() > 1) {
+      // TODO: This needs to change, as it is not a stable mapping. If a dex file is missing,
+      //       the images will be out of whack. b/26317072
+      size_t index = 0;
+      for (size_t i = 0; i < oat_files_.size(); ++i) {
+        std::vector<const DexFile*> dex_files;
+        if (index < dex_files_.size()) {
+          dex_files.push_back(dex_files_[index]);
+          dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
+          index++;
+          while (index < dex_files_.size() &&
+              (dex_files_[index]->GetBaseLocation() == dex_files_[index - 1]->GetBaseLocation())) {
+            dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
+            dex_files.push_back(dex_files_[index]);
+            index++;
+          }
+        }
+        dex_files_per_oat_file_.push_back(std::move(dex_files));
+      }
+    } else {
+      dex_files_per_oat_file_.push_back(dex_files_);
+      for (const DexFile* dex_file : dex_files_) {
+        dex_file_oat_filename_map_.emplace(dex_file, oat_filenames_[0]);
+      }
+    }
+  }
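
The grouping above gives each oat file one dex file plus any immediately following dex files that share its base location, i.e. the extra classes*.dex entries of a multidex archive. A sketch of just that loop, with base-location strings standing in for DexFile pointers:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // The two identical entries model a multidex archive
      // (classes.dex + classes2.dex of core-libart.jar).
      std::vector<std::string> dex_base_locations = {
          "core-oj.jar", "core-libart.jar", "core-libart.jar", "conscrypt.jar"};
      size_t oat_count = 3;

      std::vector<std::vector<std::string>> dex_files_per_oat_file(oat_count);
      size_t index = 0;
      for (size_t i = 0; i < oat_count; ++i) {
        if (index < dex_base_locations.size()) {
          dex_files_per_oat_file[i].push_back(dex_base_locations[index++]);
          while (index < dex_base_locations.size() &&
                 dex_base_locations[index] == dex_base_locations[index - 1]) {
            dex_files_per_oat_file[i].push_back(dex_base_locations[index++]);
          }
        }
      }

      for (size_t i = 0; i < oat_count; ++i) {
        std::cout << "oat " << i << ":";
        for (const std::string& loc : dex_files_per_oat_file[i]) {
          std::cout << " " << loc;
        }
        std::cout << "\n";
      }
      // oat 0: core-oj.jar
      // oat 1: core-libart.jar core-libart.jar
      // oat 2: conscrypt.jar
    }
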
+
   // Create and invoke the compiler driver. This will compile all the dex files.
   void Compile() {
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
@@ -1188,6 +1393,96 @@
       class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader);
     }
 
+    // Find the dex file we should not inline from.
+
+    // For now, on the host always have core-oj removed.
+    if (!kIsTargetBuild && no_inline_from_string_.empty()) {
+      no_inline_from_string_ = "core-oj";
+    }
+
+    if (!no_inline_from_string_.empty()) {
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      std::vector<const std::vector<const DexFile*>*> dex_file_vectors = {
+          &class_linker->GetBootClassPath(),
+          &class_path_files,
+          &dex_files_
+      };
+      for (const std::vector<const DexFile*>* dex_file_vector : dex_file_vectors) {
+        if (dex_file_vector == nullptr) {
+          continue;
+        }
+
+        bool found = false;
+
+        for (const DexFile* dex_file : *dex_file_vector) {
+          // Try the complete location first.
+          found = no_inline_from_string_ == dex_file->GetLocation();
+          // The try just the name.
+          if (!found) {
+            size_t last_slash = dex_file->GetLocation().rfind('/');
+            if (last_slash != std::string::npos) {
+              found = StartsWith(dex_file->GetLocation().substr(last_slash + 1),
+                                 no_inline_from_string_.c_str());
+            }
+          }
+
+          if (found) {
+            VLOG(compiler) << "Disabling inlining from " << dex_file->GetLocation();
+            compiler_options_->no_inline_from_ = dex_file;
+            break;
+          }
+        }
+
+        if (found) {
+          break;
+        }
+      }
+    }
+
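
The lookup above accepts either an exact dex location or a prefix of the location's basename, which is what lets the bare "core-oj" default match any core-oj*.jar location. A condensed self-contained sketch of the predicate; the helper name is hypothetical:

    #include <iostream>
    #include <string>

    // Try the full location first, then a prefix match against its basename.
    static bool MatchesNoInlineFrom(const std::string& location,
                                    const std::string& filter) {
      if (location == filter) {
        return true;
      }
      size_t last_slash = location.rfind('/');
      if (last_slash == std::string::npos) {
        return false;
      }
      return location.compare(last_slash + 1, filter.size(), filter) == 0;
    }

    int main() {
      std::cout << std::boolalpha
                << MatchesNoInlineFrom("/system/framework/core-oj.jar", "core-oj")
                << "\n"   // true
                << MatchesNoInlineFrom("/system/framework/core-libart.jar", "core-oj")
                << "\n";  // false
    }
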
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store. If
+      // the image filename was adapted (e.g., for our tests), we need to change this here, too, but
+      // need to strip all path components (they will be re-established when loading).
+      // We need this for the multi-image case.
+      std::ostringstream bootcp_oss;
+      bool first_bootcp = true;
+      for (size_t i = 0; i < dex_locations_.size(); ++i) {
+        if (!first_bootcp) {
+          bootcp_oss << ":";
+        }
+
+        std::string dex_loc = dex_locations_[i];
+        std::string image_filename = image_filenames_[i];
+
+        // Use the dex_loc path, but the image_filename name (without path elements).
+        size_t dex_last_slash = dex_loc.rfind('/');
+
+        // npos is max(size_t). That makes this a bit ugly.
+        size_t image_last_slash = image_filename.rfind('/');
+        size_t image_last_at = image_filename.rfind('@');
+        size_t image_last_sep = (image_last_slash == std::string::npos)
+                                    ? image_last_at
+                                    : (image_last_at == std::string::npos)
+                                          ? image_last_slash
+                                          : std::max(image_last_slash, image_last_at);
+        // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+        if (dex_last_slash == std::string::npos) {
+          dex_loc = image_filename.substr(image_last_sep + 1);
+        } else {
+          dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+              image_filename.substr(image_last_sep + 1);
+        }
+
+        // Image filenames already end with .art, no need to replace.
+
+        bootcp_oss << dex_loc;
+        first_bootcp = false;
+      }
+      key_value_store_->Put(OatHeader::kBootClassPath, bootcp_oss.str());
+    }
+
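
The separator arithmetic above relies on unsigned wrap-around: std::string::npos + 1 is 0, so a filename with no '/' or '@' is taken whole. A self-contained sketch of the splice; the helper name is hypothetical:

    #include <algorithm>
    #include <iostream>
    #include <string>

    // Keep the directory of the dex location, but the basename of the image
    // filename, treating both '/' and '@' as separators in the image name.
    static std::string SpliceBootClassPathEntry(const std::string& dex_loc,
                                                const std::string& image_filename) {
      size_t image_last_slash = image_filename.rfind('/');
      size_t image_last_at = image_filename.rfind('@');
      size_t image_last_sep = (image_last_slash == std::string::npos)
                                  ? image_last_at
                                  : (image_last_at == std::string::npos)
                                        ? image_last_slash
                                        : std::max(image_last_slash, image_last_at);
      size_t dex_last_slash = dex_loc.rfind('/');
      if (dex_last_slash == std::string::npos) {
        return image_filename.substr(image_last_sep + 1);
      }
      return dex_loc.substr(0, dex_last_slash + 1) +
          image_filename.substr(image_last_sep + 1);
    }

    int main() {
      std::cout << SpliceBootClassPathEntry("/system/framework/core-libart.jar",
                                            "/tmp/art-test/boot-core-libart.art")
                << "\n";  // /system/framework/boot-core-libart.art
    }
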
     driver_.reset(new CompilerDriver(compiler_options_.get(),
                                      verification_results_.get(),
                                      &method_inliner_map_,
@@ -1205,12 +1500,13 @@
                                      dump_cfg_append_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
-                                     profile_file_));
-
+                                     &dex_file_oat_filename_map_,
+                                     profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
     driver_->CompileAll(class_loader, dex_files_, timings_);
   }
 
+  // TODO: Update comments about how this works for multi image. b/26317072
   // Notes on the interleaving of creating the image and oat file to
   // ensure the references between the two are correct.
   //
@@ -1272,17 +1568,16 @@
   // Steps 1.-3. are done by the CreateOatFile() above, steps 4.-5.
   // are done by the CreateImageFile() below.
 
-
   // Write out the generated code part. Calls the OatWriter and ElfBuilder. Also prepares the
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFile() {
+  bool CreateOatFiles() {
     CHECK(key_value_store_.get() != nullptr);
 
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::unique_ptr<OatWriter> oat_writer;
+    std::vector<std::unique_ptr<OatWriter>> oat_writers;
     {
       TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
       std::string image_file_location;
@@ -1291,10 +1586,13 @@
       int32_t image_patch_delta = 0;
 
       if (app_image_ && image_base_ == 0) {
-        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-        image_base_ = RoundUp(
-            reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
-            kPageSize);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        for (gc::space::ImageSpace* image_space : image_spaces) {
+          image_base_ = std::max(image_base_, RoundUp(
+              reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
+              kPageSize));
+        }
         VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
       }
 
@@ -1304,27 +1602,36 @@
 
       if (!IsBootImage()) {
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-        image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum = image_spaces[0]->GetImageHeader().GetOatChecksum();
         image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
-        image_file_location = image_space->GetImageFilename();
-        image_patch_delta = image_space->GetImageHeader().GetPatchDelta();
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        image_file_location = Join(image_filenames, ':');
       }
 
       if (!image_file_location.empty()) {
         key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
       }
 
-      oat_writer.reset(new OatWriter(dex_files_,
-                                     image_file_location_oat_checksum,
-                                     image_file_location_oat_data_begin,
-                                     image_patch_delta,
-                                     driver_.get(),
-                                     image_writer_.get(),
-                                     IsBootImage(),
-                                     timings_,
-                                     key_value_store_.get()));
+      for (size_t i = 0; i < oat_files_.size(); ++i) {
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        std::unique_ptr<OatWriter> oat_writer(new OatWriter(dex_files,
+                                                            image_file_location_oat_checksum,
+                                                            image_file_location_oat_data_begin,
+                                                            image_patch_delta,
+                                                            driver_.get(),
+                                                            image_writer_.get(),
+                                                            IsBootImage(),
+                                                            timings_,
+                                                            key_value_store_.get()));
+        oat_writers.push_back(std::move(oat_writer));
+      }
     }
 
     if (IsImage()) {
@@ -1339,37 +1646,56 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      std::unique_ptr<ElfWriter> elf_writer =
-          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+      for (size_t i = 0; i < oat_files_.size(); ++i) {
+        std::unique_ptr<File>& oat_file = oat_files_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers[i];
+        std::unique_ptr<ElfWriter> elf_writer =
+            CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get());
 
-      elf_writer->Start();
+        elf_writer->Start();
 
-      OutputStream* rodata = elf_writer->StartRoData();
-      if (!oat_writer->WriteRodata(rodata)) {
-        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
-        return false;
-      }
-      elf_writer->EndRoData(rodata);
+        OutputStream* rodata = elf_writer->StartRoData();
+        if (!oat_writer->WriteRodata(rodata)) {
+          LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+        elf_writer->EndRoData(rodata);
 
-      OutputStream* text = elf_writer->StartText();
-      if (!oat_writer->WriteCode(text)) {
-        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
-        return false;
-      }
-      elf_writer->EndText(text);
+        OutputStream* text = elf_writer->StartText();
+        if (!oat_writer->WriteCode(text)) {
+          LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+        elf_writer->EndText(text);
 
-      elf_writer->SetBssSize(oat_writer->GetBssSize());
-      elf_writer->WriteDynamicSection();
-      elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-      elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
+        elf_writer->SetBssSize(oat_writer->GetBssSize());
+        elf_writer->WriteDynamicSection();
+        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
-      if (!elf_writer->End()) {
-        LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
-        return false;
+        if (!elf_writer->End()) {
+          LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
+          return false;
+        }
+
+        // Flush the oat file.
+        if (oat_files_[i] != nullptr) {
+          if (oat_files_[i]->Flush() != 0) {
+            PLOG(ERROR) << "Failed to flush oat file: " << oat_filenames_[i];
+            oat_files_[i]->Erase();
+            return false;
+          }
+        }
+
+        if (IsImage()) {
+          // Update oat estimates.
+          UpdateImageWriter(i);
+        }
+
+        VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
       }
     }
 
-    VLOG(compiler) << "Oat file written successfully (unstripped): " << oat_location_;
     return true;
   }
 
@@ -1380,70 +1706,80 @@
       if (!CreateImageFile()) {
         return false;
       }
-      VLOG(compiler) << "Image written successfully: " << image_filename_;
+      VLOG(compiler) << "Images written successfully";
     }
     return true;
   }
 
-  // Create a copy from unstripped to stripped.
-  bool CopyUnstrippedToStripped() {
-    // If we don't want to strip in place, copy from unstripped location to stripped location.
-    // We need to strip after image creation because FixupElf needs to use .strtab.
-    if (oat_unstripped_ != oat_stripped_) {
-      // If the oat file is still open, flush it.
-      if (oat_file_.get() != nullptr && oat_file_->IsOpened()) {
-        if (!FlushCloseOatFile()) {
+  // Create a copy from stripped to unstripped.
+  bool CopyStrippedToUnstripped() {
+    for (size_t i = 0; i < oat_unstripped_.size(); ++i) {
+      // If we don't want to strip in place, copy from stripped location to unstripped location.
+      // We need to strip after image creation because FixupElf needs to use .strtab.
+      if (strcmp(oat_unstripped_[i], oat_filenames_[i]) != 0) {
+        // If the oat file is still open, flush it.
+        if (oat_files_[i].get() != nullptr && oat_files_[i]->IsOpened()) {
+          if (!FlushCloseOatFile(i)) {
+            return false;
+          }
+        }
+
+        TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
+        std::unique_ptr<File> in(OS::OpenFileForReading(oat_filenames_[i]));
+        std::unique_ptr<File> out(OS::CreateEmptyFile(oat_unstripped_[i]));
+        size_t buffer_size = 8192;
+        std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
+        while (true) {
+          int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
+          if (bytes_read <= 0) {
+            break;
+          }
+          bool write_ok = out->WriteFully(buffer.get(), bytes_read);
+          CHECK(write_ok);
+        }
+        if (out->FlushCloseOrErase() != 0) {
+          PLOG(ERROR) << "Failed to flush and close copied oat file: " << oat_unstripped_[i];
+          return false;
+        }
+        VLOG(compiler) << "Oat file copied successfully (unstripped): " << oat_unstripped_[i];
+      }
+    }
+    return true;
+  }
+
+  bool FlushOatFiles() {
+    TimingLogger::ScopedTiming t2("dex2oat Flush ELF", timings_);
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      if (oat_files_[i].get() != nullptr) {
+        if (oat_files_[i]->Flush() != 0) {
+          PLOG(ERROR) << "Failed to flush oat file: " << oat_filenames_[i];
+          oat_files_[i]->Erase();
           return false;
         }
       }
-
-      TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
-      std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped_.c_str()));
-      std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped_.c_str()));
-      size_t buffer_size = 8192;
-      std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
-      while (true) {
-        int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
-        if (bytes_read <= 0) {
-          break;
-        }
-        bool write_ok = out->WriteFully(buffer.get(), bytes_read);
-        CHECK(write_ok);
-      }
-      if (out->FlushCloseOrErase() != 0) {
-        PLOG(ERROR) << "Failed to flush and close copied oat file: " << oat_stripped_;
-        return false;
-      }
-      VLOG(compiler) << "Oat file copied successfully (stripped): " << oat_stripped_;
     }
     return true;
   }
 
-  bool FlushOatFile() {
-    if (oat_file_.get() != nullptr) {
-      TimingLogger::ScopedTiming t2("dex2oat Flush ELF", timings_);
-      if (oat_file_->Flush() != 0) {
-        PLOG(ERROR) << "Failed to flush oat file: " << oat_location_ << " / "
-            << oat_filename_;
-        oat_file_->Erase();
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool FlushCloseOatFile() {
-    if (oat_file_.get() != nullptr) {
-      std::unique_ptr<File> tmp(oat_file_.release());
+  bool FlushCloseOatFile(size_t i) {
+    if (oat_files_[i].get() != nullptr) {
+      std::unique_ptr<File> tmp(oat_files_[i].release());
       if (tmp->FlushCloseOrErase() != 0) {
-        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_location_ << " / "
-            << oat_filename_;
+        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_filenames_[i];
         return false;
       }
     }
     return true;
   }
 
+  bool FlushCloseOatFiles() {
+    bool result = true;
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      result &= FlushCloseOatFile(i);
+    }
+    return result;
+  }
+
   void DumpTiming() {
     if (dump_timing_ || (dump_slow_timing_ && timings_->GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(*timings_);
@@ -1473,6 +1809,26 @@
     return is_host_;
   }
 
+  bool UseProfileGuidedCompilation() const {
+    return !profile_files_.empty();
+  }
+
+  bool ProcessProfiles() {
+    DCHECK(UseProfileGuidedCompilation());
+    ProfileCompilationInfo* info = nullptr;
+    if (ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_files_, &info)) {
+      profile_compilation_info_.reset(info);
+      return true;
+    }
+    return false;
+  }
+
+  bool ShouldCompileBasedOnProfiles() const {
+    DCHECK(UseProfileGuidedCompilation());
+    // If we are given profiles, compile only if we have new information.
+    return profile_compilation_info_ != nullptr;
+  }
+
  private:
   template <typename T>
   static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
@@ -1704,42 +2060,52 @@
                                         image_base,
                                         compiler_options_->GetCompilePic(),
                                         IsAppImage(),
-                                        image_storage_mode_));
+                                        image_storage_mode_,
+                                        oat_filenames_,
+                                        dex_file_oat_filename_map_));
   }
 
-  // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
+  // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
   bool CreateImageFile()
       REQUIRES(!Locks::mutator_lock_) {
     CHECK(image_writer_ != nullptr);
-    if (!image_writer_->Write(app_image_fd_,
-                              IsBootImage() ? image_filename_ : app_image_file_name_,
-                              oat_unstripped_,
-                              oat_location_)) {
-      LOG(ERROR) << "Failed to create image file " << image_filename_;
+    if (!IsBootImage()) {
+      image_filenames_.push_back(app_image_file_name_.c_str());
+    }
+    if (!image_writer_->Write(app_image_fd_, image_filenames_, oat_filenames_)) {
+      LOG(ERROR) << "Failure during image file creation";
       return false;
     }
-    uintptr_t oat_data_begin = image_writer_->GetOatDataBegin();
 
+    // We need the OatDataBegin entries.
+    std::map<const char*, uintptr_t> oat_data_begins;
+    for (const char* oat_filename : oat_filenames_) {
+      oat_data_begins.emplace(oat_filename, image_writer_->GetOatDataBegin(oat_filename));
+    }
     // Destroy ImageWriter before doing FixupElf.
     image_writer_.reset();
 
-    // Do not fix up the ELF file if we are --compile-pic or compiing the app image
-    if (!compiler_options_->GetCompilePic() && IsBootImage()) {
-      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_unstripped_.c_str()));
-      if (oat_file.get() == nullptr) {
-        PLOG(ERROR) << "Failed to open ELF file: " << oat_unstripped_;
-        return false;
-      }
+    for (const char* oat_filename : oat_filenames_) {
+      // Do not fix up the ELF file if we are --compile-pic or compiling the app image
+      if (!compiler_options_->GetCompilePic() && IsBootImage()) {
+        std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+        if (oat_file.get() == nullptr) {
+          PLOG(ERROR) << "Failed to open ELF file: " << oat_filename;
+          return false;
+        }
 
-      if (!ElfWriter::Fixup(oat_file.get(), oat_data_begin)) {
-        oat_file->Erase();
-        LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
-        return false;
-      }
+        uintptr_t oat_data_begin = oat_data_begins.find(oat_filename)->second;
 
-      if (oat_file->FlushCloseOrErase()) {
-        PLOG(ERROR) << "Failed to flush and close fixed ELF file " << oat_file->GetPath();
-        return false;
+        if (!ElfWriter::Fixup(oat_file.get(), oat_data_begin)) {
+          oat_file->Erase();
+          LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
+          return false;
+        }
+
+        if (oat_file->FlushCloseOrErase()) {
+          PLOG(ERROR) << "Failed to flush and close fixed ELF file " << oat_file->GetPath();
+          return false;
+        }
       }
     }
 
@@ -1842,6 +2208,33 @@
                   "");
   }
 
+  std::string StripIsaFrom(const char* image_filename, InstructionSet isa) {
+    std::string res(image_filename);
+    size_t last_slash = res.rfind('/');
+    if (last_slash == std::string::npos || last_slash == 0) {
+      return res;
+    }
+    size_t penultimate_slash = res.rfind('/', last_slash - 1);
+    if (penultimate_slash == std::string::npos) {
+      return res;
+    }
+    // Check that the string in-between is the expected one.
+    if (res.substr(penultimate_slash + 1, last_slash - penultimate_slash - 1) !=
+            GetInstructionSetString(isa)) {
+      LOG(WARNING) << "Unexpected string when trying to strip isa: " << res;
+      return res;
+    }
+    return res.substr(0, penultimate_slash) + res.substr(last_slash);
+  }
+
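
StripIsaFrom() drops the ISA directory component that on-device image paths carry. A self-contained restatement with the ISA passed as a plain string; the input path is illustrative:

    #include <iostream>
    #include <string>

    // Drop the second-to-last path component when it names the expected ISA.
    static std::string StripIsa(const std::string& image_filename,
                                const std::string& isa_name) {
      std::string res(image_filename);
      size_t last_slash = res.rfind('/');
      if (last_slash == std::string::npos || last_slash == 0) {
        return res;
      }
      size_t penultimate_slash = res.rfind('/', last_slash - 1);
      if (penultimate_slash == std::string::npos) {
        return res;
      }
      if (res.substr(penultimate_slash + 1, last_slash - penultimate_slash - 1) !=
              isa_name) {
        return res;  // Unexpected layout: leave the path untouched.
      }
      return res.substr(0, penultimate_slash) + res.substr(last_slash);
    }

    int main() {
      std::cout << StripIsa("/data/dalvik-cache/arm/boot.art", "arm") << "\n";
      // -> /data/dalvik-cache/boot.art
    }
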
+  // Update the estimate for the oat file with the given index.
+  void UpdateImageWriter(size_t index) {
+    DCHECK(image_writer_ != nullptr);
+    DCHECK_LT(index, oat_filenames_.size());
+
+    image_writer_->UpdateOatFile(oat_filenames_[index]);
+  }
+
   std::unique_ptr<CompilerOptions> compiler_options_;
   Compiler::Kind compiler_kind_;
 
@@ -1863,11 +2256,10 @@
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
-  std::unique_ptr<File> oat_file_;
-  std::string oat_stripped_;
-  std::string oat_unstripped_;
+  std::vector<std::unique_ptr<File>> oat_files_;
   std::string oat_location_;
-  std::string oat_filename_;
+  std::vector<const char*> oat_filenames_;
+  std::vector<const char*> oat_unstripped_;
   int oat_fd_;
   std::vector<const char*> dex_filenames_;
   std::vector<const char*> dex_locations_;
@@ -1875,7 +2267,7 @@
   std::string zip_location_;
   std::string boot_image_filename_;
   std::vector<const char*> runtime_args_;
-  std::string image_filename_;
+  std::vector<const char*> image_filenames_;
   uintptr_t image_base_;
   const char* image_classes_zip_filename_;
   const char* image_classes_filename_;
@@ -1892,6 +2284,7 @@
   bool is_host_;
   std::string android_root_;
   std::vector<const DexFile*> dex_files_;
+  std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
@@ -1909,9 +2302,16 @@
   int swap_fd_;
   std::string app_image_file_name_;
   int app_image_fd_;
-  std::string profile_file_;  // Profile file to use
+  std::vector<std::string> profile_files_;
+  std::vector<std::string> reference_profile_files_;
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
+  std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
+  std::unordered_map<const DexFile*, const char*> dex_file_oat_filename_map_;
+
+  // Backing storage.
+  std::vector<std::string> char_backing_storage_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
@@ -1939,19 +2339,18 @@
 static int CompileImage(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  // Create the boot.oat.
-  if (!dex2oat.CreateOatFile()) {
-    dex2oat.EraseOatFile();
+  if (!dex2oat.CreateOatFiles()) {
+    dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
-  // Flush and close the boot.oat. We always expect the output file by name, and it will be
-  // re-opened from the unstripped name.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // Close the image oat files. We always expect the output file by name, and it will be
+  // re-opened from the unstripped name. Note: it's easier to *flush* and close...
+  if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
-  // Creates the boot.art and patches the boot.oat.
+  // Creates the boot.art and patches the oat files.
   if (!dex2oat.HandleImage()) {
     return EXIT_FAILURE;
   }
@@ -1962,13 +2361,13 @@
     return EXIT_SUCCESS;
   }
 
-  // Copy unstripped to stripped location, if necessary.
-  if (!dex2oat.CopyUnstrippedToStripped()) {
+  // Copy stripped to unstripped location, if necessary.
+  if (!dex2oat.CopyStrippedToUnstripped()) {
     return EXIT_FAILURE;
   }
 
-  // FlushClose again, as stripping might have re-opened the oat file.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // FlushClose again, as stripping might have re-opened the oat files.
+  if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
@@ -1979,21 +2378,17 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  // Create the app oat.
-  if (!dex2oat.CreateOatFile()) {
-    dex2oat.EraseOatFile();
+  if (!dex2oat.CreateOatFiles()) {
+    dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
-  // Do not close the oat file here. We might haven gotten the output file by file descriptor,
+  // Do not close the oat files here. We might have gotten the output file by file descriptor,
   // which we would lose.
-  if (!dex2oat.FlushOatFile()) {
-    return EXIT_FAILURE;
-  }
 
   // When given --host, finish early without stripping.
   if (dex2oat.IsHost()) {
-    if (!dex2oat.FlushCloseOatFile()) {
+    if (!dex2oat.FlushCloseOatFiles()) {
       return EXIT_FAILURE;
     }
 
@@ -2001,14 +2396,14 @@
     return EXIT_SUCCESS;
   }
 
-  // Copy unstripped to stripped location, if necessary. This will implicitly flush & close the
-  // unstripped version. If this is given, we expect to be able to open writable files by name.
-  if (!dex2oat.CopyUnstrippedToStripped()) {
+  // Copy stripped to unstripped location, if necessary. This will implicitly flush & close the
+  // stripped versions. If this is given, we expect to be able to open writable files by name.
+  if (!dex2oat.CopyStrippedToUnstripped()) {
     return EXIT_FAILURE;
   }
 
-  // Flush and close the file.
-  if (!dex2oat.FlushCloseOatFile()) {
+  // Flush and close the files.
+  if (!dex2oat.FlushCloseOatFiles()) {
     return EXIT_FAILURE;
   }
 
@@ -2026,6 +2421,20 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   dex2oat.ParseArgs(argc, argv);
 
+  // Process profile information and assess whether we need to do a profile-guided compilation.
+  // This operation involves I/O.
+  if (dex2oat.UseProfileGuidedCompilation()) {
+    if (dex2oat.ProcessProfiles()) {
+      if (!dex2oat.ShouldCompileBasedOnProfiles()) {
+        LOG(INFO) << "Skipped compilation because of insignificant profile delta";
+        return EXIT_SUCCESS;
+      }
+    } else {
+      LOG(WARNING) << "Failed to process profile files";
+      return EXIT_FAILURE;
+    }
+  }
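
Taken together, the three profile hooks gate the entire compilation. A sketch of that control flow in isolation, with stand-in predicates replacing the real profile I/O; note that a skip is deliberately reported as success:

    #include <cstdlib>
    #include <iostream>

    // Stand-ins for the Dex2Oat profile hooks; the real ones wrap
    // ProfileAssistant::ProcessProfiles() and its file I/O.
    static bool UseProfileGuidedCompilation() { return true; }
    static bool ProcessProfiles() { return true; }
    static bool ShouldCompileBasedOnProfiles() { return false; }

    int main() {
      if (UseProfileGuidedCompilation()) {
        if (!ProcessProfiles()) {
          std::cout << "Failed to process profile files\n";
          return EXIT_FAILURE;
        }
        if (!ShouldCompileBasedOnProfiles()) {
          // An insignificant profile delta means there is nothing new worth
          // compiling, not that anything went wrong.
          std::cout << "Skipped compilation because of insignificant profile delta\n";
          return EXIT_SUCCESS;
        }
      }
      // ... OpenFile(), Setup(), compilation ...
      return EXIT_SUCCESS;
    }
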
+
   // Check early that the result of compilation can be written
   if (!dex2oat.OpenFile()) {
     return EXIT_FAILURE;
@@ -2044,7 +2453,7 @@
   }
 
   if (!dex2oat.Setup()) {
-    dex2oat.EraseOatFile();
+    dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
 
diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc
index 4230cb2..9819233 100644
--- a/dexdump/dexdump_test.cc
+++ b/dexdump/dexdump_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/dexlist/dexlist_test.cc b/dexlist/dexlist_test.cc
index 82179dea..9a65ba6 100644
--- a/dexlist/dexlist_test.cc
+++ b/dexlist/dexlist_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index cd64a4f..ee7b21c 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -150,7 +150,9 @@
   { kSpecial2Mask | 0x3f, (28 << kOpcodeShift) | 0x3f, "sdbbp", "" },  // TODO: code
 
   // SPECIAL3
+  { kSpecial3Mask | 0x3f, (31 << kOpcodeShift), "ext", "TSAZ", },
   { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 3, "dext", "TSAZ", },
+  { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 4, "ins", "TSAz", },
   { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
     (31 << kOpcodeShift) | (16 << 6) | 32,
     "seb",
@@ -421,7 +423,7 @@
       opcode = gMipsInstructions[i].name;
       for (const char* args_fmt = gMipsInstructions[i].args_fmt; *args_fmt; ++args_fmt) {
         switch (*args_fmt) {
-          case 'A':  // sa (shift amount or [d]ext position).
+          case 'A':  // sa (shift amount or [d]ins/[d]ext position).
             args << sa;
             break;
           case 'B':  // Branch offset.
@@ -519,7 +521,8 @@
           case 's': args << 'f' << rs; break;
           case 'T': args << 'r' << rt; break;
           case 't': args << 'f' << rt; break;
-          case 'Z': args << rd; break;   // sz ([d]ext size).
+          case 'Z': args << (rd + 1); break;  // sz ([d]ext size).
+          case 'z': args << (rd - sa + 1); break;  // sz ([d]ins size).
         }
         if (*(args_fmt + 1)) {
           args << ", ";
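
In the SPECIAL3 encodings above, ext stores size - 1 in the rd field (the architecture's msbd), while ins stores the inclusive most-significant bit there, so the disassembler prints rd + 1 and rd - sa + 1 as the size respectively. A small decode sketch; the instruction word is hand-assembled for illustration:

    #include <cstdint>
    #include <iostream>

    int main() {
      // "ins $3, $2, 4, 8": SPECIAL3 opcode, rs=2, rt=3,
      // rd holds the msb (pos + size - 1 = 11), sa holds the pos (4),
      // and the function field is ins (4).
      uint32_t instruction = 0x7C435904;
      uint32_t rd = (instruction >> 11) & 0x1f;  // msb field
      uint32_t sa = (instruction >> 6) & 0x1f;   // lsb/pos field
      std::cout << "ins pos=" << sa << " size=" << (rd - sa + 1) << "\n";
      // -> ins pos=4 size=8
      // For ext, rd holds msbd = size - 1, so the size would print as rd + 1.
    }
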
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 5e71053..93a0974 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -49,7 +49,7 @@
  public:
   explicit ImgDiagDumper(std::ostream* os,
                        const ImageHeader& image_header,
-                       const char* image_location,
+                       const std::string& image_location,
                        pid_t image_diff_pid)
       : os_(os),
         image_header_(image_header),
@@ -163,7 +163,7 @@
     std::string error_msg;
 
     // Walk the bytes and diff against our boot image
-    const ImageHeader& boot_image_header = GetBootImageHeader();
+    const ImageHeader& boot_image_header = image_header_;
 
     os << "\nObserving boot image header at address "
        << reinterpret_cast<const void*>(&boot_image_header)
@@ -812,14 +812,6 @@
     return page_frame_number != page_frame_number_clean;
   }
 
-  static const ImageHeader& GetBootImageHeader() {
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
-    CHECK(image_space != nullptr);
-    const ImageHeader& image_header = image_space->GetImageHeader();
-    return image_header;
-  }
-
  private:
   // Return the image location, stripped of any directories, e.g. "boot.art" or "core.art"
   std::string GetImageLocationBaseName() const {
@@ -828,28 +820,31 @@
 
   std::ostream* os_;
   const ImageHeader& image_header_;
-  const char* image_location_;
+  const std::string image_location_;
   pid_t image_diff_pid_;  // Dump image diff against boot.art if pid is non-negative
 
   DISALLOW_COPY_AND_ASSIGN(ImgDiagDumper);
 };
 
-static int DumpImage(Runtime* runtime, const char* image_location,
-                     std::ostream* os, pid_t image_diff_pid) {
+static int DumpImage(Runtime* runtime, std::ostream* os, pid_t image_diff_pid) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
-  CHECK(image_space != nullptr);
-  const ImageHeader& image_header = image_space->GetImageHeader();
-  if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_location);
-    return EXIT_FAILURE;
+  std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+  CHECK(!image_spaces.empty());
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    if (!image_header.IsValid()) {
+      fprintf(stderr, "Invalid image header %s\n", image_space->GetImageLocation().c_str());
+      return EXIT_FAILURE;
+    }
+
+    ImgDiagDumper img_diag_dumper(
+        os, image_header, image_space->GetImageLocation(), image_diff_pid);
+    if (!img_diag_dumper.Dump()) {
+      return EXIT_FAILURE;
+    }
   }
-
-  ImgDiagDumper img_diag_dumper(os, image_header, image_location, image_diff_pid);
-
-  bool success = img_diag_dumper.Dump();
-  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+  return EXIT_SUCCESS;
 }
 
 struct ImgDiagArgs : public CmdlineArgs {
@@ -935,7 +930,6 @@
     CHECK(args_ != nullptr);
 
     return DumpImage(runtime,
-                     args_->boot_image_location_,
                      args_->os_,
                      args_->image_diff_pid_) == EXIT_SUCCESS;
   }
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index a926ca5..dc101e5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -47,9 +47,10 @@
     CommonRuntimeTest::SetUp();
 
     // We loaded the runtime with an explicit image. Therefore the image space must exist.
-    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-    ASSERT_TRUE(image_space != nullptr);
-    boot_image_location_ = image_space->GetImageLocation();
+    std::vector<gc::space::ImageSpace*> image_spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    ASSERT_TRUE(!image_spaces.empty());
+    boot_image_location_ = image_spaces[0]->GetImageLocation();
   }
 
   virtual void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index a3ef38d..5c75f20 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -74,14 +74,14 @@
 .PHONY: dump-oat-boot-$(TARGET_ARCH)
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot-$(TARGET_ARCH): $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) \
+	$(OATDUMP) $(addprefix --image=,$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION)) \
 	  --output=$(ART_DUMP_OAT_PATH)/boot.$(TARGET_ARCH).oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.$(TARGET_ARCH).oatdump.txt
 endif
 
 ifdef TARGET_2ND_ARCH
 dump-oat-boot-$(TARGET_2ND_ARCH): $(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
-	$(OATDUMP) --image=$(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) \
+	$(OATDUMP) $(addprefix --image=,$(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION)) \
 	  --output=$(ART_DUMP_OAT_PATH)/boot.$(TARGET_2ND_ARCH).oatdump.txt --instruction-set=$(TARGET_2ND_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.$(TARGET_2ND_ARCH).oatdump.txt
 endif
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index bad928e..52c6524 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1492,6 +1492,8 @@
 
     os << "MAGIC: " << image_header_.GetMagic() << "\n\n";
 
+    os << "IMAGE LOCATION: " << image_space_.GetImageLocation() << "\n\n";
+
     os << "IMAGE BEGIN: " << reinterpret_cast<void*>(image_header_.GetImageBegin()) << "\n\n";
 
     os << "IMAGE SIZE: " << image_header_.GetImageSize() << "\n\n";
@@ -1599,9 +1601,8 @@
 
     os << "OBJECTS:\n" << std::flush;
 
-    // Loop through all the image spaces and dump their objects.
+    // Loop through the image space and dump its objects.
     gc::Heap* heap = runtime->GetHeap();
-    const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
     Thread* self = Thread::Current();
     {
       {
@@ -1629,21 +1630,16 @@
         }
       }
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      for (const auto& space : spaces) {
-        if (space->IsImageSpace()) {
-          auto* image_space = space->AsImageSpace();
-          // Dump the normal objects before ArtMethods.
-          image_space->GetLiveBitmap()->Walk(ImageDumper::Callback, this);
-          indent_os << "\n";
-          // TODO: Dump fields.
-          // Dump methods after.
-          const auto& methods_section = image_header_.GetMethodsSection();
-          const size_t pointer_size =
-              InstructionSetPointerSize(oat_dumper_->GetOatInstructionSet());
-          DumpArtMethodVisitor visitor(this);
-          methods_section.VisitPackedArtMethods(&visitor, image_space->Begin(), pointer_size);
-        }
-      }
+      // Dump the normal objects before ArtMethods.
+      image_space_.GetLiveBitmap()->Walk(ImageDumper::Callback, this);
+      indent_os << "\n";
+      // TODO: Dump fields.
+      // Dump methods after.
+      const auto& methods_section = image_header_.GetMethodsSection();
+      const size_t pointer_size =
+          InstructionSetPointerSize(oat_dumper_->GetOatInstructionSet());
+      DumpArtMethodVisitor visitor(this);
+      methods_section.VisitPackedArtMethods(&visitor, image_space_.Begin(), pointer_size);
       // Dump the large objects separately.
       heap->GetLargeObjectsSpace()->GetLiveBitmap()->Walk(ImageDumper::Callback, this);
       indent_os << "\n";
@@ -2163,6 +2159,9 @@
       size_t sum_of_expansion = 0;
       size_t sum_of_expansion_squared = 0;
       size_t n = method_outlier_size.size();
+      if (n == 0) {
+        return;
+      }
       for (size_t i = 0; i < n; i++) {
         size_t cur_size = method_outlier_size[i];
         sum_of_sizes += cur_size;
@@ -2377,26 +2376,28 @@
   DISALLOW_COPY_AND_ASSIGN(ImageDumper);
 };
 
-static int DumpImage(Runtime* runtime, const char* image_location, OatDumperOptions* options,
-                     std::ostream* os) {
+static int DumpImage(Runtime* runtime, OatDumperOptions* options, std::ostream* os) {
   // Dumping the image, no explicit class loader.
   ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
-  CHECK(image_space != nullptr);
-  const ImageHeader& image_header = image_space->GetImageHeader();
-  if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_location);
-    return EXIT_FAILURE;
+  std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+  CHECK(!image_spaces.empty());
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    if (!image_header.IsValid()) {
+      fprintf(stderr, "Invalid image header %s\n", image_space->GetImageLocation().c_str());
+      return EXIT_FAILURE;
+    }
+
+    ImageDumper image_dumper(os, *image_space, image_header, options);
+    if (!image_dumper.Dump()) {
+      return EXIT_FAILURE;
+    }
   }
-
-  ImageDumper image_dumper(os, *image_space, image_header, options);
-
-  bool success = image_dumper.Dump();
-  return (success) ? EXIT_SUCCESS : EXIT_FAILURE;
+  return EXIT_SUCCESS;
 }
 
 static int DumpOatWithRuntime(Runtime* runtime, OatFile* oat_file, OatDumperOptions* options,
@@ -2689,8 +2690,7 @@
                      args_->os_) == EXIT_SUCCESS;
     }
 
-    return DumpImage(runtime, args_->image_location_, oat_dumper_options_.get(), args_->os_)
-      == EXIT_SUCCESS;
+    return DumpImage(runtime, oat_dumper_options_.get(), args_->os_) == EXIT_SUCCESS;
   }
 
   std::unique_ptr<OatDumperOptions> oat_dumper_options_;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 46ab34b..d836532 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -118,103 +118,42 @@
   return true;
 }
 
-bool PatchOat::Patch(const std::string& image_location, off_t delta,
-                     File* output_image, InstructionSet isa,
-                     TimingLogger* timings) {
-  CHECK(Runtime::Current() == nullptr);
-  CHECK(output_image != nullptr);
-  CHECK_GE(output_image->Fd(), 0);
-  CHECK(!image_location.empty()) << "image file must have a filename.";
-  CHECK_NE(isa, kNone);
+static File* CreateOrOpen(const char* name, bool* created) {
+  if (OS::FileExists(name)) {
+    *created = false;
+    return OS::OpenFileReadWrite(name);
+  } else {
+    *created = true;
+    std::unique_ptr<File> f(OS::CreateEmptyFile(name));
+    if (f.get() != nullptr) {
+      if (fchmod(f->Fd(), 0644) != 0) {
+        PLOG(ERROR) << "Unable to make " << name << " world readable";
+        TEMP_FAILURE_RETRY(unlink(name));
+        return nullptr;
+      }
+    }
+    return f.release();
+  }
+}
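
A POSIX sketch of the CreateOrOpen() contract above, using raw file descriptors instead of ART's File class (the helper name and the mode-at-creation shortcut are assumptions of this sketch): reuse an existing file read-write, otherwise create it world readable. FinishFile() below then flush-closes on success or erases on failure.

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>

    // Reuse an existing file read-write; otherwise create it with mode 0644.
    static int CreateOrOpenFd(const char* name, bool* created) {
      int fd = open(name, O_RDWR);
      if (fd >= 0) {
        *created = false;
        return fd;
      }
      *created = true;
      return open(name, O_RDWR | O_CREAT | O_TRUNC, 0644);
    }

    int main() {
      bool created;
      int fd = CreateOrOpenFd("/tmp/patchoat-demo.oat", &created);
      if (fd < 0) {
        perror("open");
        return 1;
      }
      std::printf("%s /tmp/patchoat-demo.oat\n", created ? "created" : "reused");
      close(fd);
      return 0;
    }
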
 
-  TimingLogger::ScopedTiming t("Runtime Setup", timings);
-  const char *isa_name = GetInstructionSetString(isa);
-  std::string image_filename;
-  if (!LocationToFilename(image_location, isa, &image_filename)) {
-    LOG(ERROR) << "Unable to find image at location " << image_location;
+// Either try to close the file (close=true), or erase it.
+static bool FinishFile(File* file, bool close) {
+  if (close) {
+    if (file->FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Failed to flush and close file.";
+      return false;
+    }
+    return true;
+  } else {
+    file->Erase();
     return false;
   }
-  std::unique_ptr<File> input_image(OS::OpenFileForReading(image_filename.c_str()));
-  if (input_image.get() == nullptr) {
-    LOG(ERROR) << "unable to open input image file at " << image_filename
-               << " for location " << image_location;
-    return false;
-  }
-
-  int64_t image_len = input_image->GetLength();
-  if (image_len < 0) {
-    LOG(ERROR) << "Error while getting image length";
-    return false;
-  }
-  ImageHeader image_header;
-  if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
-                                                sizeof(image_header), 0)) {
-    LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
-    return false;
-  }
-
-  if (image_header.GetStorageMode() != ImageHeader::kStorageModeUncompressed) {
-    LOG(ERROR) << "Patchoat is not supported with compressed image files "
-               << input_image->GetPath();
-    return false;
-  }
-
-  /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
-  // Nothing special to do right now since the image always needs to get patched.
-  // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
-
-  // Set up the runtime
-  RuntimeOptions options;
-  NoopCompilerCallbacks callbacks;
-  options.push_back(std::make_pair("compilercallbacks", &callbacks));
-  std::string img = "-Ximage:" + image_location;
-  options.push_back(std::make_pair(img.c_str(), nullptr));
-  options.push_back(std::make_pair("imageinstructionset", reinterpret_cast<const void*>(isa_name)));
-  options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
-  if (!Runtime::Create(options, false)) {
-    LOG(ERROR) << "Unable to initialize runtime";
-    return false;
-  }
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more manageable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-  ScopedObjectAccess soa(Thread::Current());
-
-  t.NewTiming("Image and oat Patching setup");
-  // Create the map where we will write the image patches to.
-  std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
-                                                PROT_READ | PROT_WRITE,
-                                                MAP_PRIVATE,
-                                                input_image->Fd(),
-                                                0,
-                                                /*low_4gb*/false,
-                                                input_image->GetPath().c_str(),
-                                                &error_msg));
-  if (image.get() == nullptr) {
-    LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
-    return false;
-  }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetBootImageSpace();
-
-  PatchOat p(isa, image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(), delta, timings);
-  t.NewTiming("Patching files");
-  if (!p.PatchImage()) {
-    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
-    return false;
-  }
-
-  t.NewTiming("Writing files");
-  if (!p.WriteImage(output_image)) {
-    return false;
-  }
-  return true;
 }
 
 bool PatchOat::Patch(File* input_oat, const std::string& image_location, off_t delta,
                      File* output_oat, File* output_image, InstructionSet isa,
                      TimingLogger* timings,
-                     bool output_oat_opened_from_fd,
+                     bool output_oat_opened_from_fd ATTRIBUTE_UNUSED,
                      bool new_oat_out) {
   CHECK(Runtime::Current() == nullptr);
   CHECK(output_image != nullptr);
@@ -236,31 +175,6 @@
     isa = GetInstructionSetFromELF(elf_hdr.e_machine, elf_hdr.e_flags);
   }
   const char* isa_name = GetInstructionSetString(isa);
-  std::string image_filename;
-  if (!LocationToFilename(image_location, isa, &image_filename)) {
-    LOG(ERROR) << "Unable to find image at location " << image_location;
-    return false;
-  }
-  std::unique_ptr<File> input_image(OS::OpenFileForReading(image_filename.c_str()));
-  if (input_image.get() == nullptr) {
-    LOG(ERROR) << "unable to open input image file at " << image_filename
-               << " for location " << image_location;
-    return false;
-  }
-  int64_t image_len = input_image->GetLength();
-  if (image_len < 0) {
-    LOG(ERROR) << "Error while getting image length";
-    return false;
-  }
-  ImageHeader image_header;
-  if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
-                                              sizeof(image_header), 0)) {
-    LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
-  }
-
-  /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
-  // Nothing special to do right now since the image always needs to get patched.
-  // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
 
   // Set up the runtime
   RuntimeOptions options;
@@ -279,70 +193,169 @@
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   ScopedObjectAccess soa(Thread::Current());
 
+  std::string output_directory =
+      output_image->GetPath().substr(0, output_image->GetPath().find_last_of("/"));
   t.NewTiming("Image and oat Patching setup");
-  // Create the map where we will write the image patches to.
-  std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
-                                                PROT_READ | PROT_WRITE,
-                                                MAP_PRIVATE,
-                                                input_image->Fd(),
-                                                0,
-                                                /*low_4gb*/false,
-                                                input_image->GetPath().c_str(),
-                                                &error_msg));
-  if (image.get() == nullptr) {
-    LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
-    return false;
-  }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetBootImageSpace();
+  std::vector<gc::space::ImageSpace*> spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  std::map<gc::space::ImageSpace*, std::unique_ptr<File>> space_to_file_map;
+  std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>> space_to_memmap_map;
+  std::map<gc::space::ImageSpace*, PatchOat> space_to_patchoat_map;
+  std::map<gc::space::ImageSpace*, bool> space_to_skip_patching_map;
 
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat,
-                                             PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (elf.get() == nullptr) {
-    LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-    return false;
-  }
-
-  bool skip_patching_oat = false;
-  MaybePic is_oat_pic = IsOatPic(elf.get());
-  if (is_oat_pic >= ERROR_FIRST) {
-    // Error logged by IsOatPic
-    return false;
-  } else if (is_oat_pic == PIC) {
-    // Do not need to do ELF-file patching. Create a symlink and skip the ELF patching.
-    if (!ReplaceOatFileWithSymlink(input_oat->GetPath(),
-                                   output_oat->GetPath(),
-                                   output_oat_opened_from_fd,
-                                   new_oat_out)) {
-      // Errors already logged by above call.
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+    std::unique_ptr<File> input_image(OS::OpenFileForReading(input_image_filename.c_str()));
+    if (input_image.get() == nullptr) {
+      LOG(ERROR) << "Unable to open input image file at " << input_image_filename;
       return false;
     }
-    // Don't patch the OAT, since we just symlinked it. Image still needs patching.
-    skip_patching_oat = true;
-  } else {
-    CHECK(is_oat_pic == NOT_PIC);
+
+    int64_t image_len = input_image->GetLength();
+    if (image_len < 0) {
+      LOG(ERROR) << "Error while getting image length";
+      return false;
+    }
+    ImageHeader image_header;
+    if (sizeof(image_header) != input_image->Read(reinterpret_cast<char*>(&image_header),
+                                                  sizeof(image_header), 0)) {
+      LOG(ERROR) << "Unable to read image header from image file " << input_image->GetPath();
+      return false;
+    }
+
+    /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
+    // Nothing special to do right now since the image always needs to get patched.
+    // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
+
+    // Create the map where we will write the image patches to.
+    std::string error_msg;
+    std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
+                                                  PROT_READ | PROT_WRITE,
+                                                  MAP_PRIVATE,
+                                                  input_image->Fd(),
+                                                  0,
+                                                  /*low_4gb*/false,
+                                                  input_image->GetPath().c_str(),
+                                                  &error_msg));
+    if (image.get() == nullptr) {
+      LOG(ERROR) << "Unable to map image file " << input_image->GetPath() << " : " << error_msg;
+      return false;
+    }
+    space_to_file_map.emplace(space, std::move(input_image));
+    space_to_memmap_map.emplace(space, std::move(image));
   }
 
-  PatchOat p(isa, elf.release(), image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
-             delta, timings);
-  t.NewTiming("Patching files");
-  if (!skip_patching_oat && !p.PatchElf()) {
-    LOG(ERROR) << "Failed to patch oat file " << input_oat->GetPath();
-    return false;
-  }
-  if (!p.PatchImage()) {
-    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
-    return false;
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+    std::string input_oat_filename =
+        ImageHeader::GetOatLocationFromImageLocation(input_image_filename);
+    std::unique_ptr<File> input_oat_file(OS::OpenFileForReading(input_oat_filename.c_str()));
+    if (input_oat_file.get() == nullptr) {
+      LOG(ERROR) << "Unable to open input oat file at " << input_oat_filename;
+      return false;
+    }
+    std::string error_msg;
+    std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat_file.get(),
+                                               PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
+    if (elf.get() == nullptr) {
+      LOG(ERROR) << "Unable to open oat file " << input_oat_file->GetPath() << " : " << error_msg;
+      return false;
+    }
+
+    bool skip_patching_oat = false;
+    MaybePic is_oat_pic = IsOatPic(elf.get());
+    if (is_oat_pic >= ERROR_FIRST) {
+      // Error logged by IsOatPic
+      return false;
+    } else if (is_oat_pic == PIC) {
+      // Do not need to do ELF-file patching. Create a symlink and skip the ELF patching.
+
+      std::string converted_image_filename = space->GetImageLocation();
+      std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
+      std::string output_image_filename = output_directory +
+                                          (StartsWith(converted_image_filename, "/") ? "" : "/") +
+                                          converted_image_filename;
+      std::string output_oat_filename =
+          ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
+
+      if (!ReplaceOatFileWithSymlink(input_oat_file->GetPath(),
+                                     output_oat_filename,
+                                     false,
+                                     true)) {
+        // Errors already logged by above call.
+        return false;
+      }
+      // Don't patch the OAT, since we just symlinked it. Image still needs patching.
+      skip_patching_oat = true;
+    } else {
+      CHECK(is_oat_pic == NOT_PIC);
+    }
+
+    PatchOat& p = space_to_patchoat_map.emplace(space,
+                                                PatchOat(
+                                                    isa,
+                                                    elf.release(),
+                                                    space_to_memmap_map.find(space)->second.get(),
+                                                    space->GetLiveBitmap(),
+                                                    space->GetMemMap(),
+                                                    delta,
+                                                    &space_to_memmap_map,
+                                                    timings)).first->second;
+
+    t.NewTiming("Patching files");
+    if (!skip_patching_oat && !p.PatchElf()) {
+      LOG(ERROR) << "Failed to patch oat file " << input_oat_file->GetPath();
+      return false;
+    }
+    if (!p.PatchImage(i == 0)) {
+      LOG(ERROR) << "Failed to patch image file " << input_image_filename;
+      return false;
+    }
+
+    space_to_skip_patching_map.emplace(space, skip_patching_oat);
   }
 
-  t.NewTiming("Writing files");
-  if (!skip_patching_oat && !p.WriteElf(output_oat)) {
-    LOG(ERROR) << "Failed to write oat file " << input_oat->GetPath();
-    return false;
-  }
-  if (!p.WriteImage(output_image)) {
-    LOG(ERROR) << "Failed to write image file " << input_image->GetPath();
-    return false;
+  for (size_t i = 0; i < spaces.size(); ++i) {
+    gc::space::ImageSpace* space = spaces[i];
+    std::string input_image_filename = space->GetImageFilename();
+
+    t.NewTiming("Writing files");
+    std::string converted_image_filename = space->GetImageLocation();
+    std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
+    std::string output_image_filename = output_directory +
+                                        (StartsWith(converted_image_filename, "/") ? "" : "/") +
+                                        converted_image_filename;
+    std::unique_ptr<File>
+        output_image_file(CreateOrOpen(output_image_filename.c_str(), &new_oat_out));
+    if (output_image_file.get() == nullptr) {
+      LOG(ERROR) << "Failed to open output image file at " << output_image_filename;
+      return false;
+    }
+
+    PatchOat& p = space_to_patchoat_map.find(space)->second;
+
+    if (!p.WriteImage(output_image_file.get())) {
+      LOG(ERROR) << "Failed to write image file " << output_image_file->GetPath();
+      return false;
+    }
+    FinishFile(output_image_file.get(), true);
+
+    bool skip_patching_oat = space_to_skip_patching_map.find(space)->second;
+    if (!skip_patching_oat) {
+      std::string output_oat_filename =
+          ImageHeader::GetOatLocationFromImageLocation(output_image_filename);
+      std::unique_ptr<File>
+          output_oat_file(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
+      if (output_oat_file.get() == nullptr) {
+        LOG(ERROR) << "Failed to open output oat file at " << output_oat_filename;
+        return false;
+      }
+      if (!p.WriteElf(output_oat_file.get())) {
+        LOG(ERROR) << "Failed to write oat file " << output_oat_file->GetPath();
+        return false;
+      }
+      FinishFile(output_oat_file.get(), true);
+    }
   }
   return true;
 }
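
Note the shape of the rewritten Patch(): three separate passes over the same list of spaces: map every input image, then patch, then write. Mapping everything up front matters because patching one image may need to chase pointers into another image's writable copy, and deferring all writes means a failure never leaves a half-written image behind. A minimal sketch of that staging, with hypothetical Space/Mapping stand-ins:

    #include <map>
    #include <memory>
    #include <vector>

    struct Space {};
    struct Mapping {};  // stand-in for the writable MemMap copy of one image

    bool PatchAll(const std::vector<Space*>& spaces) {
      std::map<Space*, std::unique_ptr<Mapping>> mappings;
      // Pass 1: map every input image before mutating anything.
      for (Space* s : spaces) {
        mappings.emplace(s, std::unique_ptr<Mapping>(new Mapping()));
      }
      // Pass 2: patch each copy; cross-image references resolve because
      // all mappings already exist.
      for (Space* s : spaces) {
        Mapping* m = mappings.find(s)->second.get();
        (void)m;  // patch in place
      }
      // Pass 3: only now write the patched copies to their output files.
      for (Space* s : spaces) {
        Mapping* m = mappings.find(s)->second.get();
        (void)m;  // flush to disk
      }
      return true;
    }

    int main() { return PatchAll({}) ? 0 : 1; }
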
@@ -527,7 +540,7 @@
   // Note that we require that ReadFromMemory does not make an internal copy of the elements.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
-  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  temp_table.AddTableFromMemory(image_->Begin() + section.Offset());
   FixupRootVisitor visitor(this);
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
@@ -616,7 +629,7 @@
   }
 }
 
-bool PatchOat::PatchImage() {
+bool PatchOat::PatchImage(bool primary_image) {
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   CHECK_GT(image_->Size(), sizeof(ImageHeader));
   // These are the roots from the original file.
@@ -630,9 +643,12 @@
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
-  VisitObject(img_roots);
+  if (primary_image) {
+    VisitObject(img_roots);
+  }
+
   if (!image_header->IsValid()) {
-    LOG(ERROR) << "reloction renders image header invalid";
+    LOG(ERROR) << "relocation renders image header invalid";
     return false;
   }
 
@@ -655,7 +671,6 @@
 void PatchOat::PatchVisitor::operator() (mirror::Object* obj, MemberOffset off,
                                          bool is_static_unused ATTRIBUTE_UNUSED) const {
   mirror::Object* referent = obj->GetFieldObject<mirror::Object, kVerifyNone>(off);
-  DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
   mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
@@ -691,7 +706,7 @@
     klass->FixupNativePointers(copy_klass, pointer_size, native_visitor);
     auto* vtable = klass->GetVTable();
     if (vtable != nullptr) {
-      vtable->Fixup(RelocatedCopyOf(vtable), pointer_size, native_visitor);
+      vtable->Fixup(RelocatedCopyOfFollowImages(vtable), pointer_size, native_visitor);
     }
     auto* iftable = klass->GetIfTable();
     if (iftable != nullptr) {
@@ -699,7 +714,9 @@
         if (iftable->GetMethodArrayCount(i) > 0) {
           auto* method_array = iftable->GetMethodArray(i);
           CHECK(method_array != nullptr);
-          method_array->Fixup(RelocatedCopyOf(method_array), pointer_size, native_visitor);
+          method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
+                              pointer_size,
+                              native_visitor);
         }
       }
     }
@@ -972,38 +989,6 @@
   return true;
 }
 
-static File* CreateOrOpen(const char* name, bool* created) {
-  if (OS::FileExists(name)) {
-    *created = false;
-    return OS::OpenFileReadWrite(name);
-  } else {
-    *created = true;
-    std::unique_ptr<File> f(OS::CreateEmptyFile(name));
-    if (f.get() != nullptr) {
-      if (fchmod(f->Fd(), 0644) != 0) {
-        PLOG(ERROR) << "Unable to make " << name << " world readable";
-        TEMP_FAILURE_RETRY(unlink(name));
-        return nullptr;
-      }
-    }
-    return f.release();
-  }
-}
-
-// Either try to close the file (close=true), or erase it.
-static bool FinishFile(File* file, bool close) {
-  if (close) {
-    if (file->FlushCloseOrErase() != 0) {
-      PLOG(ERROR) << "Failed to flush and close file.";
-      return false;
-    }
-    return true;
-  } else {
-    file->Erase();
-    return false;
-  }
-}
-
 static int patchoat(int argc, char **argv) {
   InitLogging(argv);
   MemMap::Init();
@@ -1189,8 +1174,12 @@
   bool have_image_files = have_output_image;
   bool have_oat_files = have_output_oat;
 
-  if (!have_oat_files && !have_image_files) {
-    Usage("Must be patching either an oat or an image file or both.");
+  if (!have_oat_files) {
+    if (have_image_files) {
+      Usage("Cannot patch an image file without an oat file");
+    } else {
+      Usage("Must be patching either an oat file or an image file with an oat file.");
+    }
   }
 
   if (!have_oat_files && !isa_set) {
@@ -1425,10 +1414,6 @@
                           output_oat_fd >= 0,  // was it opened from FD?
                           new_oat_out);
     ret = FinishFile(output_oat.get(), ret);
-  } else if (have_image_files) {
-    TimingLogger::ScopedTiming pt("patch image", &timings);
-    ret = PatchOat::Patch(input_image_location, base_delta, output_image.get(), isa, &timings);
-    ret = FinishFile(output_image.get(), ret);
   } else {
     CHECK(false);
     ret = true;
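
Output file names above are derived using the dalvik-cache convention: every '/' in the image location except the leading one becomes '@', and the result is appended to the output directory. Here is the conversion in isolation; the sample paths are purely illustrative:

    #include <algorithm>
    #include <iostream>
    #include <string>

    // "/system/framework/boot.art" -> "<dir>/system@framework@boot.art"
    std::string CacheName(const std::string& dir, std::string location) {
      if (location.size() > 1) {
        // Skip the first character so a leading '/' survives.
        std::replace(location.begin() + 1, location.end(), '/', '@');
      }
      bool absolute = !location.empty() && location[0] == '/';
      return dir + (absolute ? "" : "/") + location;
    }

    int main() {
      std::cout << CacheName("/data/dalvik-cache/arm", "/system/framework/boot.art")
                << "\n";  // /data/dalvik-cache/arm/system@framework@boot.art
    }
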
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 38bd865..ceddc34 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -23,8 +23,10 @@
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "gc/accounting/space_bitmap.h"
+#include "gc/space/image_space.h"
 #include "gc/heap.h"
 #include "os.h"
+#include "runtime.h"
 
 namespace art {
 
@@ -57,21 +59,23 @@
                    bool output_oat_opened_from_fd,  // Was this using --output-oat-fd ?
                     bool new_oat_out);               // Output oat was a new file created by us?
 
+  ~PatchOat() {}
+  PatchOat(PatchOat&&) = default;
+
  private:
   // Takes ownership only of the ElfFile. All other pointers are only borrowed.
   PatchOat(ElfFile* oat_file, off_t delta, TimingLogger* timings)
       : oat_file_(oat_file), image_(nullptr), bitmap_(nullptr), heap_(nullptr), delta_(delta),
-        isa_(kNone), timings_(timings) {}
+        isa_(kNone), space_map_(nullptr), timings_(timings) {}
   PatchOat(InstructionSet isa, MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
            MemMap* heap, off_t delta, TimingLogger* timings)
       : image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), isa_(isa), timings_(timings) {}
+        delta_(delta), isa_(isa), space_map_(nullptr), timings_(timings) {}
   PatchOat(InstructionSet isa, ElfFile* oat_file, MemMap* image,
            gc::accounting::ContinuousSpaceBitmap* bitmap, MemMap* heap, off_t delta,
-           TimingLogger* timings)
+           std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>>* map, TimingLogger* timings)
       : oat_file_(oat_file), image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), isa_(isa), timings_(timings) {}
-  ~PatchOat() {}
+        delta_(delta), isa_(isa), space_map_(map), timings_(timings) {}
 
   // Was the .art image at image_path made with --compile-pic ?
   static bool IsImagePic(const ImageHeader& image_header, const std::string& image_path);
@@ -111,7 +115,7 @@
   template <typename ElfFileImpl>
   bool PatchOatHeader(ElfFileImpl* oat_file);
 
-  bool PatchImage() SHARED_REQUIRES(Locks::mutator_lock_);
+  bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
@@ -138,6 +142,24 @@
   }
 
   template <typename T>
+  T* RelocatedCopyOfFollowImages(T* obj) const {
+    if (obj == nullptr) {
+      return nullptr;
+    }
+    // Find ImageSpace this belongs to.
+    auto image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      if (image_space->Contains(obj)) {
+        uintptr_t heap_off = reinterpret_cast<uintptr_t>(obj) -
+                             reinterpret_cast<uintptr_t>(image_space->GetMemMap()->Begin());
+        return reinterpret_cast<T*>(space_map_->find(image_space)->second->Begin() + heap_off);
+      }
+    }
+    LOG(FATAL) << "Did not find object in boot image space " << obj;
+    UNREACHABLE();
+  }
+
+  template <typename T>
   T* RelocatedAddressOfPointer(T* obj) const {
     if (obj == nullptr) {
       return obj;
@@ -197,6 +219,8 @@
   // Active instruction set, used to know the entrypoint size.
   const InstructionSet isa_;
 
+  const std::map<gc::space::ImageSpace*, std::unique_ptr<MemMap>>* space_map_;
+
   TimingLogger* timings_;
 
   friend class FixupRootVisitor;
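
RelocatedCopyOfFollowImages rebases a pointer by locating the image space that contains it and applying the same byte offset inside that space's writable copy. The offset arithmetic in isolation, assuming two equal-sized buffers standing in for the mapped image and its copy:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Rebase a pointer inside `src` onto the equivalent offset in `dst`.
    template <typename T>
    T* Rebase(T* obj, const std::vector<uint8_t>& src, std::vector<uint8_t>& dst) {
      uintptr_t off = reinterpret_cast<uintptr_t>(obj) -
                      reinterpret_cast<uintptr_t>(src.data());
      assert(off < src.size() && src.size() == dst.size());
      return reinterpret_cast<T*>(dst.data() + off);
    }

    int main() {
      std::vector<uint8_t> image(64), copy(64);
      uint8_t* in_image = image.data() + 16;  // some object inside the image
      uint8_t* in_copy = Rebase(in_image, image, copy);
      assert(in_copy == copy.data() + 16);    // same offset, different mapping
    }
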
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 4096117..04645d1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -102,11 +102,13 @@
   jdwp/jdwp_socket.cc \
   jdwp/object_registry.cc \
   jni_env_ext.cc \
+  jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
   jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
+  jit/profile_saver.cc  \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
   lambda/closure.cc \
@@ -248,6 +250,8 @@
   thread_android.cc
 
 LIBART_TARGET_SRC_FILES_arm := \
+  interpreter/mterp/mterp.cc \
+  interpreter/mterp/out/mterp_arm.S \
   arch/arm/context_arm.cc.arm \
   arch/arm/entrypoints_init_arm.cc \
   arch/arm/instruction_set_features_assembly_tests.S \
@@ -259,6 +263,7 @@
   arch/arm/fault_handler_arm.cc
 
 LIBART_TARGET_SRC_FILES_arm64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/arm64/context_arm64.cc \
   arch/arm64/entrypoints_init_arm64.cc \
   arch/arm64/jni_entrypoints_arm64.S \
@@ -269,6 +274,7 @@
   arch/arm64/fault_handler_arm64.cc
 
 LIBART_SRC_FILES_x86 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
@@ -283,6 +289,7 @@
 # Note that the fault_handler_x86.cc is not a mistake.  This file is
 # shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/x86_64/context_x86_64.cc \
   arch/x86_64/entrypoints_init_x86_64.cc \
   arch/x86_64/jni_entrypoints_x86_64.S \
@@ -296,6 +303,7 @@
   $(LIBART_SRC_FILES_x86_64) \
 
 LIBART_TARGET_SRC_FILES_mips := \
+  interpreter/mterp/mterp_stub.cc \
   arch/mips/context_mips.cc \
   arch/mips/entrypoints_init_mips.cc \
   arch/mips/jni_entrypoints_mips.S \
@@ -305,6 +313,7 @@
   arch/mips/fault_handler_mips.cc
 
 LIBART_TARGET_SRC_FILES_mips64 := \
+  interpreter/mterp/mterp_stub.cc \
   arch/mips64/context_mips64.cc \
   arch/mips64/entrypoints_init_mips64.cc \
   arch/mips64/jni_entrypoints_mips64.S \
@@ -362,17 +371,13 @@
   thread_state.h \
   verifier/method_verifier.h
 
+LIBOPENJDKJVM_SRC_FILES := openjdkjvm/OpenjdkJvm.cc
+
 LIBART_CFLAGS := -DBUILDING_LIBART=1
 
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
 
-ifeq ($(MALLOC_IMPL),dlmalloc)
-  LIBART_TARGET_CFLAGS += -DUSE_DLMALLOC
-else
-  LIBART_TARGET_CFLAGS += -DUSE_JEMALLOC
-endif
-
 # Default dex2oat instruction set features.
 LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES := default
 LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
@@ -398,8 +403,9 @@
 
 # $(1): target or host
 # $(2): ndebug or debug
-# $(3): static or shared (empty means shared, applies only for host)
-define build-libart
+# $(3): static or shared (note that static only applies for host)
+# $(4): module name: either libart or libopenjdkjvm
+define build-runtime-library
   ifneq ($(1),target)
     ifneq ($(1),host)
       $$(error expected target or host for argument 1, received $(1))
@@ -410,6 +416,11 @@
       $$(error expected ndebug or debug for argument 2, received $(2))
     endif
   endif
+  ifneq ($(4),libart)
+    ifneq ($(4),libopenjdkjvm)
+      $$(error expected libart or libopenjdkjvm for argument 4, received $(4))
+    endif
+  endif
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
@@ -418,12 +429,12 @@
   include $$(CLEAR_VARS)
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
-    LOCAL_MODULE := libart
+    LOCAL_MODULE := $(4)
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
     endif
   else # debug
-    LOCAL_MODULE := libartd
+    LOCAL_MODULE := $(4)d
   endif
 
   LOCAL_MODULE_TAGS := optional
@@ -434,17 +445,25 @@
     LOCAL_MODULE_CLASS := SHARED_LIBRARIES
   endif
 
-  ifeq ($$(art_target_or_host),target)
-    LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
-    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-      $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
-  else # host
-    LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
-    LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
-    LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
-    LOCAL_IS_HOST_MODULE := true
+  ifeq ($(4),libart)
+    ifeq ($$(art_target_or_host),target)
+      LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
+      $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+        $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
+    else # host
+      LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
+      LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
+      LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
+      LOCAL_IS_HOST_MODULE := true
+    endif
+  else # libopenjdkjvm
+    LOCAL_SRC_FILES := $$(LIBOPENJDKJVM_SRC_FILES)
+    ifeq ($$(art_target_or_host),host)
+      LOCAL_IS_HOST_MODULE := true
+    endif
   endif
 
+ifeq ($(4),libart)
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
@@ -455,6 +474,7 @@
 	$$(transform-generated-source)
 
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
+endif
 
   LOCAL_CFLAGS := $$(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $$(LIBART_LDFLAGS)
@@ -536,6 +556,15 @@
       LOCAL_SHARED_LIBRARIES += libcutils
     endif
   endif
+
+  ifeq ($(4),libopenjdkjvm)
+    ifeq ($$(art_ndebug_or_debug),ndebug)
+      LOCAL_SHARED_LIBRARIES += libart
+    else
+      LOCAL_SHARED_LIBRARIES += libartd
+    endif
+    LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvm/NOTICE
+  endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
 
@@ -572,24 +601,30 @@
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-libart,host,ndebug))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvm))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,ndebug,static))
+    $(eval $(call build-runtime-library,host,ndebug,static,libart))
+    $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvm))
   endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-libart,host,debug))
+  $(eval $(call build-runtime-library,host,debug,shared,libart))
+  $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvm))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,debug,static))
+    $(eval $(call build-runtime-library,host,debug,static,libart))
+    $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvm))
   endif
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-#  $(error $(call build-libart,target,ndebug))
-  $(eval $(call build-libart,target,ndebug))
+#  $(error $(call build-runtime-library,target,ndebug))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvm))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart,target,debug))
+  $(eval $(call build-runtime-library,target,debug,shared,libart))
+  $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvm))
 endif
 
 # Clear locally defined variables.
@@ -620,4 +655,4 @@
 LIBART_CFLAGS :=
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
-build-libart :=
+build-runtime-library :=
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index effa1c5..6f36016 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -24,7 +24,6 @@
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "interpreter/interpreter.h"
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 2bc6c79..31610a3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -133,8 +133,20 @@
 #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_current_ibase.
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
+            art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_default_ibase.
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
+            art::Thread::MterpDefaultIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.mterp_alt_ibase.
+#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
+            art::Thread::MterpAltIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 6 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
             art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
@@ -146,6 +158,40 @@
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
             art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
+// Offsets within ShadowFrame.
+#define SHADOWFRAME_LINK_OFFSET 0
+ADD_TEST_EQ(SHADOWFRAME_LINK_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LinkOffset()))
+#define SHADOWFRAME_METHOD_OFFSET (SHADOWFRAME_LINK_OFFSET + 1 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_METHOD_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::MethodOffset()))
+#define SHADOWFRAME_RESULT_REGISTER_OFFSET (SHADOWFRAME_LINK_OFFSET + 2 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_RESULT_REGISTER_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::ResultRegisterOffset()))
+#define SHADOWFRAME_DEX_PC_PTR_OFFSET (SHADOWFRAME_LINK_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_PTR_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCPtrOffset()))
+#define SHADOWFRAME_CODE_ITEM_OFFSET (SHADOWFRAME_LINK_OFFSET + 4 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_CODE_ITEM_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CodeItemOffset()))
+#define SHADOWFRAME_LOCK_COUNT_DATA_OFFSET (SHADOWFRAME_LINK_OFFSET + 5 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_LOCK_COUNT_DATA_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::LockCountDataOffset()))
+#define SHADOWFRAME_NUMBER_OF_VREGS_OFFSET (SHADOWFRAME_LINK_OFFSET + 6 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::NumberOfVRegsOffset()))
+#define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
+ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
+
+// Offsets within CodeItem
+#define CODEITEM_INSNS_OFFSET 16
+ADD_TEST_EQ(CODEITEM_INSNS_OFFSET,
+            static_cast<int32_t>(OFFSETOF_MEMBER(art::DexFile::CodeItem, insns_)))
+
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
 ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
@@ -160,16 +206,16 @@
 ADD_TEST_EQ(size_t(MIRROR_OBJECT_HEADER_SIZE), sizeof(art::mirror::Object))
 
 // Offsets within java.lang.Class.
-#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (36 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
@@ -188,6 +234,26 @@
 ADD_TEST_EQ(MIRROR_CHAR_ARRAY_DATA_OFFSET,
             art::mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value())
 
+#define MIRROR_BOOLEAN_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BOOLEAN_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint8_t)).Int32Value())
+
+#define MIRROR_BYTE_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_BYTE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int8_t)).Int32Value())
+
+#define MIRROR_SHORT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_SHORT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int16_t)).Int32Value())
+
+#define MIRROR_INT_ARRAY_DATA_OFFSET MIRROR_CHAR_ARRAY_DATA_OFFSET
+ADD_TEST_EQ(MIRROR_INT_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(int32_t)).Int32Value())
+
+#define MIRROR_WIDE_ARRAY_DATA_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_WIDE_ARRAY_DATA_OFFSET,
+            art::mirror::Array::DataOffset(sizeof(uint64_t)).Int32Value())
+
 #define MIRROR_OBJECT_ARRAY_DATA_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_OBJECT_ARRAY_DATA_OFFSET,
     art::mirror::Array::DataOffset(
@@ -299,6 +365,12 @@
 // Assert this so that we can avoid zeroing the next field by installing the class pointer.
 ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
 
+#define THREAD_SUSPEND_REQUEST 1
+ADD_TEST_EQ(THREAD_SUSPEND_REQUEST, static_cast<int32_t>(art::kSuspendRequest))
+
+#define THREAD_CHECKPOINT_REQUEST 2
+ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
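
The ADD_TEST_EQ entries exist so that the hand-maintained constants consumed by assembly are checked against the real C++ layout at build time. The same idea in a self-contained sketch, using static_assert and offsetof on a hypothetical Frame struct (__SIZEOF_POINTER__ is the GCC/Clang builtin this file already relies on):

    #include <cstddef>
    #include <cstdint>

    struct Frame {
      void* link;       // offset 0
      void* method;     // offset sizeof(void*)
      uint32_t dex_pc;  // offset 2 * sizeof(void*)
    };

    // Constants an assembler file would use, kept honest by the asserts below:
    // if the struct layout changes, the build breaks here instead of at runtime.
    #define FRAME_LINK_OFFSET 0
    #define FRAME_METHOD_OFFSET (FRAME_LINK_OFFSET + 1 * __SIZEOF_POINTER__)
    #define FRAME_DEX_PC_OFFSET (FRAME_LINK_OFFSET + 2 * __SIZEOF_POINTER__)

    static_assert(offsetof(Frame, link) == FRAME_LINK_OFFSET, "link moved");
    static_assert(offsetof(Frame, method) == FRAME_METHOD_OFFSET, "method moved");
    static_assert(offsetof(Frame, dex_pc) == FRAME_DEX_PC_OFFSET, "dex_pc moved");

    int main() { return 0; }
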
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 87de506..0faa3c6 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -199,6 +199,11 @@
     return this->load(std::memory_order_relaxed);
   }
 
+  // Load from memory with acquire ordering.
+  T LoadAcquire() const {
+    return this->load(std::memory_order_acquire);
+  }
+
   // Word tearing allowed, but may race.
   // TODO: Optimize?
   // There has been some discussion of eventually disallowing word
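
LoadAcquire pairs with a release store to publish data across threads: every write made before the releasing store is visible to a thread whose acquiring load observes the stored value. A minimal sketch using std::atomic directly:

    #include <atomic>
    #include <cassert>
    #include <thread>

    int payload = 0;
    std::atomic<bool> ready{false};

    void producer() {
      payload = 42;                                  // plain write...
      ready.store(true, std::memory_order_release);  // ...published by the release
    }

    void consumer() {
      while (!ready.load(std::memory_order_acquire)) {}  // pairs with the release
      assert(payload == 42);  // guaranteed visible after the acquiring load
    }

    int main() {
      std::thread t1(producer), t2(consumer);
      t1.join();
      t2.join();
    }
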
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 70bd398..82a5f96 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -855,6 +855,18 @@
       PLOG(FATAL) << "futex wait failed for " << name_;
     }
   }
+  if (self != nullptr) {
+    JNIEnvExt* const env = self->GetJniEnv();
+    if (UNLIKELY(env != nullptr && env->runtime_deleted)) {
+      CHECK(self->IsDaemon());
+      // If the runtime has been deleted, then we cannot proceed. Just sleep forever. This may
+      // occur for user daemon threads that get a spurious wakeup. This occurs for test 132 with
+      // --host and --gdb.
+      // After we wake up, the runtime may have been shut down, which means that this condition may
+      // have been deleted. It is not safe to retry the wait.
+      SleepForever();
+    }
+  }
   guard_.ExclusiveLock(self);
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
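
The guard added to Wait above handles daemon threads that wake up after the runtime is gone: rather than re-lock state that may already be freed, the thread parks itself permanently. A standalone sketch of the same pattern, with a hypothetical runtime_deleted flag:

    #include <atomic>
    #include <chrono>
    #include <thread>

    std::atomic<bool> runtime_deleted{false};  // hypothetical shutdown flag

    void SleepForever() {
      for (;;) {
        std::this_thread::sleep_for(std::chrono::hours(24));
      }
    }

    void AfterSpuriousWakeup() {
      // A daemon thread woken after shutdown must not retry the wait: the
      // condition variable (and everything it guards) may have been deleted.
      if (runtime_deleted.load(std::memory_order_acquire)) {
        SleepForever();  // never returns
      }
      // Otherwise it is safe to re-acquire the guard and continue.
    }

    int main() { AfterSpuriousWakeup(); }
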
diff --git a/runtime/base/time_utils.cc b/runtime/base/time_utils.cc
index 48b0a09..b7cf207 100644
--- a/runtime/base/time_utils.cc
+++ b/runtime/base/time_utils.cc
@@ -174,8 +174,6 @@
 }
 
 void InitTimeSpec(bool absolute, int clock, int64_t ms, int32_t ns, timespec* ts) {
-  int64_t endSec;
-
   if (absolute) {
 #if !defined(__APPLE__)
     clock_gettime(clock, ts);
@@ -190,13 +188,13 @@
     ts->tv_sec = 0;
     ts->tv_nsec = 0;
   }
-  endSec = ts->tv_sec + ms / 1000;
-  if (UNLIKELY(endSec >= 0x7fffffff)) {
-    std::ostringstream ss;
-    LOG(INFO) << "Note: end time exceeds epoch: " << ss.str();
-    endSec = 0x7ffffffe;
+
+  int64_t end_sec = ts->tv_sec + ms / 1000;
+  if (UNLIKELY(end_sec >= 0x7fffffff)) {
+    LOG(INFO) << "Note: end time exceeds INT32_MAX: " << end_sec;
+    end_sec = 0x7ffffffe;
   }
-  ts->tv_sec = endSec;
+  ts->tv_sec = end_sec;
   ts->tv_nsec = (ts->tv_nsec + (ms % 1000) * 1000000) + ns;
 
   // Catch rollover.
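
The cleaned-up InitTimeSpec does the deadline arithmetic in 64 bits and clamps the seconds just below INT32_MAX so a huge timeout cannot overflow a 32-bit time_t. The arithmetic in isolation, with an illustrative rollover catch:

    #include <cstdint>
    #include <ctime>
    #include <iostream>

    // Add `ms` milliseconds and `ns` nanoseconds to *ts, clamping the seconds
    // field so it still fits in a signed 32-bit time_t.
    void AddTimeout(timespec* ts, int64_t ms, int32_t ns) {
      int64_t end_sec = static_cast<int64_t>(ts->tv_sec) + ms / 1000;
      if (end_sec >= 0x7fffffff) {
        end_sec = 0x7ffffffe;  // clamp instead of overflowing
      }
      ts->tv_sec = static_cast<time_t>(end_sec);
      ts->tv_nsec += (ms % 1000) * 1000000 + ns;
      if (ts->tv_nsec >= 1000000000) {  // catch nanosecond rollover
        ts->tv_sec++;
        ts->tv_nsec -= 1000000000;
      }
    }

    int main() {
      timespec ts = {0, 0};
      AddTimeout(&ts, 1500, 0);
      std::cout << ts.tv_sec << "s " << ts.tv_nsec << "ns\n";  // 1s 500000000ns
    }
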
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d998d99..41842e8 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -330,7 +330,7 @@
   Runtime* const runtime = Runtime::Current();
   gc::Heap* const heap = runtime->GetHeap();
 
-  CHECK(!heap->HasImageSpace()) << "Runtime has image. We should use it.";
+  CHECK(!heap->HasBootImageSpace()) << "Runtime has image. We should use it.";
   CHECK(!init_done_);
 
   // Use the pointer size from the runtime since we are probably creating the image.
@@ -736,7 +736,7 @@
 
 static void SanityCheckArtMethod(ArtMethod* m,
                                  mirror::Class* expected_class,
-                                 gc::space::ImageSpace* space)
+                                 const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
     CHECK(m->GetDeclaringClass() == nullptr) << PrettyMethod(m);
@@ -745,18 +745,22 @@
   } else if (expected_class != nullptr) {
     CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
   }
-  if (space != nullptr) {
-    auto& header = space->GetImageHeader();
-    auto& methods = header.GetMethodsSection();
-    auto offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
-    CHECK(methods.Contains(offset)) << m << " not in " << methods;
+  if (!spaces.empty()) {
+    bool contains = false;
+    for (gc::space::ImageSpace* space : spaces) {
+      auto& header = space->GetImageHeader();
+      auto& methods = header.GetMethodsSection();
+      auto offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
+      contains |= methods.Contains(offset);
+    }
+    CHECK(contains) << m << " not found";
   }
 }
 
 static void SanityCheckArtMethodPointerArray(mirror::PointerArray* arr,
                                              mirror::Class* expected_class,
                                              size_t pointer_size,
-                                             gc::space::ImageSpace* space)
+                                             const std::vector<gc::space::ImageSpace*>& spaces)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(arr != nullptr);
   for (int32_t j = 0; j < arr->GetLength(); ++j) {
@@ -766,27 +770,34 @@
       CHECK(method != nullptr);
     }
     if (method != nullptr) {
-      SanityCheckArtMethod(method, expected_class, space);
+      SanityCheckArtMethod(method, expected_class, spaces);
     }
   }
 }
 
-static void SanityCheckArtMethodPointerArray(
-    ArtMethod** arr,
-    size_t size,
-    size_t pointer_size,
-    gc::space::ImageSpace* space) SHARED_REQUIRES(Locks::mutator_lock_) {
+static void SanityCheckArtMethodPointerArray(ArtMethod** arr,
+                                             size_t size,
+                                             size_t pointer_size,
+                                             const std::vector<gc::space::ImageSpace*>& spaces)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK_EQ(arr != nullptr, size != 0u);
   if (arr != nullptr) {
-    auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
-    CHECK(space->GetImageHeader().GetImageSection(
-        ImageHeader::kSectionDexCacheArrays).Contains(offset));
+    bool contains = false;
+    for (auto space : spaces) {
+      auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
+      if (space->GetImageHeader().GetImageSection(
+          ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
+        contains = true;
+        break;
+      }
+    }
+    CHECK(contains);
   }
   for (size_t j = 0; j < size; ++j) {
     ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size);
     // expected_class == null means we are a dex cache.
     if (method != nullptr) {
-      SanityCheckArtMethod(method, nullptr, space);
+      SanityCheckArtMethod(method, nullptr, spaces);
     }
   }
 }
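
The sanity checks now treat membership as an any-of test across all boot image spaces rather than a single-space Contains. The pattern in isolation, with a hypothetical Space record:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Space {
      const uint8_t* begin;
      size_t size;
      bool Contains(const uint8_t* p) const {
        uintptr_t x = reinterpret_cast<uintptr_t>(p);
        uintptr_t b = reinterpret_cast<uintptr_t>(begin);
        return x >= b && x < b + size;
      }
    };

    // A pointer is sane if *any* boot image space contains it.
    bool InAnySpace(const uint8_t* p, const std::vector<Space>& spaces) {
      for (const Space& s : spaces) {
        if (s.Contains(p)) {
          return true;  // early exit, mirroring the `break` above
        }
      }
      return false;
    }

    int main() {
      uint8_t a[16], b[16];
      std::vector<Space> spaces = {{a, sizeof(a)}, {b, sizeof(b)}};
      assert(InAnySpace(a + 2, spaces));
      assert(InAnySpace(b + 4, spaces));
    }
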
@@ -805,29 +816,30 @@
       CHECK_EQ(field.GetDeclaringClass(), klass);
     }
     auto* runtime = Runtime::Current();
-    auto* image_space = runtime->GetHeap()->GetBootImageSpace();
+    auto image_spaces = runtime->GetHeap()->GetBootImageSpaces();
     auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
     for (auto& m : klass->GetMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_space);
+      SanityCheckArtMethod(&m, klass, image_spaces);
     }
     auto* vtable = klass->GetVTable();
     if (vtable != nullptr) {
-      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_space);
+      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
     if (klass->ShouldHaveEmbeddedImtAndVTable()) {
       for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_space);
+        SanityCheckArtMethod(
+            klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_spaces);
       }
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_space);
+        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
     }
     auto* iftable = klass->GetIfTable();
     if (iftable != nullptr) {
       for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
         if (iftable->GetMethodArrayCount(i) > 0) {
-          SanityCheckArtMethodPointerArray(iftable->GetMethodArray(i), nullptr, pointer_size,
-                                           image_space);
+          SanityCheckArtMethodPointerArray(
+              iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
         }
       }
     }
@@ -856,6 +868,33 @@
   DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor);
 };
 
+struct TrampolineCheckData {
+  const void* quick_resolution_trampoline;
+  const void* quick_imt_conflict_trampoline;
+  const void* quick_generic_jni_trampoline;
+  const void* quick_to_interpreter_bridge_trampoline;
+  size_t pointer_size;
+  ArtMethod* m;
+  bool error;
+};
+static void CheckTrampolines(mirror::Object* obj, void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  if (obj->IsClass()) {
+    mirror::Class* klass = obj->AsClass();
+    TrampolineCheckData* d = reinterpret_cast<TrampolineCheckData*>(arg);
+    for (ArtMethod& m : klass->GetMethods(d->pointer_size)) {
+      const void* entrypoint = m.GetEntryPointFromQuickCompiledCodePtrSize(d->pointer_size);
+      if (entrypoint == d->quick_resolution_trampoline ||
+          entrypoint == d->quick_imt_conflict_trampoline ||
+          entrypoint == d->quick_generic_jni_trampoline ||
+          entrypoint == d->quick_to_interpreter_bridge_trampoline) {
+        d->m = &m;
+        d->error = true;
+        return;
+      }
+    }
+  }
+}
+
 bool ClassLinker::InitFromImage(std::string* error_msg) {
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
@@ -863,28 +902,71 @@
   Runtime* const runtime = Runtime::Current();
   Thread* const self = Thread::Current();
   gc::Heap* const heap = runtime->GetHeap();
-  gc::space::ImageSpace* const space = heap->GetBootImageSpace();
-  CHECK(space != nullptr);
-  image_pointer_size_ = space->GetImageHeader().GetPointerSize();
+  std::vector<gc::space::ImageSpace*> spaces = heap->GetBootImageSpaces();
+  CHECK(!spaces.empty());
+  image_pointer_size_ = spaces[0]->GetImageHeader().GetPointerSize();
   dex_cache_boot_image_class_lookup_required_ = true;
-  const OatFile* oat_file = runtime->GetOatFileManager().RegisterImageOatFile(space);
-  DCHECK(oat_file != nullptr);
-  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
-  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
-  const char* image_file_location = oat_file->GetOatHeader().
+  std::vector<const OatFile*> oat_files =
+      runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
+  DCHECK(!oat_files.empty());
+  const OatHeader& default_oat_header = oat_files[0]->GetOatHeader();
+  CHECK_EQ(default_oat_header.GetImageFileLocationOatChecksum(), 0U);
+  CHECK_EQ(default_oat_header.GetImageFileLocationOatDataBegin(), 0U);
+  const char* image_file_location = oat_files[0]->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
   CHECK(image_file_location == nullptr || *image_file_location == 0);
-  quick_resolution_trampoline_ = oat_file->GetOatHeader().GetQuickResolutionTrampoline();
-  quick_imt_conflict_trampoline_ = oat_file->GetOatHeader().GetQuickImtConflictTrampoline();
-  quick_generic_jni_trampoline_ = oat_file->GetOatHeader().GetQuickGenericJniTrampoline();
-  quick_to_interpreter_bridge_trampoline_ = oat_file->GetOatHeader().GetQuickToInterpreterBridge();
-  StackHandleScope<2> hs(self);
-  mirror::Object* dex_caches_object = space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
-      hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
+  quick_resolution_trampoline_ = default_oat_header.GetQuickResolutionTrampoline();
+  quick_imt_conflict_trampoline_ = default_oat_header.GetQuickImtConflictTrampoline();
+  quick_generic_jni_trampoline_ = default_oat_header.GetQuickGenericJniTrampoline();
+  quick_to_interpreter_bridge_trampoline_ = default_oat_header.GetQuickToInterpreterBridge();
+  if (kIsDebugBuild) {
+    // Check that the other images use the same trampolines.
+    for (size_t i = 1; i < oat_files.size(); ++i) {
+      const OatHeader& ith_oat_header = oat_files[i]->GetOatHeader();
+      const void* ith_quick_resolution_trampoline =
+          ith_oat_header.GetQuickResolutionTrampoline();
+      const void* ith_quick_imt_conflict_trampoline =
+          ith_oat_header.GetQuickImtConflictTrampoline();
+      const void* ith_quick_generic_jni_trampoline =
+          ith_oat_header.GetQuickGenericJniTrampoline();
+      const void* ith_quick_to_interpreter_bridge_trampoline =
+          ith_oat_header.GetQuickToInterpreterBridge();
+      if (ith_quick_resolution_trampoline != quick_resolution_trampoline_ ||
+          ith_quick_imt_conflict_trampoline != quick_imt_conflict_trampoline_ ||
+          ith_quick_generic_jni_trampoline != quick_generic_jni_trampoline_ ||
+          ith_quick_to_interpreter_bridge_trampoline != quick_to_interpreter_bridge_trampoline_) {
+        // Make sure that all methods in this image do not contain those trampolines as
+        // entrypoints. Otherwise the class-linker won't be able to work with a single set.
+        TrampolineCheckData data;
+        data.error = false;
+        data.pointer_size = GetImagePointerSize();
+        data.quick_resolution_trampoline = ith_quick_resolution_trampoline;
+        data.quick_imt_conflict_trampoline = ith_quick_imt_conflict_trampoline;
+        data.quick_generic_jni_trampoline = ith_quick_generic_jni_trampoline;
+        data.quick_to_interpreter_bridge_trampoline = ith_quick_to_interpreter_bridge_trampoline;
+        ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        spaces[i]->GetLiveBitmap()->Walk(CheckTrampolines, &data);
+        if (data.error) {
+          ArtMethod* m = data.m;
+          LOG(ERROR) << "Found a broken ArtMethod: " << PrettyMethod(m);
+          *error_msg = "Found an ArtMethod with a bad entrypoint";
+          return false;
+        }
+      }
+    }
+  }
 
-  Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
-      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
+  StackHandleScopeCollection handles(self);
+  std::vector<Handle<mirror::ObjectArray<mirror::DexCache>>> dex_caches_vector;
+  for (gc::space::ImageSpace* space : spaces) {
+    Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(handles.NewHandle(
+        space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches)->
+        AsObjectArray<mirror::DexCache>()));
+    dex_caches_vector.push_back(dex_caches);
+  }
+
+  Handle<mirror::ObjectArray<mirror::Class>> class_roots(handles.NewHandle(
+      spaces[0]->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
       AsObjectArray<mirror::Class>()));
   class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
 
@@ -896,56 +978,69 @@
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
   // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
   // cleared without triggering the read barrier and unintentionally mark the sentinel alive.
-  runtime->SetSentinel(heap->AllocNonMovableObject<true>(self,
-                                                         java_lang_Object,
-                                                         java_lang_Object->GetObjectSize(),
-                                                         VoidFunctor()));
+  runtime->SetSentinel(heap->AllocNonMovableObject<true>(
+      self, java_lang_Object, java_lang_Object->GetObjectSize(), VoidFunctor()));
 
-  if (oat_file->GetOatHeader().GetDexFileCount() !=
-      static_cast<uint32_t>(dex_caches->GetLength())) {
+  uint32_t dex_file_count = 0;
+  for (const OatFile* oat_file : oat_files) {
+    dex_file_count += oat_file->GetOatHeader().GetDexFileCount();
+  }
+  uint32_t dex_caches_count = 0;
+  for (auto dex_caches : dex_caches_vector) {
+    dex_caches_count += dex_caches->GetLength();
+  }
+  if (dex_file_count != dex_caches_count) {
     *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
                  "image";
     return false;
   }
-  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    StackHandleScope<1> hs2(self);
-    Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
-    const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
-    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
-                                                                      nullptr);
-    if (oat_dex_file == nullptr) {
-      *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
-                                oat_file->GetLocation().c_str(),
-                                dex_file_location.c_str());
-      return false;
-    }
-    std::string inner_error_msg;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
-    if (dex_file == nullptr) {
-      *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'",
-                                dex_file_location.c_str(),
-                                oat_file->GetLocation().c_str(),
-                                inner_error_msg.c_str());
-      return false;
-    }
+  for (auto dex_caches : dex_caches_vector) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      StackHandleScope<1> hs2(self);
+      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
+      const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
+      const OatFile::OatDexFile* oat_dex_file = nullptr;
+      for (const OatFile* oat_file : oat_files) {
+        const OatFile::OatDexFile* oat_dex =
+            oat_file->GetOatDexFile(dex_file_location.c_str(), nullptr, false);
+        if (oat_dex != nullptr) {
+          DCHECK(oat_dex_file == nullptr);
+          oat_dex_file = oat_dex;
+        }
+      }
 
-    if (kSanityCheckObjects) {
-      SanityCheckArtMethodPointerArray(dex_cache->GetResolvedMethods(),
-                                       dex_cache->NumResolvedMethods(),
-                                       image_pointer_size_,
-                                       space);
-    }
+      if (oat_dex_file == nullptr) {
+        *error_msg = StringPrintf("Failed finding oat dex file for %s",
+                                  dex_file_location.c_str());
+        return false;
+      }
+      std::string inner_error_msg;
+      std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
+      if (dex_file == nullptr) {
+        *error_msg = StringPrintf("Failed to open dex file %s error '%s'",
+                                  dex_file_location.c_str(),
+                                  inner_error_msg.c_str());
+        return false;
+      }
 
-    if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
-      *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
-                                dex_file_location.c_str(),
-                                dex_file->GetLocationChecksum(),
-                                oat_dex_file->GetDexFileLocationChecksum());
-      return false;
-    }
+      if (kSanityCheckObjects) {
+        SanityCheckArtMethodPointerArray(dex_cache->GetResolvedMethods(),
+                                         dex_cache->NumResolvedMethods(),
+                                         image_pointer_size_,
+                                         spaces);
+      }
 
-    AppendToBootClassPath(*dex_file.get(), dex_cache);
-    opened_dex_files_.push_back(std::move(dex_file));
+      if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
+        *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
+                                  dex_file_location.c_str(),
+                                  dex_file->GetLocationChecksum(),
+                                  oat_dex_file->GetDexFileLocationChecksum());
+        return false;
+      }
+
+      AppendToBootClassPath(*dex_file.get(), dex_cache);
+      opened_dex_files_.push_back(std::move(dex_file));
+    }
   }
 
   if (!ValidPointerSize(image_pointer_size_)) {
@@ -968,12 +1063,14 @@
   }
 
   if (kSanityCheckObjects) {
-    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-      auto* dex_cache = dex_caches->Get(i);
-      for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
-        auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
-        if (field != nullptr) {
-          CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
+    for (auto dex_caches : dex_caches_vector) {
+      for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+        auto* dex_cache = dex_caches->Get(i);
+        for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
+          auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
+          if (field != nullptr) {
+            CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
+          }
         }
       }
     }
@@ -982,10 +1079,12 @@
 
   // Set entry point to interpreter if in InterpretOnly mode.
   if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    const ImageHeader& header = space->GetImageHeader();
-    const ImageSection& methods = header.GetMethodsSection();
-    SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
-    methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
+    for (gc::space::ImageSpace* space : spaces) {
+      const ImageHeader& header = space->GetImageHeader();
+      const ImageSection& methods = header.GetMethodsSection();
+      SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
+      methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
+    }
   }
 
   // reinit class_roots_
@@ -1014,12 +1113,23 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
-  const ImageHeader& header = space->GetImageHeader();
-  const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
-  if (section.Size() > 0u) {
-    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
-    class_table->ReadFromMemory(space->Begin() + section.Offset());
+  size_t class_tables_added = 0;
+  for (gc::space::ImageSpace* space : spaces) {
+    const ImageHeader& header = space->GetImageHeader();
+    const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
+    if (section.Size() > 0u) {
+      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+      ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
+      class_table->ReadFromMemory(space->Begin() + section.Offset());
+      ++class_tables_added;
+    }
+  }
+  if (class_tables_added != 0) {
+    // Either all of the image spaces have an empty class section or none do. In the case where
+    // an image space has no classes, it will still have a non-empty class section that contains
+    // metadata.
+    CHECK_EQ(spaces.size(), class_tables_added)
+        << "Expected non-empty class section for each image space.";
     dex_cache_boot_image_class_lookup_required_ = false;
   }
 
@@ -1974,7 +2084,7 @@
   }
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsStarted()) {
-    if (runtime->IsAotCompiler() || runtime->GetHeap()->HasImageSpace()) {
+    if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) {
       return;  // OAT file unavailable.
     }
   }
@@ -2783,23 +2893,27 @@
   return result;
 }
 
-static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches(gc::space::ImageSpace* image_space)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  CHECK(image_space != nullptr);
-  mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  DCHECK(root != nullptr);
-  return root->AsObjectArray<mirror::DexCache>();
+static std::vector<mirror::ObjectArray<mirror::DexCache>*> GetImageDexCaches(
+    std::vector<gc::space::ImageSpace*> image_spaces) SHARED_REQUIRES(Locks::mutator_lock_) {
+  CHECK(!image_spaces.empty());
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector;
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+    DCHECK(root != nullptr);
+    dex_caches_vector.push_back(root->AsObjectArray<mirror::DexCache>());
+  }
+  return dex_caches_vector;
 }
 
 void ClassLinker::AddBootImageClassesToClassTable() {
   if (dex_cache_boot_image_class_lookup_required_) {
-    AddImageClassesToClassTable(Runtime::Current()->GetHeap()->GetBootImageSpace(),
+    AddImageClassesToClassTable(Runtime::Current()->GetHeap()->GetBootImageSpaces(),
                                 /*class_loader*/nullptr);
     dex_cache_boot_image_class_lookup_required_ = false;
   }
 }
 
-void ClassLinker::AddImageClassesToClassTable(gc::space::ImageSpace* image_space,
+void ClassLinker::AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
                                               mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -2807,25 +2921,28 @@
 
   ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
 
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(image_space);
   std::string temp;
-  for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
-    for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
-      mirror::Class* klass = types[j].Read();
-      if (klass != nullptr) {
-        DCHECK_EQ(klass->GetClassLoader(), class_loader);
-        const char* descriptor = klass->GetDescriptor(&temp);
-        size_t hash = ComputeModifiedUtf8Hash(descriptor);
-        mirror::Class* existing = class_table->Lookup(descriptor, hash);
-        if (existing != nullptr) {
-          CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
-              << PrettyClassAndClassLoader(klass);
-        } else {
-          class_table->Insert(klass);
-          if (log_new_class_table_roots_) {
-            new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
+      GetImageDexCaches(image_spaces);
+  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        mirror::Class* klass = types[j].Read();
+        if (klass != nullptr) {
+          DCHECK_EQ(klass->GetClassLoader(), class_loader);
+          const char* descriptor = klass->GetDescriptor(&temp);
+          size_t hash = ComputeModifiedUtf8Hash(descriptor);
+          mirror::Class* existing = class_table->Lookup(descriptor, hash);
+          if (existing != nullptr) {
+            CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
+                << PrettyClassAndClassLoader(klass);
+          } else {
+            class_table->Insert(klass);
+            if (log_new_class_table_roots_) {
+              new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+            }
           }
         }
       }
@@ -2856,18 +2973,20 @@
 
 mirror::Class* ClassLinker::LookupClassFromBootImage(const char* descriptor) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "Image class lookup");
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(
-      Runtime::Current()->GetHeap()->GetBootImageSpace());
-  for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    const DexFile* dex_file = dex_cache->GetDexFile();
-    // Try binary searching the type index by descriptor.
-    const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor);
-    if (type_id != nullptr) {
-      uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
-      mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
-      if (klass != nullptr) {
-        return klass;
+  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
+      GetImageDexCaches(Runtime::Current()->GetHeap()->GetBootImageSpaces());
+  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      // Try binary searching the type index by descriptor.
+      const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor);
+      if (type_id != nullptr) {
+        uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
+        mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
+        if (klass != nullptr) {
+          return klass;
+        }
       }
     }
   }
@@ -3167,7 +3286,7 @@
   // the runtime isn't started. On the other hand, app classes can be re-verified even if they are
   // already pre-opted, as then the runtime is started.
   if (!Runtime::Current()->IsAotCompiler() &&
-      !Runtime::Current()->GetHeap()->HasImageSpace() &&
+      !Runtime::Current()->GetHeap()->HasBootImageSpace() &&
       klass->GetClassLoader() != nullptr) {
     return false;
   }
@@ -3441,7 +3560,7 @@
 
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 16u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 19u);
   ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
       2, image_pointer_size_);
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
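The hunks above generalize the boot-image sanity check from a single oat file to a set of them: dex-file counts are summed across all boot oat files and compared against the combined lengths of the per-image dex-cache arrays. A minimal standalone sketch of that aggregation, using illustrative stand-in structs rather than the real OatFile and mirror types:

#include <cstdint>
#include <vector>

// Stand-ins; the real code uses OatFile and mirror::ObjectArray<mirror::DexCache>.
struct FakeOatFile { uint32_t dex_file_count; };
struct FakeDexCacheArray { int32_t length; };

// Returns true if the total number of dex files in the oat files matches the
// total number of dex caches across the image spaces, mirroring the check above.
bool CountsMatch(const std::vector<FakeOatFile>& oat_files,
                 const std::vector<FakeDexCacheArray>& dex_caches_vector) {
  uint32_t dex_file_count = 0;
  for (const FakeOatFile& oat_file : oat_files) {
    dex_file_count += oat_file.dex_file_count;
  }
  uint32_t dex_caches_count = 0;
  for (const FakeDexCacheArray& dex_caches : dex_caches_vector) {
    dex_caches_count += static_cast<uint32_t>(dex_caches.length);
  }
  return dex_file_count == dex_caches_count;
}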
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index f16fe92..9d432c6 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -505,7 +505,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add image classes to the class table.
-  void AddImageClassesToClassTable(gc::space::ImageSpace* image_space,
+  void AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
                                    mirror::ClassLoader* class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 59a43ee..99353c5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -501,6 +501,7 @@
 struct ClassOffsets : public CheckOffsets<mirror::Class> {
   ClassOffsets() : CheckOffsets<mirror::Class>(false, "Ljava/lang/Class;") {
     addOffset(OFFSETOF_MEMBER(mirror::Class, access_flags_), "accessFlags");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, annotation_type_), "annotationType");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_flags_), "classFlags");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_loader_), "classLoader");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_size_), "classSize");
@@ -535,15 +536,15 @@
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
-    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode");
+    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hash");
   };
 };
 
 struct ThrowableOffsets : public CheckOffsets<mirror::Throwable> {
   ThrowableOffsets() : CheckOffsets<mirror::Throwable>(false, "Ljava/lang/Throwable;") {
+    addOffset(OFFSETOF_MEMBER(mirror::Throwable, backtrace_), "backtrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, cause_), "cause");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, detail_message_), "detailMessage");
-    addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_state_), "stackState");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_trace_), "stackTrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, suppressed_exceptions_), "suppressedExceptions");
   };
@@ -612,7 +613,7 @@
 struct AccessibleObjectOffsets : public CheckOffsets<mirror::AccessibleObject> {
   AccessibleObjectOffsets() : CheckOffsets<mirror::AccessibleObject>(
       false, "Ljava/lang/reflect/AccessibleObject;") {
-    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "flag");
+    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "override");
   };
 };
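The renamed expectations above ("hash", "backtrace", "override") track the move from libcore field names to their OpenJDK counterparts; the CheckOffsets machinery verifies that each Java field sits at the byte offset the C++ mirror struct assumes. Roughly how such an offset is computed, assuming the macro matches ART's OFFSETOF_MEMBER (the fake non-null pointer sidesteps a null-dereference diagnostic):

#include <cstdint>
#include <cstdio>

// Assumed to match ART's OFFSETOF_MEMBER: byte offset of a member, computed
// through a fake non-null pointer value of 16.
#define OFFSETOF_MEMBER(t, f) \
  (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16))

// Illustrative mirror-style struct using the OpenJDK field names above.
struct FakeThrowable {
  void* backtrace_;       // Was libcore's stackState.
  void* cause_;
  void* detail_message_;
};

int main() {
  std::printf("cause_ offset: %zu\n",
              static_cast<size_t>(OFFSETOF_MEMBER(FakeThrowable, cause_)));
  return 0;
}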
 
diff --git a/runtime/class_table.h b/runtime/class_table.h
index c911365..911f3c2 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -106,8 +106,7 @@
 
   // Combines all of the tables into one class set.
   size_t WriteToMemory(uint8_t* ptr) const
-      REQUIRES(Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
   size_t ReadFromMemory(uint8_t* ptr)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
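The annotation change above relaxes WriteToMemory from requiring classlinker_classes_lock_ exclusively to requiring it only shared, alongside the mutator lock. For context, a simplified sketch of how such requirements map onto Clang's thread-safety attributes; ART's REQUIRES()/SHARED_REQUIRES() macros expand to attributes along these lines:

// Compile with clang -Wthread-safety to have violations diagnosed.
struct __attribute__((capability("mutex"))) FakeMutex {
  void Lock() __attribute__((acquire_capability()));
  void Unlock() __attribute__((release_capability()));
  void ReaderLock() __attribute__((acquire_shared_capability()));
  void ReaderUnlock() __attribute__((release_shared_capability()));
};

FakeMutex table_lock;

// A reader only needs the lock held shared (several readers may proceed)...
void ReadTable() __attribute__((requires_shared_capability(table_lock)));
// ...while a writer needs it held exclusively.
void MutateTable() __attribute__((requires_capability(table_lock)));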
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 2640a6e..a4e16ae 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -303,7 +303,12 @@
 
 
   RuntimeOptions options;
-  std::string boot_class_path_string = "-Xbootclasspath:" + GetLibCoreDexFileName();
+  std::string boot_class_path_string = "-Xbootclasspath";
+  for (const std::string& core_dex_file_name : GetLibCoreDexFileNames()) {
+    boot_class_path_string += ":";
+    boot_class_path_string += core_dex_file_name;
+  }
+
   options.push_back(std::make_pair(boot_class_path_string, nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair(min_heap_string, nullptr));
@@ -409,19 +414,48 @@
   (*icu_cleanup_fn)();
 
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
+
+  // Manually close the JNI libraries.
+  // The runtime does not support repeated JNI->CreateVM calls, so we need to manually clean up
+  // the dynamic linking loader so that gtests do not fail.
+  // Bug: 25785594
+  if (runtime_->IsStarted()) {
+    {
+      // We retrieve the handle by calling dlopen on the library. To close it, we need to call
+      // dlclose twice, the first time to undo our dlopen and the second time to actually unload it.
+      // See man dlopen.
+      void* handle = dlopen("libjavacore.so", RTLD_LAZY);
+      dlclose(handle);
+      CHECK_EQ(0, dlclose(handle));
+    }
+    {
+      void* handle = dlopen("libopenjdkd.so", RTLD_LAZY);
+      dlclose(handle);
+      CHECK_EQ(0, dlclose(handle));
+    }
+  }
 }
 
-std::string CommonRuntimeTest::GetLibCoreDexFileName() {
-  return GetDexFileName("core-libart");
-}
-
-std::string CommonRuntimeTest::GetDexFileName(const std::string& jar_prefix) {
-  if (IsHost()) {
+static std::string GetDexFileName(const std::string& jar_prefix, bool host) {
+  std::string path;
+  if (host) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != nullptr);
-    return StringPrintf("%s/framework/%s-hostdex.jar", host_dir, jar_prefix.c_str());
+    path = host_dir;
+  } else {
+    path = GetAndroidRoot();
   }
-  return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str());
+
+  std::string suffix = host
+      ? "-hostdex"                 // The host version.
+      : "-testdex";                // The unstripped target version.
+
+  return StringPrintf("%s/framework/%s%s.jar", path.c_str(), jar_prefix.c_str(), suffix.c_str());
+}
+
+std::vector<std::string> CommonRuntimeTest::GetLibCoreDexFileNames() {
+  return std::vector<std::string>({GetDexFileName("core-oj", IsHost()),
+                                   GetDexFileName("core-libart", IsHost())});
 }
 
 std::string CommonRuntimeTest::GetTestAndroidRoot() {
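With both core jars in play, the option assembled above takes the form -Xbootclasspath:<core-oj jar>:<core-libart jar>. A self-contained sketch of the same string assembly, using hypothetical jar paths (the real test derives them from ANDROID_HOST_OUT or the Android root):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Hypothetical jar locations, for illustration only.
  std::vector<std::string> jars = {
      "/out/host/framework/core-oj-hostdex.jar",
      "/out/host/framework/core-libart-hostdex.jar",
  };
  std::string boot_class_path_string = "-Xbootclasspath";
  for (const std::string& jar : jars) {
    boot_class_path_string += ":";
    boot_class_path_string += jar;
  }
  // Prints the colon-separated option, first jar immediately after the flag.
  std::cout << boot_class_path_string << std::endl;
  return 0;
}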
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index f318457..7223b6e 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -77,8 +77,8 @@
   CommonRuntimeTest();
   ~CommonRuntimeTest();
 
-  // Gets the path of the libcore dex file.
-  static std::string GetLibCoreDexFileName();
+  // Gets the paths of the libcore dex files.
+  static std::vector<std::string> GetLibCoreDexFileNames();
 
   // Returns bin directory which contains host's prebuild tools.
   static std::string GetAndroidHostToolsDir();
@@ -118,9 +118,6 @@
   // initializers, initialize well-known classes, and creates the heap thread pool.
   virtual void FinalizeSetup();
 
-  // Gets the path of the specified dex file for host or target.
-  static std::string GetDexFileName(const std::string& jar_prefix);
-
   std::string GetTestAndroidRoot();
 
   std::string GetTestDexFileName(const char* name);
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index d68b463..40e2b15 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -18,6 +18,8 @@
 
 #include <sstream>
 
+#include "ScopedLocalRef.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/logging.h"
@@ -522,6 +524,104 @@
   va_end(args);
 }
 
+// Stack overflow.
+
+void ThrowStackOverflowError(Thread* self) {
+  if (self->IsHandlingStackOverflow()) {
+    LOG(ERROR) << "Recursive stack overflow.";
+    // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
+  }
+
+  self->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
+  JNIEnvExt* env = self->GetJniEnv();
+  std::string msg("stack size ");
+  msg += PrettySize(self->GetStackSize());
+
+  // Avoid running Java code for exception initialization.
+  // TODO: Checks to make this a bit less brittle.
+
+  std::string error_msg;
+
+  // Allocate an uninitialized object.
+  ScopedLocalRef<jobject> exc(env,
+                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
+  if (exc.get() != nullptr) {
+    // "Initialize".
+    // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
+    // Only Throwable has "custom" fields:
+    //   String detailMessage.
+    //   Throwable cause (= this).
+    //   List<Throwable> suppressedExceptions (= Collections.emptyList()).
+    //   Object stackState;
+    //   StackTraceElement[] stackTrace;
+    // Only Throwable has a non-empty constructor:
+    //   this.stackTrace = EmptyArray.STACK_TRACE_ELEMENT;
+    //   fillInStackTrace();
+
+    // detailMessage.
+    // TODO: Use String::FromModifiedUTF...?
+    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
+    if (s.get() != nullptr) {
+      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
+
+      // cause.
+      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
+
+      // suppressedExceptions.
+      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
+          WellKnownClasses::java_util_Collections,
+          WellKnownClasses::java_util_Collections_EMPTY_LIST));
+      CHECK(emptylist.get() != nullptr);
+      env->SetObjectField(exc.get(),
+                          WellKnownClasses::java_lang_Throwable_suppressedExceptions,
+                          emptylist.get());
+
+      // stackState is set as a result of fillInStackTrace. fillInStackTrace calls
+      // nativeFillInStackTrace.
+      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
+      {
+        ScopedObjectAccessUnchecked soa(env);
+        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
+      }
+      if (stack_state_val.get() != nullptr) {
+        env->SetObjectField(exc.get(),
+                            WellKnownClasses::java_lang_Throwable_stackState,
+                            stack_state_val.get());
+
+        // stackTrace.
+        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
+            WellKnownClasses::libcore_util_EmptyArray,
+            WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
+        env->SetObjectField(exc.get(),
+                            WellKnownClasses::java_lang_Throwable_stackTrace,
+                            stack_trace_elem.get());
+      } else {
+        error_msg = "Could not create stack trace.";
+      }
+      // Throw the exception.
+      self->SetException(reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
+    } else {
+      // Could not allocate a string object.
+      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
+    }
+  } else {
+    error_msg = "Could not allocate StackOverflowError object.";
+  }
+
+  if (!error_msg.empty()) {
+    LOG(WARNING) << error_msg;
+    CHECK(self->IsExceptionPending());
+  }
+
+  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+  self->ResetDefaultStackEnd();  // Return to default stack size.
+
+  // And restore protection if implicit checks are on.
+  if (!explicit_overflow_check) {
+    self->ProtectStack();
+  }
+}
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
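The relocated ThrowStackOverflowError leans on ScopedLocalRef so each JNI local reference is released when it goes out of scope, which matters in a function deliberately run while the stack is nearly exhausted. A minimal RAII holder in the same spirit (simplified; the real ScopedLocalRef.h is templated over the reference type):

#include <jni.h>

// Simplified RAII holder for a JNI local reference.
class LocalRef {
 public:
  LocalRef(JNIEnv* env, jobject obj) : env_(env), obj_(obj) {}
  ~LocalRef() { reset(nullptr); }

  jobject get() const { return obj_; }

  // Releases the currently held local reference, then takes ownership of obj.
  void reset(jobject obj) {
    if (obj_ != nullptr) {
      env_->DeleteLocalRef(obj_);
    }
    obj_ = obj;
  }

 private:
  JNIEnv* env_;
  jobject obj_;
  LocalRef(const LocalRef&) = delete;
  LocalRef& operator=(const LocalRef&) = delete;
};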
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 2a0934f..85fe2b3 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -154,10 +154,10 @@
 
 void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
                            const StringPiece& type, const StringPiece& name)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 // NoSuchMethodError
 
@@ -194,6 +194,10 @@
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+// Stack overflow.
+
+void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c32331f..6e11cf8 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -726,11 +726,11 @@
 
 JDWP::JdwpError Dbg::GetClassLoader(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply) {
   JDWP::JdwpError error;
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(id, &error);
-  if (o == nullptr) {
-    return JDWP::ERR_INVALID_OBJECT;
+  mirror::Class* c = DecodeClass(id, &error);
+  if (c == nullptr) {
+    return error;
   }
-  expandBufAddObjectId(pReply, gRegistry->Add(o->GetClass()->GetClassLoader()));
+  expandBufAddObjectId(pReply, gRegistry->Add(c->GetClassLoader()));
   return JDWP::ERR_NONE;
 }
 
@@ -2033,29 +2033,28 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
-  // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
-  mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
-  {
-    // The "groups" field is declared as a java.util.List: check it really is
-    // an instance of java.util.ArrayList.
-    CHECK(groups_array_list != nullptr);
-    mirror::Class* java_util_ArrayList_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_util_ArrayList);
-    CHECK(groups_array_list->InstanceOf(java_util_ArrayList_class));
+  // Get the int "ngroups" count of this thread group...
+  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  CHECK(ngroups_field != nullptr);
+  const int32_t size = ngroups_field->GetInt(thread_group);
+  if (size == 0) {
+    return;
   }
 
-  // Get the array and size out of the ArrayList<ThreadGroup>...
-  ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
-  ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
-  mirror::ObjectArray<mirror::Object>* groups_array =
-      array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
-  const int32_t size = size_field->GetInt(groups_array_list);
+  // Get the ThreadGroup[] "groups" out of this thread group...
+  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  mirror::Object* groups_array = groups_field->GetObject(thread_group);
+
+  CHECK(groups_array != nullptr);
+  CHECK(groups_array->IsObjectArray());
+
+  mirror::ObjectArray<mirror::Object>* groups_array_as_array =
+      groups_array->AsObjectArray<mirror::Object>();
 
   // Copy the first 'size' elements out of the array into the result.
   ObjectRegistry* registry = Dbg::GetObjectRegistry();
   for (int32_t i = 0; i < size; ++i) {
-    child_thread_group_ids->push_back(registry->Add(groups_array->Get(i)));
+    child_thread_group_ids->push_back(registry->Add(groups_array_as_array->Get(i)));
   }
 }
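The rewritten enumeration reflects the OpenJDK ThreadGroup layout: an int ngroups count plus a ThreadGroup[] groups backing array, replacing libcore's ArrayList. Only the first ngroups slots are meaningful; anything beyond is presumed spare capacity. A stand-alone sketch of the copy loop, with illustrative types in place of the mirror classes:

#include <cstdint>
#include <vector>

struct FakeThreadGroup { uint64_t id; };  // Stand-in for a registered group.

// Copies the first `ngroups` child ids out of the backing array, ignoring
// anything past the count, as the JDWP handler above does.
std::vector<uint64_t> ChildIds(const std::vector<FakeThreadGroup*>& groups,
                               int32_t ngroups) {
  std::vector<uint64_t> ids;
  for (int32_t i = 0; i < ngroups; ++i) {
    ids.push_back(groups[i]->id);
  }
  return ids;
}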
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 0a167bb..796701d 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -206,7 +206,7 @@
   uint32_t checksum;
   ScopedObjectAccess soa(Thread::Current());
   std::string error_msg;
-  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileName().c_str(), &checksum, &error_msg))
+  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileNames()[0].c_str(), &checksum, &error_msg))
       << error_msg;
   EXPECT_EQ(java_lang_dex_file_->GetLocationChecksum(), checksum);
 }
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 2819670..52da28b 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -32,84 +32,6 @@
 
 namespace art {
 
-// -------------------------------------------------------------------
-// Binary GDB JIT Interface as described in
-//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
-extern "C" {
-  typedef enum {
-    JIT_NOACTION = 0,
-    JIT_REGISTER_FN,
-    JIT_UNREGISTER_FN
-  } JITAction;
-
-  struct JITCodeEntry {
-    JITCodeEntry* next_;
-    JITCodeEntry* prev_;
-    const uint8_t *symfile_addr_;
-    uint64_t symfile_size_;
-  };
-
-  struct JITDescriptor {
-    uint32_t version_;
-    uint32_t action_flag_;
-    JITCodeEntry* relevant_entry_;
-    JITCodeEntry* first_entry_;
-  };
-
-  // GDB will place breakpoint into this function.
-  // To prevent GCC from inlining or removing it we place noinline attribute
-  // and inline assembler statement inside.
-  void __attribute__((noinline)) __jit_debug_register_code();
-  void __attribute__((noinline)) __jit_debug_register_code() {
-    __asm__("");
-  }
-
-  // GDB will inspect contents of this descriptor.
-  // Static initialization is necessary to prevent GDB from seeing
-  // uninitialized descriptor.
-  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
-}
-
-
-static JITCodeEntry* CreateCodeEntry(const uint8_t *symfile_addr,
-                                     uintptr_t symfile_size) {
-  JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
-  entry->symfile_size_ = symfile_size;
-  entry->prev_ = nullptr;
-
-  // TODO: Do we need a lock here?
-  entry->next_ = __jit_debug_descriptor.first_entry_;
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry;
-  }
-  __jit_debug_descriptor.first_entry_ = entry;
-  __jit_debug_descriptor.relevant_entry_ = entry;
-
-  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
-  __jit_debug_register_code();
-  return entry;
-}
-
-
-static void UnregisterCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
-  if (entry->prev_ != nullptr) {
-    entry->prev_->next_ = entry->next_;
-  } else {
-    __jit_debug_descriptor.first_entry_ = entry->next_;
-  }
-
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry->prev_;
-  }
-
-  __jit_debug_descriptor.relevant_entry_ = entry;
-  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  __jit_debug_register_code();
-  delete entry;
-}
-
 template <typename ElfTypes>
 ElfFileImpl<ElfTypes>::ElfFileImpl(File* file, bool writable,
                                    bool program_header_only,
@@ -130,8 +52,6 @@
     hash_section_start_(nullptr),
     symtab_symbol_table_(nullptr),
     dynsym_symbol_table_(nullptr),
-    jit_elf_image_(nullptr),
-    jit_gdb_entry_(nullptr),
     requested_base_(requested_base) {
   CHECK(file != nullptr);
 }
@@ -350,10 +270,6 @@
   STLDeleteElements(&segments_);
   delete symtab_symbol_table_;
   delete dynsym_symbol_table_;
-  delete jit_elf_image_;
-  if (jit_gdb_entry_) {
-    UnregisterCodeEntry(jit_gdb_entry_);
-  }
 }
 
 template <typename ElfTypes>
@@ -1377,11 +1293,6 @@
     return false;
   }
 
-  // Use GDB JIT support to do stack backtrace, etc.
-  if (executable) {
-    GdbJITSupport();
-  }
-
   return true;
 }
 
@@ -1472,50 +1383,6 @@
 }
 
 template <typename ElfTypes>
-void ElfFileImpl<ElfTypes>::GdbJITSupport() {
-  // We only get here if we only are mapping the program header.
-  DCHECK(program_header_only_);
-
-  // Well, we need the whole file to do this.
-  std::string error_msg;
-  // Make it MAP_PRIVATE so we can just give it to gdb if all the necessary
-  // sections are there.
-  std::unique_ptr<ElfFileImpl<ElfTypes>> all_ptr(
-      Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
-  if (all_ptr.get() == nullptr) {
-    return;
-  }
-  ElfFileImpl<ElfTypes>& all = *all_ptr;
-
-  // We need the eh_frame for gdb but debug info might be present without it.
-  const Elf_Shdr* eh_frame = all.FindSectionByName(".eh_frame");
-  if (eh_frame == nullptr) {
-    return;
-  }
-
-  // Do we have interesting sections?
-  // We need to add in a strtab and symtab to the image.
-  // all is MAP_PRIVATE so it can be written to freely.
-  // We also already have strtab and symtab so we are fine there.
-  Elf_Ehdr& elf_hdr = all.GetHeader();
-  elf_hdr.e_entry = 0;
-  elf_hdr.e_phoff = 0;
-  elf_hdr.e_phnum = 0;
-  elf_hdr.e_phentsize = 0;
-  elf_hdr.e_type = ET_EXEC;
-
-  // Since base_address_ is 0 if we are actually loaded at a known address (i.e. this is boot.oat)
-  // and the actual address stuff starts at in regular files this is good.
-  if (!all.FixupDebugSections(reinterpret_cast<intptr_t>(base_address_))) {
-    LOG(ERROR) << "Failed to load GDB data";
-    return;
-  }
-
-  jit_gdb_entry_ = CreateCodeEntry(all.Begin(), all.Size());
-  gdb_file_mapping_.reset(all_ptr.release());
-}
-
-template <typename ElfTypes>
 bool ElfFileImpl<ElfTypes>::Strip(std::string* error_msg) {
   // ELF files produced by MCLinker look roughly like this
   //
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index 0f466bd..2af31dc 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -213,12 +213,6 @@
   SymbolTable* symtab_symbol_table_;
   SymbolTable* dynsym_symbol_table_;
 
-  // Support for GDB JIT
-  uint8_t* jit_elf_image_;
-  JITCodeEntry* jit_gdb_entry_;
-  std::unique_ptr<ElfFileImpl<ElfTypes>> gdb_file_mapping_;
-  void GdbJITSupport();
-
   // Override the 'base' p_vaddr in the first LOAD segment with this value (if non-null).
   uint8_t* requested_base_;
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 915d9ab..b5a55bf 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -33,7 +33,6 @@
 #include "oat_quick_method_header.h"
 #include "reflection.h"
 #include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -120,102 +119,6 @@
                                     heap->GetCurrentAllocator());
 }
 
-void ThrowStackOverflowError(Thread* self) {
-  if (self->IsHandlingStackOverflow()) {
-    LOG(ERROR) << "Recursive stack overflow.";
-    // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
-  }
-
-  self->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
-  JNIEnvExt* env = self->GetJniEnv();
-  std::string msg("stack size ");
-  msg += PrettySize(self->GetStackSize());
-
-  // Avoid running Java code for exception initialization.
-  // TODO: Checks to make this a bit less brittle.
-
-  std::string error_msg;
-
-  // Allocate an uninitialized object.
-  ScopedLocalRef<jobject> exc(env,
-                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
-  if (exc.get() != nullptr) {
-    // "Initialize".
-    // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
-    // Only Throwable has "custom" fields:
-    //   String detailMessage.
-    //   Throwable cause (= this).
-    //   List<Throwable> suppressedExceptions (= Collections.emptyList()).
-    //   Object stackState;
-    //   StackTraceElement[] stackTrace;
-    // Only Throwable has a non-empty constructor:
-    //   this.stackTrace = EmptyArray.STACK_TRACE_ELEMENT;
-    //   fillInStackTrace();
-
-    // detailMessage.
-    // TODO: Use String::FromModifiedUTF...?
-    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
-    if (s.get() != nullptr) {
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
-
-      // cause.
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
-
-      // suppressedExceptions.
-      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
-          WellKnownClasses::java_util_Collections,
-          WellKnownClasses::java_util_Collections_EMPTY_LIST));
-      CHECK(emptylist.get() != nullptr);
-      env->SetObjectField(exc.get(),
-                          WellKnownClasses::java_lang_Throwable_suppressedExceptions,
-                          emptylist.get());
-
-      // stackState is set as result of fillInStackTrace. fillInStackTrace calls
-      // nativeFillInStackTrace.
-      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
-      {
-        ScopedObjectAccessUnchecked soa(env);
-        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
-      }
-      if (stack_state_val.get() != nullptr) {
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackState,
-                            stack_state_val.get());
-
-        // stackTrace.
-        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
-            WellKnownClasses::libcore_util_EmptyArray,
-            WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackTrace,
-                            stack_trace_elem.get());
-      } else {
-        error_msg = "Could not create stack trace.";
-      }
-      // Throw the exception.
-      self->SetException(reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
-    } else {
-      // Could not allocate a string object.
-      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
-    }
-  } else {
-    error_msg = "Could not allocate StackOverflowError object.";
-  }
-
-  if (!error_msg.empty()) {
-    LOG(WARNING) << error_msg;
-    CHECK(self->IsExceptionPending());
-  }
-
-  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
-  self->ResetDefaultStackEnd();  // Return to default stack size.
-
-  // And restore protection if implicit checks are on.
-  if (!explicit_overflow_check) {
-    self->ProtectStack();
-  }
-}
-
 void CheckReferenceResult(mirror::Object* o, Thread* self) {
   if (o == nullptr) {
     return;
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 0469ee6..a28376f 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -158,8 +158,6 @@
     uint32_t type_idx, ArtMethod* referrer, Thread* self, bool can_run_clinit, bool verify_access)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
-
 inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 5a82b3a..5256fea 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -16,7 +16,6 @@
 
 #include "callee_save_frame.h"
 #include "common_throws.h"
-#include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/object-inl.h"
 #include "thread.h"
 #include "well_known_classes.h"
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index dc9f14c..f87d48d 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,7 +122,10 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, rosalloc_runs, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, rosalloc_runs, thread_local_alloc_stack_top,
                         sizeof(void*) * kNumRosAllocThreadLocalSizeBrackets);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 8f7bb94..d16afd9 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -487,7 +487,9 @@
 
 // Mark all references to the alloc space(s).
 void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
-  auto* image_space = heap_->GetBootImageSpace();
+  // TODO: Needs better support for multi-images? b/26317072
+  space::ImageSpace* image_space =
+      heap_->GetBootImageSpaces().empty() ? nullptr : heap_->GetBootImageSpaces()[0];
   // If we don't have an image space, just pass in space_ as the immune space. Pass in the same
   // space_ instead of image_space to avoid a null check in ModUnionUpdateObjectReferencesVisitor.
   CardBitVisitor bit_visitor(visitor, space_, image_space != nullptr ? image_space : space_,
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 3be7181..61c67f8 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -167,7 +167,10 @@
   uintptr_t* address = &bitmap_begin_[index];
   uintptr_t old_word = *address;
   if (kSetBit) {
-    *address = old_word | mask;
+    if ((old_word & mask) == 0) {
+      // Avoid dirtying the page if possible.
+      *address = old_word | mask;
+    }
   } else {
     *address = old_word & ~mask;
   }
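The guarded store above is a small but real optimization: writing a word whose value does not change still dirties its page, which can force copy-on-write faults and give the kernel more pages to write back. A generalized sketch of the read-before-write pattern:

#include <cstdint>

// Sets `mask` bits in *address, but only stores if that would change the word.
// (The bitmap code tests == 0 because its mask is a single bit; this variant
// handles multi-bit masks.) Skipping the redundant store keeps a clean page
// from being dirtied.
inline void SetBitsAvoidingDirty(uintptr_t* address, uintptr_t mask) {
  const uintptr_t old_word = *address;
  if ((old_word & mask) != mask) {  // At least one bit still clear.
    *address = old_word | mask;
  }
}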
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6d72f31..d6c1817 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -233,8 +233,7 @@
       backtrace_lock_(nullptr),
       seen_backtrace_count_(0u),
       unique_backtrace_count_(0u),
-      gc_disabled_for_shutdown_(false),
-      boot_image_space_(nullptr) {
+      gc_disabled_for_shutdown_(false) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
@@ -260,23 +259,63 @@
     CHECK_GE(300 * MB, non_moving_space_capacity);
     requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
   }
+
+  // Load image space(s).
   if (!image_file_name.empty()) {
-    ATRACE_BEGIN("ImageSpace::Create");
-    std::string error_msg;
-    boot_image_space_ = space::ImageSpace::Create(image_file_name.c_str(),
-                                                  image_instruction_set,
-                                                  &error_msg);
-    ATRACE_END();
-    if (boot_image_space_ != nullptr) {
-      AddSpace(boot_image_space_);
-      // Oat files referenced by image files immediately follow them in memory, ensure alloc space
-      // isn't going to get in the middle
-      uint8_t* oat_file_end_addr = boot_image_space_->GetImageHeader().GetOatFileEnd();
-      CHECK_GT(oat_file_end_addr, boot_image_space_->End());
-      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
-    } else {
-      LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
-                   << "Attempting to fall back to imageless running. Error was: " << error_msg;
+    // For code reuse, handle this like a work queue.
+    std::vector<std::string> image_file_names;
+    image_file_names.push_back(image_file_name);
+    // The loaded spaces. Secondary images may fail to load, in which case we need to remove
+    // already added spaces.
+    std::vector<space::Space*> added_image_spaces;
+
+    for (size_t index = 0; index < image_file_names.size(); ++index) {
+      std::string& image_name = image_file_names[index];
+      ATRACE_BEGIN("ImageSpace::Create");
+      std::string error_msg;
+      space::ImageSpace* boot_image_space = space::ImageSpace::Create(image_name.c_str(),
+                                                                      image_instruction_set,
+                                                                      index > 0,
+                                                                      &error_msg);
+      ATRACE_END();
+      if (boot_image_space != nullptr) {
+        AddSpace(boot_image_space);
+        added_image_spaces.push_back(boot_image_space);
+        // Oat files referenced by image files immediately follow them in memory; ensure the alloc
+        // space isn't going to get in the middle.
+        uint8_t* oat_file_end_addr = boot_image_space->GetImageHeader().GetOatFileEnd();
+        CHECK_GT(oat_file_end_addr, boot_image_space->End());
+        requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
+        boot_image_spaces_.push_back(boot_image_space);
+
+        if (index == 0) {
+          // If this was the first space, check whether there are more images to load.
+          const OatFile* boot_oat_file = boot_image_space->GetOatFile();
+          if (boot_oat_file == nullptr) {
+            continue;
+          }
+
+          const OatHeader& boot_oat_header = boot_oat_file->GetOatHeader();
+          const char* boot_classpath =
+              boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPath);
+          if (boot_classpath == nullptr) {
+            continue;
+          }
+
+          space::ImageSpace::CreateMultiImageLocations(image_file_name,
+                                                       boot_classpath,
+                                                       &image_file_names);
+        }
+      } else {
+        LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
+            << "Attempting to fall back to imageless running. Error was: " << error_msg
+            << "\nAttempted image: " << image_name;
+        // Remove already loaded spaces.
+        for (space::Space* loaded_space : added_image_spaces) {
+          RemoveSpace(loaded_space);
+        }
+        break;
+      }
     }
   }
   /*
@@ -456,13 +495,15 @@
     rb_table_.reset(new accounting::ReadBarrierTable());
     DCHECK(rb_table_->IsAllCleared());
   }
-  if (GetBootImageSpace() != nullptr) {
+  if (HasBootImageSpace()) {
     // Don't add the image mod union table if we are running without an image, this can crash if
     // we use the CardCache implementation.
-    accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableToZygoteAllocspace(
-        "Image mod-union table", this, GetBootImageSpace());
-    CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
-    AddModUnionTable(mod_union_table);
+    for (space::ImageSpace* image_space : GetBootImageSpaces()) {
+      accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableToZygoteAllocspace(
+          "Image mod-union table", this, image_space);
+      CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
+      AddModUnionTable(mod_union_table);
+    }
   }
   if (collector::SemiSpace::kUseRememberedSet && non_moving_space_ != main_space_) {
     accounting::RememberedSet* non_moving_space_rem_set =
@@ -525,13 +566,19 @@
       garbage_collectors_.push_back(mark_compact_collector_);
     }
   }
-  if (GetBootImageSpace() != nullptr && non_moving_space_ != nullptr &&
+  if (!GetBootImageSpaces().empty() && non_moving_space_ != nullptr &&
       (is_zygote || separate_non_moving_space || foreground_collector_type_ == kCollectorTypeGSS)) {
     // Check that there's no gap between the image space and the non moving space so that the
     // immune region won't break (eg. due to a large object allocated in the gap). This is only
     // required when we're the zygote or using GSS.
-    bool no_gap = MemMap::CheckNoGaps(GetBootImageSpace()->GetMemMap(),
-                                      non_moving_space_->GetMemMap());
+    // Space with smallest Begin().
+    space::ImageSpace* first_space = nullptr;
+    for (space::ImageSpace* space : boot_image_spaces_) {
+      if (first_space == nullptr || space->Begin() < first_space->Begin()) {
+        first_space = space;
+      }
+    }
+    bool no_gap = MemMap::CheckNoGaps(first_space->GetMemMap(), non_moving_space_->GetMemMap());
     if (!no_gap) {
       PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
       MemMap::DumpMaps(LOG(ERROR), true);
@@ -1202,8 +1249,8 @@
   return FindDiscontinuousSpaceFromObject(obj, fail_ok);
 }
 
-space::ImageSpace* Heap::GetBootImageSpace() const {
-  return boot_image_space_;
+std::vector<space::ImageSpace*> Heap::GetBootImageSpaces() const {
+  return boot_image_spaces_;
 }
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
@@ -2290,7 +2337,7 @@
   if (HasZygoteSpace()) {
     return;
   }
-  Runtime::Current()->GetInternTable()->SwapPostZygoteWithPreZygote();
+  Runtime::Current()->GetInternTable()->AddNewTable();
   Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote();
   VLOG(heap) << "Starting PreZygoteFork";
   // Trim the pages at the end of the non moving space.
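Image loading above treats image_file_names as a work queue: entry 0 is loaded first, and CreateMultiImageLocations may append further entries that the same loop then consumes. Index-based iteration is what makes this safe, since push_back can reallocate and invalidate iterators. A stand-alone sketch, with a hypothetical Expand() in place of the real location derivation:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in: processing the first item may reveal more items,
// the way the primary image's oat header lists the remaining boot images.
void Expand(const std::string& first, std::vector<std::string>* names) {
  if (first == "boot.art") {
    names->push_back("boot-core-libart.art");
    names->push_back("boot-okhttp.art");
  }
}

int main() {
  std::vector<std::string> work;
  work.push_back("boot.art");
  // Indexing (not iterators) is deliberate: push_back may reallocate the
  // vector's storage, but index-based access stays well-defined.
  for (size_t index = 0; index < work.size(); ++index) {
    std::cout << "load " << work[index] << std::endl;
    if (index == 0) {
      Expand(work[index], &work);
    }
  }
  return 0;
}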
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e23b1a3..e7ea983 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -580,9 +580,8 @@
   // Unbind any bound bitmaps.
   void UnBindBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
 
-  // Returns the boot image space. There may be multiple image spaces, but there is only one boot
-  // image space.
-  space::ImageSpace* GetBootImageSpace() const;
+  // Returns the boot image spaces. There may be multiple boot image spaces.
+  std::vector<space::ImageSpace*> GetBootImageSpaces() const;
 
   // Permenantly disable moving garbage collection.
   void DisableMovingGc() REQUIRES(!*gc_complete_lock_);
@@ -660,8 +659,8 @@
   void RemoveRememberedSet(space::Space* space);
 
   bool IsCompilingBoot() const;
-  bool HasImageSpace() const {
-    return boot_image_space_ != nullptr;
+  bool HasBootImageSpace() const {
+    return !boot_image_spaces_.empty();
   }
 
   ReferenceProcessor* GetReferenceProcessor() {
@@ -1322,8 +1321,8 @@
   // allocating.
   bool gc_disabled_for_shutdown_ GUARDED_BY(gc_complete_lock_);
 
-  // Boot image space.
-  space::ImageSpace* boot_image_space_;
+  // Boot image spaces.
+  std::vector<space::ImageSpace*> boot_image_spaces_;
 
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 8f67c21..5f6bb8e 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -43,11 +43,18 @@
 
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
-ImageSpace::ImageSpace(const std::string& image_filename, const char* image_location,
-                       MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap,
+ImageSpace::ImageSpace(const std::string& image_filename,
+                       const char* image_location,
+                       MemMap* mem_map,
+                       accounting::ContinuousSpaceBitmap* live_bitmap,
                        uint8_t* end)
-    : MemMapSpace(image_filename, mem_map, mem_map->Begin(), end, end,
+    : MemMapSpace(image_filename,
+                  mem_map,
+                  mem_map->Begin(),
+                  end,
+                  end,
                   kGcRetentionPolicyNeverCollect),
+      oat_file_non_owned_(nullptr),
       image_location_(image_location) {
   DCHECK(live_bitmap != nullptr);
   live_bitmap_.reset(live_bitmap);
@@ -470,6 +477,7 @@
 
 ImageSpace* ImageSpace::Create(const char* image_location,
                                const InstructionSet image_isa,
+                               bool secondary_image,
                                std::string* error_msg) {
   std::string system_filename;
   bool has_system = false;
@@ -481,7 +489,7 @@
                                              &has_system, &cache_filename, &dalvik_cache_exists,
                                              &has_cache, &is_global_cache);
 
-  if (Runtime::Current()->IsZygote()) {
+  if (Runtime::Current()->IsZygote() && !secondary_image) {
     MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
   }
 
@@ -517,6 +525,9 @@
           } else if (!ImageCreationAllowed(is_global_cache, &reason)) {
             // Whether we can write to the cache.
             success = false;
+          } else if (secondary_image) {
+            reason = "Should not have to patch secondary image.";
+            success = false;
           } else {
             // Try to relocate.
             success = RelocateImage(image_location, cache_filename.c_str(), image_isa, &reason);
@@ -609,6 +620,9 @@
     return nullptr;
   } else if (!ImageCreationAllowed(is_global_cache, error_msg)) {
     return nullptr;
+  } else if (secondary_image) {
+    *error_msg = "Cannot compile a secondary image.";
+    return nullptr;
   } else if (!GenerateImage(cache_filename, image_isa, error_msg)) {
     *error_msg = StringPrintf("Failed to generate image '%s': %s",
                               cache_filename.c_str(), error_msg->c_str());
@@ -686,7 +700,7 @@
     return nullptr;
   }
 
-  if (kIsDebugBuild) {
+  if (VLOG_IS_ON(startup)) {
     LOG(INFO) << "Dumping image sections";
     for (size_t i = 0; i < ImageHeader::kSectionCount; ++i) {
       const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
@@ -788,12 +802,14 @@
   uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
                                        bitmap_index));
+  // Bitmap only needs to cover until the end of the mirror objects section.
+  const ImageSection& image_objects = image_header.GetImageSection(ImageHeader::kSectionObjects);
   std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(
           bitmap_name,
           image_bitmap_map.release(),
           reinterpret_cast<uint8_t*>(map->Begin()),
-          accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size())));
+          image_objects.End()));
   if (bitmap == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
@@ -802,8 +818,11 @@
   // We only want the mirror object, not the ArtFields and ArtMethods.
   uint8_t* const image_end =
       map->Begin() + image_header.GetImageSection(ImageHeader::kSectionObjects).End();
-  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename, image_location,
-                                                   map.release(), bitmap.release(), image_end));
+  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename,
+                                                   image_location,
+                                                   map.release(),
+                                                   bitmap.release(),
+                                                   image_end));
 
   // VerifyImageAllocations() will be called later in Runtime::Init()
   // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
@@ -826,16 +845,18 @@
   Runtime* runtime = Runtime::Current();
   runtime->SetInstructionSet(space->oat_file_->GetOatHeader().GetInstructionSet());
 
-  runtime->SetResolutionMethod(image_header.GetImageMethod(ImageHeader::kResolutionMethod));
-  runtime->SetImtConflictMethod(image_header.GetImageMethod(ImageHeader::kImtConflictMethod));
-  runtime->SetImtUnimplementedMethod(
-      image_header.GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
-  runtime->SetCalleeSaveMethod(
-      image_header.GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
+  if (!runtime->HasResolutionMethod()) {
+    runtime->SetResolutionMethod(image_header.GetImageMethod(ImageHeader::kResolutionMethod));
+    runtime->SetImtConflictMethod(image_header.GetImageMethod(ImageHeader::kImtConflictMethod));
+    runtime->SetImtUnimplementedMethod(
+        image_header.GetImageMethod(ImageHeader::kImtUnimplementedMethod));
+    runtime->SetCalleeSaveMethod(
+        image_header.GetImageMethod(ImageHeader::kCalleeSaveMethod), Runtime::kSaveAll);
+    runtime->SetCalleeSaveMethod(
+        image_header.GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly);
+    runtime->SetCalleeSaveMethod(
+        image_header.GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs);
+  }
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "ImageSpace::Init exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -920,6 +941,67 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
+void ImageSpace::CreateMultiImageLocations(const std::string& input_image_file_name,
+                                           const std::string& boot_classpath,
+                                           std::vector<std::string>* image_file_names) {
+  DCHECK(image_file_names != nullptr);
+
+  std::vector<std::string> images;
+  Split(boot_classpath, ':', &images);
+
+  // Add the rest to the list. Locations may need to be adjusted:
+  //
+  // For example, image_file_name is /a/b/c/d/e.art
+  //              images[0] is          f/c/d/e.art
+  // ----------------------------------------------
+  //              images[1] is          g/h/i/j.art  -> /a/b/h/i/j.art
+
+  // Derive pattern.
+  std::vector<std::string> left;
+  Split(input_image_file_name, '/', &left);
+  std::vector<std::string> right;
+  Split(images[0], '/', &right);
+
+  size_t common = 1;
+  while (common < left.size() && common < right.size()) {
+    if (left[left.size() - common - 1] != right[right.size() - common - 1]) {
+      break;
+    }
+    common++;
+  }
+
+  std::vector<std::string> prefix_vector(left.begin(), left.end() - common);
+  std::string common_prefix = Join(prefix_vector, '/');
+  if (!common_prefix.empty() && common_prefix[0] != '/' && input_image_file_name[0] == '/') {
+    common_prefix = "/" + common_prefix;
+  }
+
+  // Apply pattern to images[1] .. images[n].
+  for (size_t i = 1; i < images.size(); ++i) {
+    std::string image = images[i];
+
+    size_t rslash = std::string::npos;
+    for (size_t j = 0; j < common; ++j) {
+      if (rslash != std::string::npos) {
+        rslash--;
+      }
+
+      rslash = image.rfind('/', rslash);
+      if (rslash == std::string::npos) {
+        rslash = 0;
+      }
+      if (rslash == 0) {
+        break;
+      }
+    }
+    std::string image_part = image.substr(rslash);
+
+    std::string new_image = common_prefix + (StartsWith(image_part, "/") ? "" : "/") +
+        image_part;
+    image_file_names->push_back(new_image);
+  }
+}
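+
+// Illustrative call (hypothetical; the values mirror the worked example above):
+//   std::vector<std::string> names;
+//   CreateMultiImageLocations("/a/b/c/d/e.art", "f/c/d/e.art:g/h/i/j.art", &names);
+//   // names now contains "/a/b/h/i/j.art".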
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index babd672..9c8e8b2 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -43,7 +43,10 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image, InstructionSet image_isa, std::string* error_msg)
+  static ImageSpace* Create(const char* image,
+                            InstructionSet image_isa,
+                            bool secondary_image,
+                            std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Reads the image header from the specified image location for the
@@ -119,6 +122,12 @@
                                 bool* has_data,
                                 bool *is_global_cache);
 
+  // Use the input image filename to adapt the names in the given boot classpath to establish
+  // complete locations for secondary images.
+  static void CreateMultiImageLocations(const std::string& input_image_file_name,
+                                        const std::string& boot_classpath,
+                                        std::vector<std::string>* image_filenames);
+
   // Return the end of the image which includes non-heap objects such as ArtMethods and ArtFields.
   uint8_t* GetImageEnd() const {
     return Begin() + GetImageHeader().GetImageSize();
@@ -158,8 +167,11 @@
 
   std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
-  ImageSpace(const std::string& name, const char* image_location,
-             MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap, uint8_t* end);
+  ImageSpace(const std::string& name,
+             const char* image_location,
+             MemMap* mem_map,
+             accounting::ContinuousSpaceBitmap* live_bitmap,
+             uint8_t* end);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
diff --git a/runtime/image.cc b/runtime/image.cc
index f8f930b..3cb6642 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '4', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '5', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -55,7 +55,6 @@
   CHECK_EQ(image_begin, RoundUp(image_begin, kPageSize));
   CHECK_EQ(oat_file_begin, RoundUp(oat_file_begin, kPageSize));
   CHECK_EQ(oat_data_begin, RoundUp(oat_data_begin, kPageSize));
-  CHECK_LT(image_begin, image_roots);
   CHECK_LT(image_roots, oat_file_begin);
   CHECK_LE(oat_file_begin, oat_data_begin);
   CHECK_LT(oat_data_begin, oat_data_end);
@@ -100,9 +99,6 @@
   if (oat_file_begin_ >= oat_data_begin_) {
     return false;
   }
-  if (image_roots_ <= image_begin_ || oat_file_begin_ <= image_roots_) {
-    return false;
-  }
   if (!IsAligned<kPageSize>(patch_delta_)) {
     return false;
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 726cf1b..5e0a11d 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -417,6 +417,13 @@
                !Locks::classlinker_classes_lock_);
 
   void UpdateInterpreterHandlerTable() REQUIRES(Locks::mutator_lock_) {
+    /*
+     * TUNING: Dalvik's mterp stashes the actual current handler table base in a
+     * tls field.  For Arm, this enables all suspend, debug & tracing checks to be
+     * collapsed into a single conditionally-executed ldw instruction.
+     * Move to Dalvik-style handler-table management for both the goto interpreter and
+     * mterp.
+     */
     interpreter_handler_table_ = IsActive() ? kAlternativeHandlerTable : kMainHandlerTable;
   }
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index e2e4782..015bf98 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -32,7 +32,8 @@
 namespace art {
 
 InternTable::InternTable()
-    : image_added_to_intern_table_(false), log_new_roots_(false),
+    : images_added_to_intern_table_(false),
+      log_new_roots_(false),
       weak_intern_condition_("New intern condition", *Locks::intern_table_lock_),
       weak_root_state_(gc::kWeakRootStateNormal) {
 }
@@ -93,10 +94,10 @@
   return weak_interns_.Find(s);
 }
 
-void InternTable::SwapPostZygoteWithPreZygote() {
+void InternTable::AddNewTable() {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  weak_interns_.SwapPostZygoteWithPreZygote();
-  strong_interns_.SwapPostZygoteWithPreZygote();
+  weak_interns_.AddNewTable();
+  strong_interns_.AddNewTable();
 }
 
 mirror::String* InternTable::InsertStrong(mirror::String* s) {
@@ -150,15 +151,14 @@
   RemoveWeak(s);
 }
 
-void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) {
-  CHECK(image_space != nullptr);
+void InternTable::AddImagesStringsToTable(const std::vector<gc::space::ImageSpace*>& image_spaces) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  if (!image_added_to_intern_table_) {
+  for (gc::space::ImageSpace* image_space : image_spaces) {
     const ImageHeader* const header = &image_space->GetImageHeader();
     // Check if we have the interned strings section.
     const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings);
     if (section.Size() > 0) {
-      ReadFromMemoryLocked(image_space->Begin() + section.Offset());
+      AddTableFromMemoryLocked(image_space->Begin() + section.Offset());
     } else {
       // TODO: Delete this logic?
       mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches);
@@ -179,32 +179,33 @@
         }
       }
     }
-    image_added_to_intern_table_ = true;
   }
+  images_added_to_intern_table_ = true;
 }
 
 mirror::String* InternTable::LookupStringFromImage(mirror::String* s) {
-  if (image_added_to_intern_table_) {
-    return nullptr;
-  }
-  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetBootImageSpace();
-  if (image == nullptr) {
+  DCHECK(!images_added_to_intern_table_);
+  const std::vector<gc::space::ImageSpace*>& image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  if (image_spaces.empty()) {
     return nullptr;  // No image present.
   }
-  mirror::Object* root = image->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
   const std::string utf8 = s->ToModifiedUtf8();
-  for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
-    const DexFile* dex_file = dex_cache->GetDexFile();
-    // Binary search the dex file for the string index.
-    const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
-    if (string_id != nullptr) {
-      uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
-      // GetResolvedString() contains a RB.
-      mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
-      if (image_string != nullptr) {
-        return image_string;
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+    mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
+    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      // Binary search the dex file for the string index.
+      const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
+      if (string_id != nullptr) {
+        uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
+        // GetResolvedString() contains a RB.
+        mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
+        if (image_string != nullptr) {
+          return image_string;
+        }
       }
     }
   }
@@ -281,9 +282,11 @@
     return weak;
   }
   // Check the image for a match.
-  mirror::String* image = LookupStringFromImage(s);
-  if (image != nullptr) {
-    return is_strong ? InsertStrong(image) : InsertWeak(image);
+  if (!images_added_to_intern_table_) {
+    mirror::String* const image_string = LookupStringFromImage(s);
+    if (image_string != nullptr) {
+      return is_strong ? InsertStrong(image_string) : InsertWeak(image_string);
+    }
   }
   // No match in the strong table or the weak table. Insert into the strong / weak table.
   return is_strong ? InsertStrong(s) : InsertWeak(s);
@@ -323,27 +326,18 @@
   weak_interns_.SweepWeaks(visitor);
 }
 
-void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) {
-  const ImageSection& intern_section = image_space->GetImageHeader().GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  // Read the string tables from the image.
-  const uint8_t* ptr = image_space->Begin() + intern_section.Offset();
-  const size_t offset = ReadFromMemory(ptr);
-  CHECK_LE(offset, intern_section.Size());
-}
-
-size_t InternTable::ReadFromMemory(const uint8_t* ptr) {
+size_t InternTable::AddTableFromMemory(const uint8_t* ptr) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return ReadFromMemoryLocked(ptr);
+  return AddTableFromMemoryLocked(ptr);
 }
 
-size_t InternTable::ReadFromMemoryLocked(const uint8_t* ptr) {
-  return strong_interns_.ReadIntoPreZygoteTable(ptr);
+size_t InternTable::AddTableFromMemoryLocked(const uint8_t* ptr) {
+  return strong_interns_.AddTableFromMemory(ptr);
 }
 
 size_t InternTable::WriteToMemory(uint8_t* ptr) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return strong_interns_.WriteFromPostZygoteTable(ptr);
+  return strong_interns_.WriteToMemory(ptr);
 }
 
 std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) const {
@@ -361,71 +355,87 @@
   return a.Read()->Equals(b.Read());
 }
 
-size_t InternTable::Table::ReadIntoPreZygoteTable(const uint8_t* ptr) {
-  CHECK_EQ(pre_zygote_table_.Size(), 0u);
+size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) {
   size_t read_count = 0;
-  pre_zygote_table_ = UnorderedSet(ptr, false /* make copy */, &read_count);
+  UnorderedSet set(ptr, /*make copy*/false, &read_count);
+  // TODO: Disable this for app images if app images have intern tables.
+  static constexpr bool kCheckDuplicates = true;
+  if (kCheckDuplicates) {
+    for (GcRoot<mirror::String>& string : set) {
+      CHECK(Find(string.Read()) == nullptr) << "Already found " << string.Read()->ToModifiedUtf8();
+    }
+  }
+  // Insert the table at the front since new strings are only inserted into the table at the back.
+  tables_.insert(tables_.begin(), std::move(set));
   return read_count;
 }
 
-size_t InternTable::Table::WriteFromPostZygoteTable(uint8_t* ptr) {
-  return post_zygote_table_.WriteToMemory(ptr);
+size_t InternTable::Table::WriteToMemory(uint8_t* ptr) {
+  if (tables_.empty()) {
+    return 0;
+  }
+  UnorderedSet* table_to_write;
+  UnorderedSet combined;
+  if (tables_.size() > 1) {
+    table_to_write = &combined;
+    for (UnorderedSet& table : tables_) {
+      for (GcRoot<mirror::String>& string : table) {
+        combined.Insert(string);
+      }
+    }
+  } else {
+    table_to_write = &tables_.back();
+  }
+  return table_to_write->WriteToMemory(ptr);
 }
 
 void InternTable::Table::Remove(mirror::String* s) {
-  auto it = post_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != post_zygote_table_.end()) {
-    post_zygote_table_.Erase(it);
-  } else {
-    it = pre_zygote_table_.Find(GcRoot<mirror::String>(s));
-    DCHECK(it != pre_zygote_table_.end());
-    pre_zygote_table_.Erase(it);
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(GcRoot<mirror::String>(s));
+    if (it != table.end()) {
+      table.Erase(it);
+      return;
+    }
   }
+  LOG(FATAL) << "Attempting to remove non-interned string " << s->ToModifiedUtf8();
 }
 
 mirror::String* InternTable::Table::Find(mirror::String* s) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
-  auto it = pre_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != pre_zygote_table_.end()) {
-    return it->Read();
-  }
-  it = post_zygote_table_.Find(GcRoot<mirror::String>(s));
-  if (it != post_zygote_table_.end()) {
-    return it->Read();
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(GcRoot<mirror::String>(s));
+    if (it != table.end()) {
+      return it->Read();
+    }
   }
   return nullptr;
 }
 
-void InternTable::Table::SwapPostZygoteWithPreZygote() {
-  if (pre_zygote_table_.Empty()) {
-    std::swap(pre_zygote_table_, post_zygote_table_);
-    VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table";
-  } else {
-    // This case happens if read the intern table from the image.
-    VLOG(heap) << "Not swapping due to non-empty pre_zygote_table_";
-  }
+void InternTable::Table::AddNewTable() {
+  tables_.push_back(UnorderedSet());
 }
 
 void InternTable::Table::Insert(mirror::String* s) {
-  // Always insert the post zygote table, this gets swapped when we create the zygote to be the
-  // pre zygote table.
-  post_zygote_table_.Insert(GcRoot<mirror::String>(s));
+  // Always insert into the last table; the image tables come before it, and we avoid
+  // inserting into those to prevent dirty pages.
+  DCHECK(!tables_.empty());
+  tables_.back().Insert(GcRoot<mirror::String>(s));
 }
 
 void InternTable::Table::VisitRoots(RootVisitor* visitor) {
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
       visitor, RootInfo(kRootInternedString));
-  for (auto& intern : pre_zygote_table_) {
-    buffered_visitor.VisitRoot(intern);
-  }
-  for (auto& intern : post_zygote_table_) {
-    buffered_visitor.VisitRoot(intern);
+  for (UnorderedSet& table : tables_) {
+    for (auto& intern : table) {
+      buffered_visitor.VisitRoot(intern);
+    }
   }
 }
 
 void InternTable::Table::SweepWeaks(IsMarkedVisitor* visitor) {
-  SweepWeaks(&pre_zygote_table_, visitor);
-  SweepWeaks(&post_zygote_table_, visitor);
+  for (UnorderedSet& table : tables_) {
+    SweepWeaks(&table, visitor);
+  }
 }
 
 void InternTable::Table::SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor) {
@@ -443,7 +453,12 @@
 }
 
 size_t InternTable::Table::Size() const {
-  return pre_zygote_table_.Size() + post_zygote_table_.Size();
+  return std::accumulate(tables_.begin(),
+                         tables_.end(),
+                         size_t(0),
+                         [](size_t sum, const UnorderedSet& set) {
+                           return sum + set.Size();
+                         });
 }
 
 void InternTable::ChangeWeakRootState(gc::WeakRootState new_state) {
@@ -461,10 +476,10 @@
 
 InternTable::Table::Table() {
   Runtime* const runtime = Runtime::Current();
-  pre_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
-                                  runtime->GetHashTableMaxLoadFactor());
-  post_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
-                                   runtime->GetHashTableMaxLoadFactor());
+  // Initial table.
+  tables_.push_back(UnorderedSet());
+  tables_.back().SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
+                               runtime->GetHashTableMaxLoadFactor());
 }
 
 }  // namespace art
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 3a4e8d8..8f715a3 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -98,22 +98,20 @@
 
   void BroadcastForNewInterns() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Adds all of the resolved image strings from the image space into the intern table. The
-  // advantage of doing this is preventing expensive DexFile::FindStringId calls.
-  void AddImageStringsToTable(gc::space::ImageSpace* image_space)
+  // Adds all of the resolved image strings from the image spaces into the intern table. This
+  // avoids expensive DexFile::FindStringId calls. Sets images_added_to_intern_table_ to true.
+  void AddImagesStringsToTable(const std::vector<gc::space::ImageSpace*>& image_spaces)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
 
-  // Copy the post zygote tables to pre zygote to save memory by preventing dirty pages.
-  void SwapPostZygoteWithPreZygote()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
-
-  // Add an intern table which was serialized to the image.
-  void AddImageInternTable(gc::space::ImageSpace* image_space)
+  // Add a new intern table to insert into; the previous tables remain, but are no longer
+  // inserted into and are ideally unmodified. This is done to prevent dirty pages.
+  void AddNewTable()
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::intern_table_lock_);
 
   // Read the intern table from memory. The elements aren't copied, the intern hash set data will
   // point to somewhere within ptr. Only reads the strong interns.
-  size_t ReadFromMemory(const uint8_t* ptr) REQUIRES(!Locks::intern_table_lock_)
+  size_t AddTableFromMemory(const uint8_t* ptr) REQUIRES(!Locks::intern_table_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Write the post zygote intern table to a pointer. Only writes the strong interns since it is
@@ -157,15 +155,17 @@
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
     void SweepWeaks(IsMarkedVisitor* visitor)
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-    void SwapPostZygoteWithPreZygote() REQUIRES(Locks::intern_table_lock_);
+    // Add a new table; from now on, insertions go only into this newest table.
+    void AddNewTable() REQUIRES(Locks::intern_table_lock_);
     size_t Size() const REQUIRES(Locks::intern_table_lock_);
-    // Read pre zygote table is called from ReadFromMemory which happens during runtime creation
-    // when we load the image intern table. Returns how many bytes were read.
-    size_t ReadIntoPreZygoteTable(const uint8_t* ptr)
+    // Read and add an intern table from ptr.
+    // Tables read are inserted at the front of the table array. Checks for conflicts with
+    // existing tables (see kCheckDuplicates). Returns how many bytes were read.
+    size_t AddTableFromMemory(const uint8_t* ptr)
         REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
-    // The image writer calls WritePostZygoteTable through WriteToMemory, it writes the interns in
-    // the post zygote table. Returns how many bytes were written.
-    size_t WriteFromPostZygoteTable(uint8_t* ptr)
+    // Write the intern tables to ptr; if there are multiple tables, they are combined into a
+    // single one. Returns how many bytes were written.
+    size_t WriteToMemory(uint8_t* ptr)
         REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
    private:
@@ -175,12 +175,9 @@
     void SweepWeaks(UnorderedSet* set, IsMarkedVisitor* visitor)
         SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
-    // We call SwapPostZygoteWithPreZygote when we create the zygote to reduce private dirty pages
-    // caused by modifying the zygote intern table hash table. The pre zygote table are the
-    // interned strings which were interned before we created the zygote space. Post zygote is self
-    // explanatory.
-    UnorderedSet pre_zygote_table_;
-    UnorderedSet post_zygote_table_;
+    // We call AddNewTable when we create the zygote to reduce private dirty pages caused by
+    // modifying the zygote intern table. Only the table at the back is modified when strings
+    // are interned.
+    std::vector<UnorderedSet> tables_;
   };
 
   // Insert if non null, otherwise return null. Must be called holding the mutator lock.
@@ -214,7 +211,7 @@
   void RemoveWeakFromTransaction(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
-  size_t ReadFromMemoryLocked(const uint8_t* ptr)
+  size_t AddTableFromMemoryLocked(const uint8_t* ptr)
       REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Change the weak root state. May broadcast to waiters.
@@ -225,7 +222,7 @@
   void WaitUntilAccessible(Thread* self)
       REQUIRES(Locks::intern_table_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
+  bool images_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable weak_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   // Since this contains (strong) roots, they need a read barrier to
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 871fad7..47e2e98 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -18,12 +18,14 @@
 
 #include <limits>
 
+#include "common_throws.h"
 #include "interpreter_common.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
+#include "mterp/mterp.h"
 
 namespace art {
 namespace interpreter {
@@ -223,19 +225,33 @@
 }
 
 enum InterpreterImplKind {
-  kSwitchImpl,            // Switch-based interpreter implementation.
-  kComputedGotoImplKind   // Computed-goto-based interpreter implementation.
+  kSwitchImplKind,        // Switch-based interpreter implementation.
+  kComputedGotoImplKind,  // Computed-goto-based interpreter implementation.
+  kMterpImplKind          // Assembly-based interpreter implementation.
 };
 static std::ostream& operator<<(std::ostream& os, const InterpreterImplKind& rhs) {
-  os << ((rhs == kSwitchImpl) ? "Switch-based interpreter" : "Computed-goto-based interpreter");
+  os << ((rhs == kSwitchImplKind)
+              ? "Switch-based interpreter"
+              : (rhs == kComputedGotoImplKind)
+                  ? "Computed-goto-based interpreter"
+                  : "Asm interpreter");
   return os;
 }
 
 #if !defined(__clang__)
+#if defined(__arm__)
+// TODO: remove when all targets are implemented.
+static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
+#else
 static constexpr InterpreterImplKind kInterpreterImplKind = kComputedGotoImplKind;
+#endif
 #else
 // Clang 3.4 fails to build the goto interpreter implementation.
-static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
+#if defined(__arm__)
+static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
+#else
+static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImplKind;
+#endif
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
   LOG(FATAL) << "UNREACHABLE";
@@ -262,18 +278,52 @@
 
 static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
                              ShadowFrame& shadow_frame, JValue result_register) {
-  DCHECK(shadow_frame.GetMethod()->IsInvokable());
+  DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
   if (LIKELY(shadow_frame.GetMethod()->IsPreverified())) {
     // Enter the "without access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
       if (transaction_active) {
-        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register);
+        // No Mterp variant - just use the switch interpreter.
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
       } else {
-        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register);
+        const instrumentation::Instrumentation* const instrumentation =
+            Runtime::Current()->GetInstrumentation();
+        while (true) {
+          if (instrumentation->IsActive()) {
+            // TODO: allow JIT profiling instrumentation.  For now, just punt on all instrumentation.
+#if !defined(__clang__)
+            return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
+#else
+            return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                                   false);
+#endif
+          }
+          bool returned = ExecuteMterpImpl(self, code_item, &shadow_frame, &result_register);
+          if (returned) {
+            return result_register;
+          } else {
+            // Mterp didn't like that instruction.  Single-step it with the reference interpreter.
+            JValue res = ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame,
+                                                         result_register, true);
+            if (shadow_frame.GetDexPC() == DexFile::kDexNoIndex) {
+              // Single-stepped a return or an exception not handled locally.  Return to caller.
+              return res;
+            }
+          }
+        }
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,
+                                              false);
+      } else {
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
+                                               false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -285,11 +335,22 @@
     }
   } else {
     // Enter the "with access check" interpreter.
-    if (kInterpreterImplKind == kSwitchImpl) {
+    if (kInterpreterImplKind == kMterpImplKind) {
+      // No access check variants for Mterp.  Just use the switch version.
       if (transaction_active) {
-        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
       } else {
-        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
+      }
+    } else if (kInterpreterImplKind == kSwitchImplKind) {
+      if (transaction_active) {
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register,
+                                             false);
+      } else {
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register,
+                                              false);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
@@ -500,5 +561,13 @@
   self->PopShadowFrame();
 }
 
+void CheckInterpreterAsmConstants() {
+  CheckMterpAsmConstants();
+}
+
+void InitInterpreterTls(Thread* self) {
+  InitMterpTls(self);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 8e7f3da..6353a9b 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -50,6 +50,11 @@
                                        ShadowFrame* shadow_frame, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+// One-time sanity check.
+void CheckInterpreterAsmConstants();
+
+void InitInterpreterTls(Thread* self);
+
 }  // namespace interpreter
 
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9f6699f..932d255 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -67,16 +67,21 @@
 namespace art {
 namespace interpreter {
 
-// External references to both interpreter implementations.
+// External references to all interpreter implementations.
 
 template<bool do_access_check, bool transaction_active>
 extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                                ShadowFrame& shadow_frame, JValue result_register);
+                                ShadowFrame& shadow_frame, JValue result_register,
+                                bool interpret_one_instruction);
 
 template<bool do_access_check, bool transaction_active>
 extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
                               ShadowFrame& shadow_frame, JValue result_register);
 
+// Mterp does not support transactions or access checks, thus no templated versions.
+extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame, JValue* result_register);
+
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index c9831e6..bab0d40 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -35,6 +35,9 @@
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
       shadow_frame.GetLockCountData().                                                          \
           CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
+      if (interpret_one_instruction) {                                                          \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                            \
+      }                                                                                         \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -78,7 +81,8 @@
 
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
-                         ShadowFrame& shadow_frame, JValue result_register) {
+                         ShadowFrame& shadow_frame, JValue result_register,
+                         bool interpret_one_instruction) {
   constexpr bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
@@ -105,7 +109,7 @@
   // to keep this live for the scope of the entire function call.
   std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
   size_t lambda_captured_variable_index = 0;
-  while (true) {
+  do {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
     TraceExecution(shadow_frame, inst, dex_pc);
@@ -203,6 +207,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_VOID: {
@@ -216,6 +223,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN: {
@@ -230,6 +240,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_WIDE: {
@@ -243,6 +256,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
@@ -278,6 +294,9 @@
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
         }
+        if (interpret_one_instruction) {
+          shadow_frame.SetDexPC(DexFile::kDexNoIndex);
+        }
         return result;
       }
       case Instruction::CONST_4: {
@@ -2370,22 +2389,29 @@
       case Instruction::UNUSED_7A:
         UnexpectedOpcode(inst, shadow_frame);
     }
-  }
+  } while (!interpret_one_instruction);
+  // Record where we stopped.
+  shadow_frame.SetDexPC(inst->GetDexPc(insns));
+  return JValue();
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteSwitchImpl.
 template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
 JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR
 JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
-                                       ShadowFrame& shadow_frame, JValue result_register);
+                                       ShadowFrame& shadow_frame, JValue result_register,
+                                       bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_)
 JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
+                                     ShadowFrame& shadow_frame, JValue result_register,
+                                     bool interpret_one_instruction);
 template SHARED_REQUIRES(Locks::mutator_lock_)
 JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
-                                      ShadowFrame& shadow_frame, JValue result_register);
+                                      ShadowFrame& shadow_frame, JValue result_register,
+                                      bool interpret_one_instruction);
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/Makefile_mterp b/runtime/interpreter/mterp/Makefile_mterp
new file mode 100644
index 0000000..f0c30ad
--- /dev/null
+++ b/runtime/interpreter/mterp/Makefile_mterp
@@ -0,0 +1,49 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Makefile for the ART fast interpreter.  This is not currently
+# integrated into the build system.
+#
+
+SHELL := /bin/sh
+
+# Build system has TARGET_ARCH=arm, but we can support the exact architecture
+# if it is worthwhile.
+#
+# To generate sources:
+# for arch in arm arm64 x86 x86_64 mips mips64
+# do
+#   TARGET_ARCH_EXT=$arch make -f Makefile_mterp
+# done
+#
+
+OUTPUT_DIR := out
+
+# Accumulate all possible dependencies for the generated files in a very
+# conservative fashion.  If it's not one of the generated files in "out",
+# assume it's a dependency.
+SOURCE_DEPS := \
+	$(shell find . -path ./$(OUTPUT_DIR) -prune -o -type f -print) \
+
+# Source files generated by the script.  There's always one C and one
+# assembly file, though in practice one or the other could be empty.
+GEN_SOURCES := \
+	$(OUTPUT_DIR)/interp_asm_$(TARGET_ARCH_EXT).S
+
+target: $(GEN_SOURCES)
+
+$(GEN_SOURCES): $(SOURCE_DEPS)
+	@mkdir -p $(OUTPUT_DIR)
+	./gen_mterp.py $(TARGET_ARCH_EXT) $(OUTPUT_DIR)
diff --git a/runtime/interpreter/mterp/README.txt b/runtime/interpreter/mterp/README.txt
new file mode 100644
index 0000000..19e02be
--- /dev/null
+++ b/runtime/interpreter/mterp/README.txt
@@ -0,0 +1,197 @@
+rt "mterp" README
+
+NOTE: Find rebuilding instructions at the bottom of this file.
+
+
+==== Overview ====
+
+Every configuration has a "config-*" file that controls how the sources
+are generated.  The sources are written into the "out" directory, where
+they are picked up by the Android build system.
+
+The best way to become familiar with the interpreter is to look at the
+generated files in the "out" directory.
+
+
+==== Config file format ====
+
+The config files are parsed from top to bottom.  Each line in the file
+may be blank, hold a comment (line starts with '#'), or be a command.
+
+The commands are:
+
+  handler-style <computed-goto|jump-table>
+
+    Specify which style of interpreter to generate.  In computed-goto,
+    each handler is allocated a fixed region, allowing transitions to
+    be done via table-start-address + (opcode * handler-size). With
+    jump-table style, handlers may be of any length, and the generated
+    table is an array of pointers to the handlers.  This command is required,
+    and must be the first command in the config file.
+
+  handler-size <bytes>
+
+    Specify the size of the fixed region, in bytes.  On most platforms
+    this will need to be a power of 2.  For jump-table implementations,
+    this command is ignored.
+
+  import <filename>
+
+    The specified file is included immediately, in its entirety.  No
+    substitutions are performed.  ".cpp" and ".h" files are copied to the
+    C output; ".S" files are copied to the asm output.
+
+  asm-alt-stub <filename>
+
+    When present, this command will cause the generation of an alternate
+    set of entry points (for computed-goto interpreters) or an alternate
+    jump table (for jump-table interpreters).
+
+  fallback-stub <filename>
+
+    Specifies a file to be used for the special FALLBACK tag on the "op"
+    command below.  Intended to be used to transfer control to an alternate
+    interpreter to single-step a not-yet-implemented opcode.  Note: should
+    not be used on RETURN-class instructions.
+
+  op-start <directory>
+
+    Indicates the start of the opcode list.  Must precede any "op"
+    commands.  The specified directory is the default location to pull
+    instruction files from.
+
+  op <opcode> <directory>|FALLBACK
+
+    Can only appear after "op-start" and before "op-end".  Overrides the
+    default source file location of the specified opcode.  The opcode
+    definition will come from the specified file, e.g. "op OP_NOP arm"
+    will load from "arm/OP_NOP.S".  A substitution dictionary will be
+    applied (see below).  If the special "FALLBACK" token is used instead of
+    a directory name, the source file specified in fallback-stub will instead
+    be used for this opcode.
+
+  alt <opcode> <directory>
+
+    Can only appear after "op-start" and before "op-end".  Similar to the
+    "op" command above, but denotes a source file to override the entry
+    in the alternate handler table.  The opcode definition will come from
+    the specified file, e.g. "alt OP_NOP arm" will load from
+    "arm/ALT_OP_NOP.S".  A substitution dictionary will be applied
+    (see below).
+
+  op-end
+
+    Indicates the end of the opcode list.  All kNumPackedOpcodes
+    opcodes are emitted when this is seen, followed by any code that
+    didn't fit inside the fixed-size instruction handler space.
+
+The order of "op" and "alt" directives are not significant; the generation
+tool will extract ordering info from the VM sources.
+
+Typically the "op-start" directive names the directory containing the form
+in which most opcodes currently exist.
+
+==== Instruction file format ====
+
+The assembly instruction files are simply fragments of assembly sources.
+The starting label will be provided by the generation tool, as will
+declarations for the segment type and alignment.  The expected target
+assembler is GNU "as", but others will work (may require fiddling with
+some of the pseudo-ops emitted by the generation tool).
+
+A substitution dictionary is applied to all opcode fragments as they are
+appended to the output.  Substitutions can look like "$value" or "${value}".
+
+The dictionary always includes:
+
+  $opcode - opcode name, e.g. "OP_NOP"
+  $opnum - opcode number, e.g. 0 for OP_NOP
+  $handler_size_bytes - max size of an instruction handler, in bytes
+  $handler_size_bits - max size of an instruction handler, log 2
+
+Both C and assembly sources will be passed through the C pre-processor,
+so you can take advantage of C-style comments and preprocessor directives
+like "#define".
+
+Some generator operations are available.
+
+  %include "filename" [subst-dict]
+
+    Includes the file, which should look like "arm/OP_NOP.S".  You can
+    specify values for the substitution dictionary, using standard Python
+    syntax.  For example, this:
+      %include "arm/unop.S" {"result":"r1"}
+    would insert "arm/unop.S" at the current file position, replacing
+    occurrences of "$result" with "r1".
+
+  %default <subst-dict>
+
+    Specify default substitution dictionary values, using standard Python
+    syntax.  Useful if you want to have a "base" version and variants.
+
+  %break
+
+    Identifies the split between the main portion of the instruction
+    handler (which must fit in "handler-size" bytes) and the "sister"
+    code, which is appended to the end of the instruction handler block.
+    In jump table implementations, %break is ignored.
+
+The generation tool does *not* print a warning if your instructions
+exceed "handler-size", but the VM will abort on startup if it detects an
+oversized handler.  On architectures with fixed-width instructions this
+is easy to work with; on others you will need to count bytes.
+
+
+==== Using C constants from assembly sources ====
+
+The file "art/runtime/asm_support.h" has some definitions for constant
+values, structure sizes, and struct member offsets.  The format is fairly
+restricted, as simple macros are used to massage it for use with both C
+(where it is verified) and assembly (where the definitions are used).
+
+If a constant in the file becomes out of sync, the VM will log an error
+message and abort during startup.
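+
+For example, the ARM fragments in this change use such an offset to load the
+thread's flag word (the constant is assumed to be defined in asm_support.h):
+
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]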
+
+
+==== Development tips ====
+
+If you need to debug the initial piece of an opcode handler, and your
+debug code expands it beyond the handler size limit, you can insert a
+generic header at the top:
+
+    b       ${opcode}_start
+%break
+${opcode}_start:
+
+If you already have a %break, it's okay to leave it in place -- the second
+%break is ignored.
+
+
+==== Rebuilding ====
+
+If you change any of the source file fragments, you need to rebuild the
+combined source files in the "out" directory.  Make sure the files in
+"out" are editable, then:
+
+    $ cd mterp
+    $ ./rebuild.sh
+
+The ultimate goal is to have the build system generate the necessary
+output files without requiring this separate step, but we're not yet
+ready to require Python in the build.
+
+==== Interpreter Control ====
+
+The mterp fast interpreter achieves much of its performance advantage
+over the C++ interpreter through its efficient mechanism of
+transitioning from one Dalvik bytecode to the next.  Mterp for ARM targets
+uses a computed-goto mechanism, in which the handler entrypoints are
+located at the base of the handler table + (opcode * 128).
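+
+The stubs in this change compute handler addresses the same way; for example,
+the alternate stub locates the primary handler with:
+
+    adrl   lr, artMterpAsmInstructionStart + (${opnum} * 128)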
+
+In normal operation, the dedicated register rIBASE (r8 for ARM, edx for
+x86) holds a pointer to the mainHandlerTable.  If we need to switch
+to a mode that requires inter-instruction checking, rIBASE is changed
+to altHandlerTable.  Note that this change is not immediate.  What is actually
+changed is the value of curHandlerTable - which is part of the interpBreak
+structure.  Rather than explicitly check for changes, each thread will
+blindly refresh rIBASE at backward branches, exception throws and returns.
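+
+The refresh itself is a single load of the current handler table from the
+thread structure, as in the fragments in this change:
+
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh IBASE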
diff --git a/runtime/interpreter/mterp/arm/alt_stub.S b/runtime/interpreter/mterp/arm/alt_stub.S
new file mode 100644
index 0000000..92ae0c6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/alt_stub.S
@@ -0,0 +1,12 @@
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (${opnum} * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
new file mode 100644
index 0000000..474bc3c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -0,0 +1,36 @@
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/binop.S b/runtime/interpreter/mterp/arm/binop.S
new file mode 100644
index 0000000..eeb72ef
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop.S
@@ -0,0 +1,35 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/arm/binop2addr.S b/runtime/interpreter/mterp/arm/binop2addr.S
new file mode 100644
index 0000000..d09a43a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binop2addr.S
@@ -0,0 +1,32 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit16.S b/runtime/interpreter/mterp/arm/binopLit16.S
new file mode 100644
index 0000000..065394e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit16.S
@@ -0,0 +1,29 @@
+%default {"result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if $chkzero
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
new file mode 100644
index 0000000..ec0b3c4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -0,0 +1,32 @@
+%default {"preinstr":"", "result":"r0", "chkzero":"0"}
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if $chkzero
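+    @ note: the movs above already set the condition codes (Z=1 iff CC==0),
+    @ so the explicit cmp below is intentionally left disabled.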
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ $result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG $result, r9                @ vAA<- $result
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/binopWide.S b/runtime/interpreter/mterp/arm/binopWide.S
new file mode 100644
index 0000000..57d43c6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide.S
@@ -0,0 +1,38 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in registers other than r0-r1, you can override "result0"
+     * and "result1".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
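+
+/*
+ * Example instantiation (a sketch, using op_add_long.S below):
+ * {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}.  The
+ * flag-setting low-word add goes in the preinstr slot so that the carry is
+ * still live when the instr slot adds the high words:
+ *
+ *     adds    r0, r0, r2                  @ low words; sets carry
+ *     adc     r1, r1, r3                  @ high words + carry
+ */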
diff --git a/runtime/interpreter/mterp/arm/binopWide2addr.S b/runtime/interpreter/mterp/arm/binopWide2addr.S
new file mode 100644
index 0000000..4e855f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/binopWide2addr.S
@@ -0,0 +1,34 @@
+%default {"preinstr":"", "result0":"r0", "result1":"r1", "chkzero":"0"}
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in registers other than r0-r1, you can override "result0"
+     * and "result1".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if $chkzero
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
new file mode 100644
index 0000000..4c5ffc5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align stack to 64 bits
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs[] in the shadow frame
+    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
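+
+/*
+ * Sketch of the rPC computation above: dex_pc is an offset in 16-bit code
+ * units, so for a code_item at address C and a saved dex_pc D the first
+ * fetch happens at C + CODEITEM_INSNS_OFFSET + 2*D.  With a (hypothetical)
+ * dex_pc of 3, rPC starts 6 bytes into insns[].
+ */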
diff --git a/runtime/interpreter/mterp/arm/fallback.S b/runtime/interpreter/mterp/arm/fallback.S
new file mode 100644
index 0000000..44e7e12
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fallback.S
@@ -0,0 +1,3 @@
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
diff --git a/runtime/interpreter/mterp/arm/fbinop.S b/runtime/interpreter/mterp/arm/fbinop.S
new file mode 100644
index 0000000..594ee03
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
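+
+/*
+ * Example instantiation (a sketch, using op_add_float.S below, which
+ * includes this template with {"instr":"fadds   s2, s0, s1"}): the operands
+ * are already in s0/s1 and the result comes back in s2, so no softfp
+ * marshalling through core registers is needed -- which is why a single
+ * instruction, not a call, is required here.
+ */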
diff --git a/runtime/interpreter/mterp/arm/fbinop2addr.S b/runtime/interpreter/mterp/arm/fbinop2addr.S
new file mode 100644
index 0000000..b052a29
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinop2addr.S
@@ -0,0 +1,21 @@
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    $instr                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide.S b/runtime/interpreter/mterp/arm/fbinopWide.S
new file mode 100644
index 0000000..1bed817
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide.S
@@ -0,0 +1,23 @@
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $instr                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/fbinopWide2addr.S b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
new file mode 100644
index 0000000..9f56986
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    $instr                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
new file mode 100644
index 0000000..75e0037
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -0,0 +1,168 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a suspend request.  Assumes rINST is already loaded and rPC has
+ * advanced; we still need to extract the opcode and branch to it.  The
+ * thread flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    EXPORT_PC
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck           @ (self)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+    add     sp, sp, #4                              @ undo 64-bit alignment pad
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
diff --git a/runtime/interpreter/mterp/arm/funop.S b/runtime/interpreter/mterp/arm/funop.S
new file mode 100644
index 0000000..d7a0859
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funop.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopNarrower.S b/runtime/interpreter/mterp/arm/funopNarrower.S
new file mode 100644
index 0000000..9daec28
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopNarrower.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/funopWider.S b/runtime/interpreter/mterp/arm/funopWider.S
new file mode 100644
index 0000000..087a1f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/funopWider.S
@@ -0,0 +1,18 @@
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    $instr                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
new file mode 100644
index 0000000..14319d9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+/*
+ *
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
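+
+/* Note: the pre-indexed writeback ("]!") both loads the halfword and
+ * advances rPC by count*2 bytes in a single instruction. */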
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+  add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #${handler_size_bits}
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #${handler_size_bits}
+.endm
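+
+/*
+ * Sketch of the computed goto above: handlers are laid out as a table of
+ * fixed-size stubs, so for opcode N execution resumes at
+ * rIBASE + (N << handler_size_bits).  Assuming (for illustration) 128-byte
+ * handlers, handler_size_bits is 7 and opcode 0x01 would land at
+ * rIBASE + 0x80.
+ */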
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
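+
+/*
+ * Note on the pair above: vregs are double-stored (see the notes at the top
+ * of this file).  SET_VREG zeroes the matching entry in the reference array
+ * so a stale object pointer is not kept live, while SET_VREG_OBJECT writes
+ * the reference to both the vreg and the reference array.
+ */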
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
diff --git a/runtime/interpreter/mterp/arm/invoke.S b/runtime/interpreter/mterp/arm/invoke.S
new file mode 100644
index 0000000..7575865
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/invoke.S
@@ -0,0 +1,19 @@
+%default { "helper":"UndefinedInvokeHandler" }
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern $helper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
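+/*
+ * Sketch of the helper contract assumed above (the exact prototype lives in
+ * the mterp C support code, so treat this as illustrative):
+ *
+ *     extern "C" bool helper(Thread* self, ShadowFrame* shadow_frame,
+ *                            uint16_t* dex_pc_ptr, uint16_t inst_data);
+ *
+ * matching the r0-r3 setup; a zero return signals a pending exception,
+ * hence the beq MterpException.
+ */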
diff --git a/runtime/interpreter/mterp/arm/op_add_double.S b/runtime/interpreter/mterp/arm/op_add_double.S
new file mode 100644
index 0000000..9332bf2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_double_2addr.S b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
new file mode 100644
index 0000000..3242c53
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float.S b/runtime/interpreter/mterp/arm/op_add_float.S
new file mode 100644
index 0000000..afb7967
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_float_2addr.S b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
new file mode 100644
index 0000000..0067b6a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int.S b/runtime/interpreter/mterp/arm/op_add_int.S
new file mode 100644
index 0000000..1dcae7e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_2addr.S b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
new file mode 100644
index 0000000..9ea98f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit16.S b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
new file mode 100644
index 0000000..5763ab8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_int_lit8.S b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
new file mode 100644
index 0000000..b84684a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"add     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long.S b/runtime/interpreter/mterp/arm/op_add_long.S
new file mode 100644
index 0000000..093223e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_add_long_2addr.S b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
new file mode 100644
index 0000000..c11e0af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_add_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"adds    r0, r0, r2", "instr":"adc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S
new file mode 100644
index 0000000..2cc4d66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget.S
@@ -0,0 +1,33 @@
+%default { "load":"ldr", "shift":"2", "is_object":"0", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $load   r2, [r0, #$data_offset]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aget_boolean.S b/runtime/interpreter/mterp/arm/op_aget_boolean.S
new file mode 100644
index 0000000..8f678dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_byte.S b/runtime/interpreter/mterp/arm/op_aget_byte.S
new file mode 100644
index 0000000..a304650
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_char.S b/runtime/interpreter/mterp/arm/op_aget_char.S
new file mode 100644
index 0000000..4908306
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_char.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_object.S b/runtime/interpreter/mterp/arm/op_aget_object.S
new file mode 100644
index 0000000..4e0aab5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_object.S
@@ -0,0 +1,21 @@
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aget_short.S b/runtime/interpreter/mterp/arm/op_aget_short.S
new file mode 100644
index 0000000..b71e659
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_short.S
@@ -0,0 +1 @@
+%include "arm/op_aget.S" { "load":"ldrsh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aget_wide.S b/runtime/interpreter/mterp/arm/op_aget_wide.S
new file mode 100644
index 0000000..caaec71
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aget_wide.S
@@ -0,0 +1,24 @@
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_and_int.S b/runtime/interpreter/mterp/arm/op_and_int.S
new file mode 100644
index 0000000..7c16d37
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_2addr.S b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
new file mode 100644
index 0000000..0fbab02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit16.S b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
new file mode 100644
index 0000000..541e9b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_int_lit8.S b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
new file mode 100644
index 0000000..d5783e5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"and     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long.S b/runtime/interpreter/mterp/arm/op_and_long.S
new file mode 100644
index 0000000..4ad5158
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_and_long_2addr.S b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
new file mode 100644
index 0000000..e23ea44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_and_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"and     r0, r0, r2", "instr":"and     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_aput.S b/runtime/interpreter/mterp/arm/op_aput.S
new file mode 100644
index 0000000..a511fa5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput.S
@@ -0,0 +1,29 @@
+%default { "store":"str", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #$shift     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    $store  r2, [r0, #$data_offset]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_boolean.S b/runtime/interpreter/mterp/arm/op_aput_boolean.S
new file mode 100644
index 0000000..e86663f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BOOLEAN_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_byte.S b/runtime/interpreter/mterp/arm/op_aput_byte.S
new file mode 100644
index 0000000..83694b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strb", "shift":"0", "data_offset":"MIRROR_BYTE_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_char.S b/runtime/interpreter/mterp/arm/op_aput_char.S
new file mode 100644
index 0000000..3551cac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_char.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_CHAR_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_object.S b/runtime/interpreter/mterp/arm/op_aput_object.S
new file mode 100644
index 0000000..c539916
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_object.S
@@ -0,0 +1,14 @@
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_short.S b/runtime/interpreter/mterp/arm/op_aput_short.S
new file mode 100644
index 0000000..0a0590e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_short.S
@@ -0,0 +1 @@
+%include "arm/op_aput.S" { "store":"strh", "shift":"1", "data_offset":"MIRROR_SHORT_ARRAY_DATA_OFFSET" }
diff --git a/runtime/interpreter/mterp/arm/op_aput_wide.S b/runtime/interpreter/mterp/arm/op_aput_wide.S
new file mode 100644
index 0000000..49839d1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_aput_wide.S
@@ -0,0 +1,24 @@
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ vBB[vCC]<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_array_length.S b/runtime/interpreter/mterp/arm/op_array_length.S
new file mode 100644
index 0000000..43b1682
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_array_length.S
@@ -0,0 +1,13 @@
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vB<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_check_cast.S b/runtime/interpreter/mterp/arm/op_check_cast.S
new file mode 100644
index 0000000..3e3ac70
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_check_cast.S
@@ -0,0 +1,17 @@
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    GET_VREG r1, r1                     @ r1<- object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
new file mode 100644
index 0000000..2b4c0ea
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -0,0 +1,56 @@
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .L${opcode}_less            @ signed compare on high part
+    bgt     .L${opcode}_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .L${opcode}_greater         @ unsigned compare on low part
+    bne     .L${opcode}_less
+    b       .L${opcode}_finish          @ equal; r1 already holds 0
+%break
+
+.L${opcode}_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ We'd like to predicate the next mov so we could avoid the branch, but
+    @ there's no clean way to do it; instead, we just replicate the tail end.
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L${opcode}_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.L${opcode}_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
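+
+/*
+ * C sketch of the semantics implemented above, in the same style as the
+ * compare() pseudocode used by the floating-point compares:
+ *
+ *     int cmp_long(int64_t x, int64_t y) {
+ *         if (x < y) return -1;
+ *         if (x > y) return 1;
+ *         return 0;
+ *     }
+ */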
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_double.S b/runtime/interpreter/mterp/arm/op_cmpg_double.S
new file mode 100644
index 0000000..4b05c44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
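+
+/*
+ * Note on the NaN handling above: an unordered fcmped result sets neither
+ * mi nor eq after fmstat, so neither conditional fires and r0 keeps its
+ * default of 1 -- the gt-bias cmpg requires.  The cmpl variants below use
+ * the mirror trick: default -1, overridden by movgt/moveq.
+ */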
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_float.S b/runtime/interpreter/mterp/arm/op_cmpg_float.S
new file mode 100644
index 0000000..d5d2df2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpg_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_double.S b/runtime/interpreter/mterp/arm/op_cmpl_double.S
new file mode 100644
index 0000000..6ee53b3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_double.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_float.S b/runtime/interpreter/mterp/arm/op_cmpl_float.S
new file mode 100644
index 0000000..64535b6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_cmpl_float.S
@@ -0,0 +1,34 @@
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
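The four cmp handlers above differ only in the default value parked in r0 before fmstat: an unordered (NaN) comparison sets none of eq/mi/gt, so the untouched default becomes the result. A minimal C sketch of the shared semantics (compare_result and nan_bias are illustrative names, not part of this change):

    /* nan_bias: -1 for the cmpl variants, +1 for the cmpg variants. */
    static int compare_result(double x, double y, int nan_bias) {
        if (x == y) return 0;       /* moveq r0, #0 */
        if (x < y)  return -1;      /* mvnmi r0, #0 in the cmpg handlers */
        if (x > y)  return 1;       /* movgt r0, #1 in the cmpl handlers */
        return nan_bias;            /* unordered: the preloaded default */
    }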
diff --git a/runtime/interpreter/mterp/arm/op_const.S b/runtime/interpreter/mterp/arm/op_const.S
new file mode 100644
index 0000000..de3e3c3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const.S
@@ -0,0 +1,9 @@
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_16.S b/runtime/interpreter/mterp/arm/op_const_16.S
new file mode 100644
index 0000000..59c6dac
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_16.S
@@ -0,0 +1,7 @@
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_4.S b/runtime/interpreter/mterp/arm/op_const_4.S
new file mode 100644
index 0000000..c177bb9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_4.S
@@ -0,0 +1,8 @@
+    /* const/4 vA, #+B */
+    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
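The lsl #16 / asr #28 pair above sign-extends the 4-bit literal B (bits 12-15 of the code unit) in two instructions. A C sketch of the same extraction (assumes an arithmetic right shift, as on ARM):

    #include <stdint.h>
    /* Recover the signed 4-bit literal B from the const/4 code unit. */
    static int32_t extract_b(uint16_t inst) {
        return (int32_t)((uint32_t)inst << 16) >> 28;   /* sssssssB */
    }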
diff --git a/runtime/interpreter/mterp/arm/op_const_class.S b/runtime/interpreter/mterp/arm/op_const_class.S
new file mode 100644
index 0000000..0b111f4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_class.S
@@ -0,0 +1,13 @@
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_high16.S b/runtime/interpreter/mterp/arm/op_const_high16.S
new file mode 100644
index 0000000..460d546
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_high16.S
@@ -0,0 +1,8 @@
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string.S b/runtime/interpreter/mterp/arm/op_const_string.S
new file mode 100644
index 0000000..4b8302a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string.S
@@ -0,0 +1,13 @@
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
new file mode 100644
index 0000000..1a3d0b2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
@@ -0,0 +1,15 @@
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide.S b/runtime/interpreter/mterp/arm/op_const_wide.S
new file mode 100644
index 0000000..2cdc426
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide.S
@@ -0,0 +1,13 @@
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
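const-wide assembles its 64-bit literal from four consecutive 16-bit code units, pairing them into two 32-bit words with orr ... lsl #16. A C sketch of the assembly order (function name is illustrative):

    #include <stdint.h>
    /* b, B, h, H are the four fetched code units, lowest first. */
    static uint64_t build_wide(uint16_t b, uint16_t B, uint16_t h, uint16_t H) {
        uint32_t lo = (uint32_t)b | ((uint32_t)B << 16);    /* r0<- BBBBbbbb */
        uint32_t hi = (uint32_t)h | ((uint32_t)H << 16);    /* r1<- HHHHhhhh */
        return (uint64_t)lo | ((uint64_t)hi << 32);
    }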
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_16.S b/runtime/interpreter/mterp/arm/op_const_wide_16.S
new file mode 100644
index 0000000..56bfc17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_16.S
@@ -0,0 +1,9 @@
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_32.S b/runtime/interpreter/mterp/arm/op_const_wide_32.S
new file mode 100644
index 0000000..36d4628
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_32.S
@@ -0,0 +1,11 @@
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
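In both const-wide/16 and const-wide/32 the high word comes from asr #31, which replicates the sign bit of the 32-bit value; it is exactly the widening a C cast performs. Sketch (assumes an arithmetic right shift):

    #include <stdint.h>
    static int64_t widen(int32_t v) {
        int32_t hi = v >> 31;                               /* r1<- ssssssss */
        return (int64_t)(uint32_t)v | ((int64_t)hi << 32);  /* == (int64_t)v */
    }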
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_high16.S b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
new file mode 100644
index 0000000..bee592d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
@@ -0,0 +1,10 @@
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_div_double.S b/runtime/interpreter/mterp/arm/op_div_double.S
new file mode 100644
index 0000000..5147550
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_double_2addr.S b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
new file mode 100644
index 0000000..b812f17
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float.S b/runtime/interpreter/mterp/arm/op_div_float.S
new file mode 100644
index 0000000..0f24d11
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_float_2addr.S b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
new file mode 100644
index 0000000..a1dbf01
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_int.S b/runtime/interpreter/mterp/arm/op_div_int.S
new file mode 100644
index 0000000..251064b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int.S
@@ -0,0 +1,30 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
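The #ifdef selects between a single sdiv and a call to the EABI helper; either way the explicit zero check runs first, since neither path reports division by zero on its own. A C sketch of the handler's contract (throw_div_zero is a hypothetical stand-in for common_errDivideByZero):

    #include <stdint.h>
    #include <stdlib.h>
    static void throw_div_zero(void) { abort(); }   /* stand-in only */
    static int32_t div_int(int32_t vbb, int32_t vcc) {
        if (vcc == 0) throw_div_zero();     /* beq common_errDivideByZero */
        /* Dalvik defines INT_MIN / -1 == INT_MIN, which sdiv provides but
         * plain C division does not (it overflows), so sketch only. */
        return vbb / vcc;                   /* sdiv, or __aeabi_idiv */
    }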
diff --git a/runtime/interpreter/mterp/arm/op_div_int_2addr.S b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
new file mode 100644
index 0000000..9be4cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_2addr.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit16.S b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
new file mode 100644
index 0000000..d9bc7d6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit16.S
@@ -0,0 +1,28 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_int_lit8.S b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
new file mode 100644
index 0000000..5d2dbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_int_lit8.S
@@ -0,0 +1,29 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile-time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign-extended), flags set
+    @cmp    r1, #0                      @ redundant: movs above already set the Z flag
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_div_long.S b/runtime/interpreter/mterp/arm/op_div_long.S
new file mode 100644
index 0000000..0f21a84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_div_long_2addr.S b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
new file mode 100644
index 0000000..e172b29
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_div_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_float.S b/runtime/interpreter/mterp/arm/op_double_to_float.S
new file mode 100644
index 0000000..e327000
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_float.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_int.S b/runtime/interpreter/mterp/arm/op_double_to_int.S
new file mode 100644
index 0000000..aa035de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_int.S
@@ -0,0 +1 @@
+%include "arm/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_double_to_long.S b/runtime/interpreter/mterp/arm/op_double_to_long.S
new file mode 100644
index 0000000..b100810
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_double_to_long.S
@@ -0,0 +1,52 @@
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+%include "arm/unopWide.S" {"instr":"bl      d2l_doconv"}
+
+%break
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI convert function doesn't do this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
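d2l_doconv (and the analogous f2l_doconv below) supplies the Java narrowing rules that __aeabi_d2lz alone does not guarantee: clamp at Long.MIN_VALUE/MAX_VALUE and map NaN to zero. A C sketch of those rules (2^63 is exactly representable as a double, matching the 0x43e00000 constant built above):

    #include <limits.h>
    static long long d2l(double d) {
        if (d >= 9223372036854775808.0)  return LLONG_MAX;  /* __aeabi_dcmpge path */
        if (d <= -9223372036854775808.0) return LLONG_MIN;  /* __aeabi_dcmple path */
        if (d != d)                      return 0;          /* NaN fails dcmpeq with itself */
        return (long long)d;                                /* __aeabi_d2lz */
    }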
diff --git a/runtime/interpreter/mterp/arm/op_fill_array_data.S b/runtime/interpreter/mterp/arm/op_fill_array_data.S
new file mode 100644
index 0000000..e1ca85c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_fill_array_data.S
@@ -0,0 +1,14 @@
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
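The payload sits in the code stream at a signed 32-bit code-unit offset from the current instruction, hence the lsl #1 when forming the byte address. Sketch (names illustrative):

    #include <stdint.h>
    #include <stddef.h>
    static const uint8_t *payload_addr(const uint8_t *pc, uint16_t lo, uint16_t hi) {
        int32_t off = (int32_t)((uint32_t)lo | ((uint32_t)hi << 16));  /* BBBBbbbb */
        return pc + (ptrdiff_t)off * 2;     /* add r1, rPC, r1, lsl #1 */
    }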
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array.S b/runtime/interpreter/mterp/arm/op_filled_new_array.S
new file mode 100644
index 0000000..1075f0c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array.S
@@ -0,0 +1,19 @@
+%default { "helper":"MterpFilledNewArray" }
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern $helper
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      $helper
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_filled_new_array_range.S b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
new file mode 100644
index 0000000..16567af
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_filled_new_array_range.S
@@ -0,0 +1 @@
+%include "arm/op_filled_new_array.S" { "helper":"MterpFilledNewArrayRange" }
diff --git a/runtime/interpreter/mterp/arm/op_float_to_double.S b/runtime/interpreter/mterp/arm/op_float_to_double.S
new file mode 100644
index 0000000..fb1892b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"fcvtds  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_int.S b/runtime/interpreter/mterp/arm/op_float_to_int.S
new file mode 100644
index 0000000..aab8716
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_int.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_long.S b/runtime/interpreter/mterp/arm/op_float_to_long.S
new file mode 100644
index 0000000..24416d3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_float_to_long.S
@@ -0,0 +1,39 @@
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+%include "arm/unopWider.S" {"instr":"bl      f2l_doconv"}
+
+%break
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer.  The EABI convert function doesn't do this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    ldmeqfd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
new file mode 100644
index 0000000..9b3632a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -0,0 +1,28 @@
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+       @ If backward branch, refresh rIBASE
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backward branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
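The lsl #16 / asr #24 pair sign-extends the AA byte, and adding the result to itself both converts code units to bytes and sets the flags that drive the backward-branch suspend check. Sketch of the target computation (assumes an arithmetic right shift):

    #include <stdint.h>
    /* pc points at 16-bit code units; AA is a signed code-unit offset. */
    static const uint16_t *goto_target(const uint16_t *pc, uint16_t inst) {
        int32_t aa = (int32_t)((uint32_t)inst << 16) >> 24;  /* ssssssAA */
        return pc + aa;     /* add r2, r1, r1 doubles this to a byte offset */
    }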
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
new file mode 100644
index 0000000..2231acd
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -0,0 +1,23 @@
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
new file mode 100644
index 0000000..6b72ff5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -0,0 +1,32 @@
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
new file mode 100644
index 0000000..5685686
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
new file mode 100644
index 0000000..2a9c0f9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
new file mode 100644
index 0000000..60a0307
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
new file mode 100644
index 0000000..981cdec
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
new file mode 100644
index 0000000..ca50cd7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
new file mode 100644
index 0000000..c621812
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
new file mode 100644
index 0000000..7e060f2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
new file mode 100644
index 0000000..f92be23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
new file mode 100644
index 0000000..213344d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
new file mode 100644
index 0000000..dfd4e44
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
new file mode 100644
index 0000000..4a58b4a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -0,0 +1 @@
+%include "arm/bincmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
new file mode 100644
index 0000000..d864ef4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -0,0 +1 @@
+%include "arm/zcmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_iget.S b/runtime/interpreter/mterp/arm/op_iget.S
new file mode 100644
index 0000000..c7f777b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget.S
@@ -0,0 +1,26 @@
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       $helper
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if $is_object
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean.S b/runtime/interpreter/mterp/arm/op_iget_boolean.S
new file mode 100644
index 0000000..628f40a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
new file mode 100644
index 0000000..0ae4843
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte.S b/runtime/interpreter/mterp/arm/op_iget_byte.S
new file mode 100644
index 0000000..c4e08e2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte_quick.S b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
new file mode 100644
index 0000000..e1b3083
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsb" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char.S b/runtime/interpreter/mterp/arm/op_iget_char.S
new file mode 100644
index 0000000..5e8da66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char_quick.S b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
new file mode 100644
index 0000000..b44d8f1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object.S b/runtime/interpreter/mterp/arm/op_iget_object.S
new file mode 100644
index 0000000..1cf2e3c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
new file mode 100644
index 0000000..1f8dc5a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S
new file mode 100644
index 0000000..9229afc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_quick.S
@@ -0,0 +1,18 @@
+%default { "load":"ldr", "is_object":"0" }
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    $load   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if $is_object
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
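The -quick variants skip field resolution: a prior quickening pass has rewritten the instruction so CCCC is already the field's byte offset inside the object, leaving only a null check and one load. A C sketch of that fast path (iget_quick is an illustrative name; the real handler branches to the null-object error path rather than returning):

    #include <stdint.h>
    #include <stddef.h>
    static int32_t iget_quick(const void *obj, uint16_t offset) {
        if (obj == NULL) return 0;          /* beq common_errNullObject */
        return *(const int32_t *)((const char *)obj + offset);  /* $load r0, [r3, r1] */
    }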
diff --git a/runtime/interpreter/mterp/arm/op_iget_short.S b/runtime/interpreter/mterp/arm/op_iget_short.S
new file mode 100644
index 0000000..460f045
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short.S
@@ -0,0 +1 @@
+%include "arm/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_short_quick.S b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
new file mode 100644
index 0000000..1831b99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iget_quick.S" { "load":"ldrsh" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
new file mode 100644
index 0000000..f8d2f41
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -0,0 +1,22 @@
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    add     r3, rFP, r2, lsl #2            @ r3<- &fp[A]
+    stmia   r3, {r0-r1}                    @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
new file mode 100644
index 0000000..4d6976e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0/r1<- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
new file mode 100644
index 0000000..e94108c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -0,0 +1,24 @@
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    GET_VREG  r1, r1                    @ r1<- vB (object)
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    mov       r9, rINST, lsr #8         @ r9<- A+
+    and       r9, r9, #15               @ r9<- A
+    bl        MterpInstanceOf           @ (index, obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_int_to_byte.S b/runtime/interpreter/mterp/arm/op_int_to_byte.S
new file mode 100644
index 0000000..059d5c2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_byte.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxtb    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_char.S b/runtime/interpreter/mterp/arm/op_int_to_char.S
new file mode 100644
index 0000000..83a0c19
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_char.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"uxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_double.S b/runtime/interpreter/mterp/arm/op_int_to_double.S
new file mode 100644
index 0000000..810c2e4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_double.S
@@ -0,0 +1 @@
+%include "arm/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_float.S b/runtime/interpreter/mterp/arm/op_int_to_float.S
new file mode 100644
index 0000000..f41654c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_float.S
@@ -0,0 +1 @@
+%include "arm/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_long.S b/runtime/interpreter/mterp/arm/op_int_to_long.S
new file mode 100644
index 0000000..b5aed8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_long.S
@@ -0,0 +1 @@
+%include "arm/unopWider.S" {"instr":"mov     r1, r0, asr #31"}
diff --git a/runtime/interpreter/mterp/arm/op_int_to_short.S b/runtime/interpreter/mterp/arm/op_int_to_short.S
new file mode 100644
index 0000000..717bd96
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_int_to_short.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"sxth    r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct.S b/runtime/interpreter/mterp/arm/op_invoke_direct.S
new file mode 100644
index 0000000..1edf221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirect" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_direct_range.S b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
new file mode 100644
index 0000000..3097b8e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_direct_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeDirectRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface.S b/runtime/interpreter/mterp/arm/op_invoke_interface.S
new file mode 100644
index 0000000..f6d565b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterface" }
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_interface_range.S b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
new file mode 100644
index 0000000..c8443b0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_interface_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeInterfaceRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static.S b/runtime/interpreter/mterp/arm/op_invoke_static.S
new file mode 100644
index 0000000..c3cefcf
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static.S
@@ -0,0 +1,2 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStatic" }
+
diff --git a/runtime/interpreter/mterp/arm/op_invoke_static_range.S b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
new file mode 100644
index 0000000..dd60d7b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_static_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeStaticRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super.S b/runtime/interpreter/mterp/arm/op_invoke_super.S
new file mode 100644
index 0000000..92ef2a4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuper" }
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_super_range.S b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
new file mode 100644
index 0000000..9e4fb1c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_super_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeSuperRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual.S b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
new file mode 100644
index 0000000..5b893ff
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual.S
@@ -0,0 +1,8 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtual" }
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
new file mode 100644
index 0000000..020e8b8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuick" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
new file mode 100644
index 0000000..2b42a78
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualRange" }
diff --git a/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
new file mode 100644
index 0000000..42f2ded
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_invoke_virtual_range_quick.S
@@ -0,0 +1 @@
+%include "arm/invoke.S" { "helper":"MterpInvokeVirtualQuickRange" }
diff --git a/runtime/interpreter/mterp/arm/op_iput.S b/runtime/interpreter/mterp/arm/op_iput.S
new file mode 100644
index 0000000..d224cd8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput.S
@@ -0,0 +1,22 @@
+%default { "is_object":"0", "handler":"artSet32InstanceFromMterp" }
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern $handler
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       $handler
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean.S b/runtime/interpreter/mterp/arm/op_iput_boolean.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_boolean_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte.S b/runtime/interpreter/mterp/arm/op_iput_byte.S
new file mode 100644
index 0000000..c9e8589
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet8InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_byte_quick.S b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
new file mode 100644
index 0000000..f0a2777
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_byte_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strb" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char.S b/runtime/interpreter/mterp/arm/op_iput_char.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_char_quick.S b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_char_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_object.S b/runtime/interpreter/mterp/arm/op_iput_object.S
new file mode 100644
index 0000000..d942e84
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_object_quick.S b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
new file mode 100644
index 0000000..876b3da
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_object_quick.S
@@ -0,0 +1,10 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_quick.S b/runtime/interpreter/mterp/arm/op_iput_quick.S
new file mode 100644
index 0000000..98c8150
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_quick.S
@@ -0,0 +1,14 @@
+%default { "store":"str" }
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    $store     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_short.S b/runtime/interpreter/mterp/arm/op_iput_short.S
new file mode 100644
index 0000000..5046f6b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short.S
@@ -0,0 +1 @@
+%include "arm/op_iput.S" { "handler":"artSet16InstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_short_quick.S b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
new file mode 100644
index 0000000..5212fc3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_short_quick.S
@@ -0,0 +1 @@
+%include "arm/op_iput_quick.S" { "store":"strh" }
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide.S b/runtime/interpreter/mterp/arm/op_iput_wide.S
new file mode 100644
index 0000000..8bbd63e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide.S
@@ -0,0 +1,16 @@
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
new file mode 100644
index 0000000..a2fc9e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
@@ -0,0 +1,13 @@
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_long_to_double.S b/runtime/interpreter/mterp/arm/op_long_to_double.S
new file mode 100644
index 0000000..1d48a2a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_double.S
@@ -0,0 +1,27 @@
+%default {}
+    /*
+     * Specialized 64-bit floating point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vBh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vBl)
+    vldr            d3, constval$opcode
+    vmla.f64        d2, d1, d3          @ d2<- vBh*2^32 + vBl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constval${opcode}:
+    .8byte          0x41f0000000000000
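
Note: the literal-pool constant 0x41f0000000000000 is the IEEE-754 encoding of 2^32, so the vcvt/vmla sequence computes (double)hi * 2^32 + (double)lo, converting the high word as signed and the low word as unsigned. An equivalent C sketch:

    #include <stdint.h>

    static double long_to_double(int64_t v) {
        int32_t  hi = (int32_t)(v >> 32);  /* vcvt.f64.s32: signed high word  */
        uint32_t lo = (uint32_t)v;         /* vcvt.f64.u32: unsigned low word */
        return (double)hi * 4294967296.0 + (double)lo;  /* vmla by 2^32 */
    }
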
diff --git a/runtime/interpreter/mterp/arm/op_long_to_float.S b/runtime/interpreter/mterp/arm/op_long_to_float.S
new file mode 100644
index 0000000..efa5a66
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_float.S
@@ -0,0 +1 @@
+%include "arm/unopNarrower.S" {"instr":"bl      __aeabi_l2f"}
diff --git a/runtime/interpreter/mterp/arm/op_long_to_int.S b/runtime/interpreter/mterp/arm/op_long_to_int.S
new file mode 100644
index 0000000..3e91f23
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_long_to_int.S
@@ -0,0 +1,2 @@
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm/op_move.S"
diff --git a/runtime/interpreter/mterp/arm/op_monitor_enter.S b/runtime/interpreter/mterp/arm/op_monitor_enter.S
new file mode 100644
index 0000000..3c34f75
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_enter.S
@@ -0,0 +1,14 @@
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_monitor_exit.S b/runtime/interpreter/mterp/arm/op_monitor_exit.S
new file mode 100644
index 0000000..fc7cef5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_monitor_exit.S
@@ -0,0 +1,18 @@
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r1<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(self, obj)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move.S b/runtime/interpreter/mterp/arm/op_move.S
new file mode 100644
index 0000000..dfecc24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_16.S b/runtime/interpreter/mterp/arm/op_move_16.S
new file mode 100644
index 0000000..78138a2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_exception.S b/runtime/interpreter/mterp/arm/op_move_exception.S
new file mode 100644
index 0000000..0242e26
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_exception.S
@@ -0,0 +1,9 @@
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
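
Note: the handler publishes the exception object into the frame register before clearing the thread's pending slot. A hedged C sketch of the sequence, with stand-in types:

    struct thread { void *exception; };  /* stand-in for the real Thread */

    static void move_exception(struct thread *self, void **vregs, int aa) {
        void *exc = self->exception;  /* ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET] */
        vregs[aa] = exc;              /* SET_VREG_OBJECT r3, r2 */
        self->exception = 0;          /* str r1, [rSELF, ...] clears it */
    }
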
diff --git a/runtime/interpreter/mterp/arm/op_move_from16.S b/runtime/interpreter/mterp/arm/op_move_from16.S
new file mode 100644
index 0000000..3e79417
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_from16.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_object.S b/runtime/interpreter/mterp/arm/op_move_object.S
new file mode 100644
index 0000000..16de57b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object.S
@@ -0,0 +1 @@
+%include "arm/op_move.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_16.S b/runtime/interpreter/mterp/arm/op_move_object_16.S
new file mode 100644
index 0000000..2534300
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_16.S
@@ -0,0 +1 @@
+%include "arm/op_move_16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_object_from16.S b/runtime/interpreter/mterp/arm/op_move_object_from16.S
new file mode 100644
index 0000000..9e0cf02
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_object_from16.S
@@ -0,0 +1 @@
+%include "arm/op_move_from16.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result.S b/runtime/interpreter/mterp/arm/op_move_result.S
new file mode 100644
index 0000000..f2586a0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result.S
@@ -0,0 +1,14 @@
+%default { "is_object":"0" }
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if $is_object
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_result_object.S b/runtime/interpreter/mterp/arm/op_move_result_object.S
new file mode 100644
index 0000000..643296a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_object.S
@@ -0,0 +1 @@
+%include "arm/op_move_result.S" {"is_object":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_move_result_wide.S b/runtime/interpreter/mterp/arm/op_move_result_wide.S
new file mode 100644
index 0000000..c64103c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_result_wide.S
@@ -0,0 +1,9 @@
+    /* move-result-wide vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide.S b/runtime/interpreter/mterp/arm/op_move_wide.S
new file mode 100644
index 0000000..1345b95
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide.S
@@ -0,0 +1,11 @@
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_16.S b/runtime/interpreter/mterp/arm/op_move_wide_16.S
new file mode 100644
index 0000000..133a4c3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_16.S
@@ -0,0 +1,11 @@
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_from16.S b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
new file mode 100644
index 0000000..f2ae785
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
@@ -0,0 +1,11 @@
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_mul_double.S b/runtime/interpreter/mterp/arm/op_mul_double.S
new file mode 100644
index 0000000..530e85a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_double_2addr.S b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
new file mode 100644
index 0000000..da1abc6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float.S b/runtime/interpreter/mterp/arm/op_mul_float.S
new file mode 100644
index 0000000..6a72e6f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_float_2addr.S b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
new file mode 100644
index 0000000..edb5101
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int.S b/runtime/interpreter/mterp/arm/op_mul_int.S
new file mode 100644
index 0000000..d6151d4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_2addr.S b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
new file mode 100644
index 0000000..66a797d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_2addr.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binop2addr.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit16.S b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
new file mode 100644
index 0000000..4e40c43
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit16.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit16.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_int_lit8.S b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
new file mode 100644
index 0000000..dbafae9
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_int_lit8.S
@@ -0,0 +1,2 @@
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+%include "arm/binopLit8.S" {"instr":"mul     r0, r1, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
new file mode 100644
index 0000000..9e83778
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -0,0 +1,36 @@
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
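
Note: restating the WX x YZ decomposition as C (vBB = W:X in r1/r0, vCC = Y:Z in r3/r2); only the low 32 bits of the cross terms matter, so plain mul/mla suffice:

    #include <stdint.h>

    static uint64_t mul_long(uint32_t x, uint32_t w,   /* vBB: w = high, x = low */
                             uint32_t z, uint32_t y) { /* vCC: y = high, z = low */
        uint64_t zx = (uint64_t)z * x;     /* umull: full 64-bit ZxX */
        uint32_t hi = z * w + y * x;       /* mul + mla, modulo 2^32 */
        return zx + ((uint64_t)hi << 32);  /* add into the high word */
    }
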
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
new file mode 100644
index 0000000..789dbd3
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -0,0 +1,24 @@
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_neg_double.S b/runtime/interpreter/mterp/arm/op_neg_double.S
new file mode 100644
index 0000000..33e609c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_double.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"instr":"add     r1, r1, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_float.S b/runtime/interpreter/mterp/arm/op_neg_float.S
new file mode 100644
index 0000000..993583f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_float.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"add     r0, r0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_int.S b/runtime/interpreter/mterp/arm/op_neg_int.S
new file mode 100644
index 0000000..ec0b253
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"rsb     r0, r0, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_neg_long.S b/runtime/interpreter/mterp/arm/op_neg_long.S
new file mode 100644
index 0000000..dab2eb4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_neg_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"rsbs    r0, r0, #0", "instr":"rsc     r1, r1, #0"}
diff --git a/runtime/interpreter/mterp/arm/op_new_array.S b/runtime/interpreter/mterp/arm/op_new_array.S
new file mode 100644
index 0000000..8bb792c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_array.S
@@ -0,0 +1,19 @@
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_new_instance.S b/runtime/interpreter/mterp/arm/op_new_instance.S
new file mode 100644
index 0000000..95d4be8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_new_instance.S
@@ -0,0 +1,14 @@
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_nop.S b/runtime/interpreter/mterp/arm/op_nop.S
new file mode 100644
index 0000000..af0f88f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_nop.S
@@ -0,0 +1,3 @@
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
diff --git a/runtime/interpreter/mterp/arm/op_not_int.S b/runtime/interpreter/mterp/arm/op_not_int.S
new file mode 100644
index 0000000..816485a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_int.S
@@ -0,0 +1 @@
+%include "arm/unop.S" {"instr":"mvn     r0, r0"}
diff --git a/runtime/interpreter/mterp/arm/op_not_long.S b/runtime/interpreter/mterp/arm/op_not_long.S
new file mode 100644
index 0000000..49a5905
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_not_long.S
@@ -0,0 +1 @@
+%include "arm/unopWide.S" {"preinstr":"mvn     r0, r0", "instr":"mvn     r1, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int.S b/runtime/interpreter/mterp/arm/op_or_int.S
new file mode 100644
index 0000000..b046e8d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_2addr.S b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
new file mode 100644
index 0000000..493c59f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit16.S b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
new file mode 100644
index 0000000..0a01db8
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_int_lit8.S b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
new file mode 100644
index 0000000..2d85038
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"orr     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long.S b/runtime/interpreter/mterp/arm/op_or_long.S
new file mode 100644
index 0000000..048c45c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_or_long_2addr.S b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
new file mode 100644
index 0000000..9395346
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_or_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"orr     r0, r0, r2", "instr":"orr     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
new file mode 100644
index 0000000..1e3370e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -0,0 +1,39 @@
+%default { "func":"MterpDoPackedSwitch" }
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
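
Note: the +BBBBbbbb operand and the helper's return value are both measured in 16-bit code units, hence the two doublings ("lsl #1" for the table address, "adds r1, r0, r0" for the branch offset, which also sets the flags consumed by the backward-branch suspend check). A hedged sketch of the arithmetic, with an illustrative helper signature:

    #include <stdint.h>

    static const uint16_t *switch_step(const uint16_t *pc,
                                       uint16_t lo, uint16_t hi,  /* FETCH 1, FETCH 2 */
                                       int32_t (*do_switch)(const void *, int32_t),
                                       int32_t test_val) {
        int32_t table_units = (int32_t)(((uint32_t)hi << 16) | lo);
        const void *table = (const char *)pc + 2 * (int64_t)table_units;
        int32_t branch_units = do_switch(table, test_val);  /* may be negative */
        return pc + branch_units;  /* pointer arithmetic in code units */
    }
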
diff --git a/runtime/interpreter/mterp/arm/op_rem_double.S b/runtime/interpreter/mterp/arm/op_rem_double.S
new file mode 100644
index 0000000..b539221
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_double_2addr.S b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
new file mode 100644
index 0000000..372ef1d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_double_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a double remainder function, but libm does */
+%include "arm/binopWide2addr.S" {"instr":"bl      fmod"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float.S b/runtime/interpreter/mterp/arm/op_rem_float.S
new file mode 100644
index 0000000..7bd10de
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
new file mode 100644
index 0000000..93c5fae
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_float_2addr.S
@@ -0,0 +1,2 @@
+/* EABI doesn't define a float remainder function, but libm does */
+%include "arm/binop2addr.S" {"instr":"bl      fmodf"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_int.S b/runtime/interpreter/mterp/arm/op_rem_int.S
new file mode 100644
index 0000000..ff62573
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int.S
@@ -0,0 +1,33 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op, r0-r2 changed
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
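
Note: on cores with __ARM_ARCH_EXT_IDIV__, the sdiv/mls pair computes a truncating (C-style) remainder directly, matching Dalvik's rem-int semantics. Sketch:

    static int rem_int(int vbb, int vcc) {  /* vcc != 0: checked above */
        int q = vbb / vcc;                  /* sdiv r2, r0, r1 */
        return vbb - q * vcc;               /* mls  r1, r1, r2, r0 */
    }
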
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_2addr.S b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
new file mode 100644
index 0000000..ba5751a
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_2addr.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit16.S b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
new file mode 100644
index 0000000..4edb187
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit16.S
@@ -0,0 +1,31 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_int_lit8.S b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
new file mode 100644
index 0000000..3888361
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_int_lit8.S
@@ -0,0 +1,32 @@
+%default {}
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp    r1, #0                      @ redundant: the movs above set the flags
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/arm/op_rem_long.S b/runtime/interpreter/mterp/arm/op_rem_long.S
new file mode 100644
index 0000000..b2b1c24
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_rem_long_2addr.S b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
new file mode 100644
index 0000000..f87d493
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rem_long_2addr.S
@@ -0,0 +1,2 @@
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+%include "arm/binopWide2addr.S" {"instr":"bl      __aeabi_ldivmod", "result0":"r2", "result1":"r3", "chkzero":"1"}
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
new file mode 100644
index 0000000..a4ffd04
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -0,0 +1,12 @@
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_object.S b/runtime/interpreter/mterp/arm/op_return_object.S
new file mode 100644
index 0000000..c490730
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_object.S
@@ -0,0 +1 @@
+%include "arm/op_return.S"
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
new file mode 100644
index 0000000..f6dfd99
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -0,0 +1,5 @@
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
new file mode 100644
index 0000000..7322940
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -0,0 +1,3 @@
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
new file mode 100644
index 0000000..2881c87
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -0,0 +1,10 @@
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int.S b/runtime/interpreter/mterp/arm/op_rsub_int.S
new file mode 100644
index 0000000..1508dd4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int.S
@@ -0,0 +1,2 @@
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+%include "arm/binopLit16.S" {"instr":"rsb     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
new file mode 100644
index 0000000..2ee11e1
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_rsub_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"rsb     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sget.S b/runtime/interpreter/mterp/arm/op_sget.S
new file mode 100644
index 0000000..2b81f50
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget.S
@@ -0,0 +1,27 @@
+%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern $helper
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    $helper
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if $is_object
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
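
Note: unlike the sput family below, which signals failure through the helper's return value, sget's return value is the field value itself, so failure is detected by checking the thread's pending-exception slot after the call. A hedged C sketch with stand-in types:

    #include <stdint.h>

    struct thread { void *exception; };  /* stand-in for the real Thread */

    static uint32_t sget_32(uint32_t field_ref, void *referrer, struct thread *self,
                            uint32_t (*helper)(uint32_t, void *, struct thread *)) {
        uint32_t value = helper(field_ref, referrer, self);
        if (self->exception != 0) {
            /* the handler branches to MterpException here */
        }
        return value;
    }
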
diff --git a/runtime/interpreter/mterp/arm/op_sget_boolean.S b/runtime/interpreter/mterp/arm/op_sget_boolean.S
new file mode 100644
index 0000000..ebfb44c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_byte.S b/runtime/interpreter/mterp/arm/op_sget_byte.S
new file mode 100644
index 0000000..d76862e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetByteStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_char.S b/runtime/interpreter/mterp/arm/op_sget_char.S
new file mode 100644
index 0000000..b7fcfc2
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_char.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetCharStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_object.S b/runtime/interpreter/mterp/arm/op_sget_object.S
new file mode 100644
index 0000000..8e7d075
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_object.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_short.S b/runtime/interpreter/mterp/arm/op_sget_short.S
new file mode 100644
index 0000000..3e80f0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_short.S
@@ -0,0 +1 @@
+%include "arm/op_sget.S" {"helper":"artGetShortStaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
new file mode 100644
index 0000000..97db05f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -0,0 +1,21 @@
+    /*
+     * SGET_WIDE handler wrapper.
+     *
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    add   r9, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shl_int.S b/runtime/interpreter/mterp/arm/op_shl_int.S
new file mode 100644
index 0000000..7e4c768
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
new file mode 100644
index 0000000..4286577
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
new file mode 100644
index 0000000..6a48bfc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asl r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
new file mode 100644
index 0000000..dc8a679
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
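
Note: the handler builds the 64-bit shift from 32-bit operations without branching; subs/movpl selects the n >= 32 case, and ARM register-specified shifts by 32 or more naturally yield zero. A branchy but equivalent C sketch (C shifts by the full word width are undefined, hence the explicit cases):

    #include <stdint.h>

    static void shl_long(uint32_t *lo, uint32_t *hi, uint32_t n) {
        n &= 63;                                 /* and r2, r2, #63 */
        if (n >= 32) {                           /* subs/movpl path */
            *hi = *lo << (n - 32);
            *lo = 0;
        } else if (n > 0) {
            *hi = (*hi << n) | (*lo >> (32 - n));
            *lo <<= n;
        }
    }
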
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
new file mode 100644
index 0000000..fd7668d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_int.S b/runtime/interpreter/mterp/arm/op_shr_int.S
new file mode 100644
index 0000000..6317605
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
new file mode 100644
index 0000000..cc8632f
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
new file mode 100644
index 0000000..60fe5fc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, asr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
new file mode 100644
index 0000000..c0edf90
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -0,0 +1,27 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
new file mode 100644
index 0000000..ffeaf9c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sparse_switch.S b/runtime/interpreter/mterp/arm/op_sparse_switch.S
new file mode 100644
index 0000000..9f7a42b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sparse_switch.S
@@ -0,0 +1 @@
+%include "arm/op_packed_switch.S" { "func":"MterpDoSparseSwitch" }
diff --git a/runtime/interpreter/mterp/arm/op_sput.S b/runtime/interpreter/mterp/arm/op_sput.S
new file mode 100644
index 0000000..7e0c1a6
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput.S
@@ -0,0 +1,20 @@
+%default { "helper":"artSet32StaticFromCode"}
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      $helper
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sput_boolean.S b/runtime/interpreter/mterp/arm/op_sput_boolean.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_boolean.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_byte.S b/runtime/interpreter/mterp/arm/op_sput_byte.S
new file mode 100644
index 0000000..e3bbf2b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_byte.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_char.S b/runtime/interpreter/mterp/arm/op_sput_char.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_char.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_object.S b/runtime/interpreter/mterp/arm/op_sput_object.S
new file mode 100644
index 0000000..6d3a9a7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_object.S
@@ -0,0 +1,11 @@
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sput_short.S b/runtime/interpreter/mterp/arm/op_sput_short.S
new file mode 100644
index 0000000..d8d65cb
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_short.S
@@ -0,0 +1 @@
+%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
new file mode 100644
index 0000000..adbcffa
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -0,0 +1,19 @@
+    /*
+     * SPUT_WIDE handler wrapper.
+     *
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_sub_double.S b/runtime/interpreter/mterp/arm/op_sub_double.S
new file mode 100644
index 0000000..69bcc67
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_double_2addr.S b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
new file mode 100644
index 0000000..2ea59fe
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_double_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinopWide2addr.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float.S b/runtime/interpreter/mterp/arm/op_sub_float.S
new file mode 100644
index 0000000..3f17a0d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float.S
@@ -0,0 +1 @@
+%include "arm/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_float_2addr.S b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
new file mode 100644
index 0000000..2f4aac4
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_float_2addr.S
@@ -0,0 +1 @@
+%include "arm/fbinop2addr.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int.S b/runtime/interpreter/mterp/arm/op_sub_int.S
new file mode 100644
index 0000000..efb9e10
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_int_2addr.S b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
new file mode 100644
index 0000000..4d3036b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"sub     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long.S b/runtime/interpreter/mterp/arm/op_sub_long.S
new file mode 100644
index 0000000..6f1eb6e
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_sub_long_2addr.S b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
new file mode 100644
index 0000000..8e9da05
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_sub_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"subs    r0, r0, r2", "instr":"sbc     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_throw.S b/runtime/interpreter/mterp/arm/op_throw.S
new file mode 100644
index 0000000..be49ada
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_throw.S
@@ -0,0 +1,11 @@
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
diff --git a/runtime/interpreter/mterp/arm/op_unused_3e.S b/runtime/interpreter/mterp/arm/op_unused_3e.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3e.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_3f.S b/runtime/interpreter/mterp/arm/op_unused_3f.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_3f.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_40.S b/runtime/interpreter/mterp/arm/op_unused_40.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_40.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_41.S b/runtime/interpreter/mterp/arm/op_unused_41.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_41.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_42.S b/runtime/interpreter/mterp/arm/op_unused_42.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_42.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_43.S b/runtime/interpreter/mterp/arm/op_unused_43.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_43.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_73.S b/runtime/interpreter/mterp/arm/op_unused_73.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_73.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_79.S b/runtime/interpreter/mterp/arm/op_unused_79.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_79.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_7a.S b/runtime/interpreter/mterp/arm/op_unused_7a.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_7a.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f3.S b/runtime/interpreter/mterp/arm/op_unused_f3.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f3.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f4.S b/runtime/interpreter/mterp/arm/op_unused_f4.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f4.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f5.S b/runtime/interpreter/mterp/arm/op_unused_f5.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f5.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f6.S b/runtime/interpreter/mterp/arm/op_unused_f6.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f6.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f7.S b/runtime/interpreter/mterp/arm/op_unused_f7.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f7.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f8.S b/runtime/interpreter/mterp/arm/op_unused_f8.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f8.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_f9.S b/runtime/interpreter/mterp/arm/op_unused_f9.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_f9.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fa.S b/runtime/interpreter/mterp/arm/op_unused_fa.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fa.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fb.S b/runtime/interpreter/mterp/arm/op_unused_fb.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fb.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fc.S b/runtime/interpreter/mterp/arm/op_unused_fc.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fc.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fd.S b/runtime/interpreter/mterp/arm/op_unused_fd.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fd.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fe.S b/runtime/interpreter/mterp/arm/op_unused_fe.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_fe.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_ff.S b/runtime/interpreter/mterp/arm/op_unused_ff.S
new file mode 100644
index 0000000..10948dc
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_unused_ff.S
@@ -0,0 +1 @@
+%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int.S b/runtime/interpreter/mterp/arm/op_ushr_int.S
new file mode 100644
index 0000000..a74361b
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
new file mode 100644
index 0000000..f2d1d13
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
new file mode 100644
index 0000000..40a4435
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"preinstr":"and     r1, r1, #31", "instr":"mov     r0, r0, lsr r1"}
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
new file mode 100644
index 0000000..f64c861
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -0,0 +1,33 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
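+    /*
+     * Illustrative example (not from the original source): for r2 = 40,
+     * "subs ip, r2, #32" yields ip = 8 (positive), so "movpl" sets
+     * r0 = r1 >>> 8, while "mov r1, r1, lsr r2" clears r1 (register-
+     * specified shifts of 32 or more produce 0), completing the shift.
+     */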
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
new file mode 100644
index 0000000..dbab08d
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -0,0 +1,22 @@
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vA/vA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_xor_int.S b/runtime/interpreter/mterp/arm/op_xor_int.S
new file mode 100644
index 0000000..fd7a4b7
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int.S
@@ -0,0 +1 @@
+%include "arm/binop.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_2addr.S b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
new file mode 100644
index 0000000..196a665
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_2addr.S
@@ -0,0 +1 @@
+%include "arm/binop2addr.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit16.S b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
new file mode 100644
index 0000000..39f2a47
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit16.S
@@ -0,0 +1 @@
+%include "arm/binopLit16.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_int_lit8.S b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
new file mode 100644
index 0000000..46bb712
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_int_lit8.S
@@ -0,0 +1 @@
+%include "arm/binopLit8.S" {"instr":"eor     r0, r0, r1"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long.S b/runtime/interpreter/mterp/arm/op_xor_long.S
new file mode 100644
index 0000000..4f830d0
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long.S
@@ -0,0 +1 @@
+%include "arm/binopWide.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/op_xor_long_2addr.S b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
new file mode 100644
index 0000000..5b5ed88
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/op_xor_long_2addr.S
@@ -0,0 +1 @@
+%include "arm/binopWide2addr.S" {"preinstr":"eor     r0, r0, r2", "instr":"eor     r1, r1, r3"}
diff --git a/runtime/interpreter/mterp/arm/unop.S b/runtime/interpreter/mterp/arm/unop.S
new file mode 100644
index 0000000..56518b5
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unop.S
@@ -0,0 +1,25 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
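+    /*
+     * Instantiation sketch (assumed, mirroring the one-line %include
+     * files in this directory), e.g. for neg-int:
+     *   %include "arm/unop.S" {"instr":"rsb     r0, r0, #0"}
+     */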
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    $preinstr                           @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopNarrower.S b/runtime/interpreter/mterp/arm/unopNarrower.S
new file mode 100644
index 0000000..a5fc027
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopNarrower.S
@@ -0,0 +1,23 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWide.S b/runtime/interpreter/mterp/arm/unopWide.S
new file mode 100644
index 0000000..7b8739c
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWide.S
@@ -0,0 +1,26 @@
+%default {"preinstr":""}
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
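+    /*
+     * Instantiation sketch (assumed): neg-long would negate the 64-bit
+     * pair with a borrow chain, e.g.
+     *   %include "arm/unopWide.S" {"preinstr":"rsbs    r0, r0, #0", "instr":"rsc     r1, r1, #0"}
+     */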
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $preinstr                           @ optional op; may set condition codes
+    $instr                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
diff --git a/runtime/interpreter/mterp/arm/unopWider.S b/runtime/interpreter/mterp/arm/unopWider.S
new file mode 100644
index 0000000..657a395
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unopWider.S
@@ -0,0 +1,20 @@
+%default {"preinstr":""}
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    $preinstr                           @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    $instr                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
diff --git a/runtime/interpreter/mterp/arm/unused.S b/runtime/interpreter/mterp/arm/unused.S
new file mode 100644
index 0000000..ffa00be
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/unused.S
@@ -0,0 +1,4 @@
+    /*
+     * Bail to the reference interpreter to throw.
+     */
+    b       MterpFallback
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
new file mode 100644
index 0000000..6e9ef55
--- /dev/null
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -0,0 +1,37 @@
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
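+    /*
+     * Instantiation sketch (assumed): if-eqz branches when vAA == 0,
+     * so its *reverse* comparison is "ne":
+     *   %include "arm/zcmp.S" {"revcmp":"ne"}
+     */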
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
new file mode 100644
index 0000000..436dcd2
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv7-A targets.
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub arm/alt_stub.S
+
+# file header and basic definitions
+import arm/header.S
+
+# arch-specific entry point to interpreter
+import arm/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    # op op_nop FALLBACK
+    # op op_move FALLBACK
+    # op op_move_from16 FALLBACK
+    # op op_move_16 FALLBACK
+    # op op_move_wide FALLBACK
+    # op op_move_wide_from16 FALLBACK
+    # op op_move_wide_16 FALLBACK
+    # op op_move_object FALLBACK
+    # op op_move_object_from16 FALLBACK
+    # op op_move_object_16 FALLBACK
+    # op op_move_result FALLBACK
+    # op op_move_result_wide FALLBACK
+    # op op_move_result_object FALLBACK
+    # op op_move_exception FALLBACK
+    # op op_return_void FALLBACK
+    # op op_return FALLBACK
+    # op op_return_wide FALLBACK
+    # op op_return_object FALLBACK
+    # op op_const_4 FALLBACK
+    # op op_const_16 FALLBACK
+    # op op_const FALLBACK
+    # op op_const_high16 FALLBACK
+    # op op_const_wide_16 FALLBACK
+    # op op_const_wide_32 FALLBACK
+    # op op_const_wide FALLBACK
+    # op op_const_wide_high16 FALLBACK
+    # op op_const_string FALLBACK
+    # op op_const_string_jumbo FALLBACK
+    # op op_const_class FALLBACK
+    # op op_monitor_enter FALLBACK
+    # op op_monitor_exit FALLBACK
+    # op op_check_cast FALLBACK
+    # op op_instance_of FALLBACK
+    # op op_array_length FALLBACK
+    # op op_new_instance FALLBACK
+    # op op_new_array FALLBACK
+    # op op_filled_new_array FALLBACK
+    # op op_filled_new_array_range FALLBACK
+    # op op_fill_array_data FALLBACK
+    # op op_throw FALLBACK
+    # op op_goto FALLBACK
+    # op op_goto_16 FALLBACK
+    # op op_goto_32 FALLBACK
+    # op op_packed_switch FALLBACK
+    # op op_sparse_switch FALLBACK
+    # op op_cmpl_float FALLBACK
+    # op op_cmpg_float FALLBACK
+    # op op_cmpl_double FALLBACK
+    # op op_cmpg_double FALLBACK
+    # op op_cmp_long FALLBACK
+    # op op_if_eq FALLBACK
+    # op op_if_ne FALLBACK
+    # op op_if_lt FALLBACK
+    # op op_if_ge FALLBACK
+    # op op_if_gt FALLBACK
+    # op op_if_le FALLBACK
+    # op op_if_eqz FALLBACK
+    # op op_if_nez FALLBACK
+    # op op_if_ltz FALLBACK
+    # op op_if_gez FALLBACK
+    # op op_if_gtz FALLBACK
+    # op op_if_lez FALLBACK
+    # op op_unused_3e FALLBACK
+    # op op_unused_3f FALLBACK
+    # op op_unused_40 FALLBACK
+    # op op_unused_41 FALLBACK
+    # op op_unused_42 FALLBACK
+    # op op_unused_43 FALLBACK
+    # op op_aget FALLBACK
+    # op op_aget_wide FALLBACK
+    # op op_aget_object FALLBACK
+    # op op_aget_boolean FALLBACK
+    # op op_aget_byte FALLBACK
+    # op op_aget_char FALLBACK
+    # op op_aget_short FALLBACK
+    # op op_aput FALLBACK
+    # op op_aput_wide FALLBACK
+    # op op_aput_object FALLBACK
+    # op op_aput_boolean FALLBACK
+    # op op_aput_byte FALLBACK
+    # op op_aput_char FALLBACK
+    # op op_aput_short FALLBACK
+    # op op_iget FALLBACK
+    # op op_iget_wide FALLBACK
+    # op op_iget_object FALLBACK
+    # op op_iget_boolean FALLBACK
+    # op op_iget_byte FALLBACK
+    # op op_iget_char FALLBACK
+    # op op_iget_short FALLBACK
+    # op op_iput FALLBACK
+    # op op_iput_wide FALLBACK
+    # op op_iput_object FALLBACK
+    # op op_iput_boolean FALLBACK
+    # op op_iput_byte FALLBACK
+    # op op_iput_char FALLBACK
+    # op op_iput_short FALLBACK
+    # op op_sget FALLBACK
+    # op op_sget_wide FALLBACK
+    # op op_sget_object FALLBACK
+    # op op_sget_boolean FALLBACK
+    # op op_sget_byte FALLBACK
+    # op op_sget_char FALLBACK
+    # op op_sget_short FALLBACK
+    # op op_sput FALLBACK
+    # op op_sput_wide FALLBACK
+    # op op_sput_object FALLBACK
+    # op op_sput_boolean FALLBACK
+    # op op_sput_byte FALLBACK
+    # op op_sput_char FALLBACK
+    # op op_sput_short FALLBACK
+    # op op_invoke_virtual FALLBACK
+    # op op_invoke_super FALLBACK
+    # op op_invoke_direct FALLBACK
+    # op op_invoke_static FALLBACK
+    # op op_invoke_interface FALLBACK
+    # op op_return_void_no_barrier FALLBACK
+    # op op_invoke_virtual_range FALLBACK
+    # op op_invoke_super_range FALLBACK
+    # op op_invoke_direct_range FALLBACK
+    # op op_invoke_static_range FALLBACK
+    # op op_invoke_interface_range FALLBACK
+    # op op_unused_79 FALLBACK
+    # op op_unused_7a FALLBACK
+    # op op_neg_int FALLBACK
+    # op op_not_int FALLBACK
+    # op op_neg_long FALLBACK
+    # op op_not_long FALLBACK
+    # op op_neg_float FALLBACK
+    # op op_neg_double FALLBACK
+    # op op_int_to_long FALLBACK
+    # op op_int_to_float FALLBACK
+    # op op_int_to_double FALLBACK
+    # op op_long_to_int FALLBACK
+    # op op_long_to_float FALLBACK
+    # op op_long_to_double FALLBACK
+    # op op_float_to_int FALLBACK
+    # op op_float_to_long FALLBACK
+    # op op_float_to_double FALLBACK
+    # op op_double_to_int FALLBACK
+    # op op_double_to_long FALLBACK
+    # op op_double_to_float FALLBACK
+    # op op_int_to_byte FALLBACK
+    # op op_int_to_char FALLBACK
+    # op op_int_to_short FALLBACK
+    # op op_add_int FALLBACK
+    # op op_sub_int FALLBACK
+    # op op_mul_int FALLBACK
+    # op op_div_int FALLBACK
+    # op op_rem_int FALLBACK
+    # op op_and_int FALLBACK
+    # op op_or_int FALLBACK
+    # op op_xor_int FALLBACK
+    # op op_shl_int FALLBACK
+    # op op_shr_int FALLBACK
+    # op op_ushr_int FALLBACK
+    # op op_add_long FALLBACK
+    # op op_sub_long FALLBACK
+    # op op_mul_long FALLBACK
+    # op op_div_long FALLBACK
+    # op op_rem_long FALLBACK
+    # op op_and_long FALLBACK
+    # op op_or_long FALLBACK
+    # op op_xor_long FALLBACK
+    # op op_shl_long FALLBACK
+    # op op_shr_long FALLBACK
+    # op op_ushr_long FALLBACK
+    # op op_add_float FALLBACK
+    # op op_sub_float FALLBACK
+    # op op_mul_float FALLBACK
+    # op op_div_float FALLBACK
+    # op op_rem_float FALLBACK
+    # op op_add_double FALLBACK
+    # op op_sub_double FALLBACK
+    # op op_mul_double FALLBACK
+    # op op_div_double FALLBACK
+    # op op_rem_double FALLBACK
+    # op op_add_int_2addr FALLBACK
+    # op op_sub_int_2addr FALLBACK
+    # op op_mul_int_2addr FALLBACK
+    # op op_div_int_2addr FALLBACK
+    # op op_rem_int_2addr FALLBACK
+    # op op_and_int_2addr FALLBACK
+    # op op_or_int_2addr FALLBACK
+    # op op_xor_int_2addr FALLBACK
+    # op op_shl_int_2addr FALLBACK
+    # op op_shr_int_2addr FALLBACK
+    # op op_ushr_int_2addr FALLBACK
+    # op op_add_long_2addr FALLBACK
+    # op op_sub_long_2addr FALLBACK
+    # op op_mul_long_2addr FALLBACK
+    # op op_div_long_2addr FALLBACK
+    # op op_rem_long_2addr FALLBACK
+    # op op_and_long_2addr FALLBACK
+    # op op_or_long_2addr FALLBACK
+    # op op_xor_long_2addr FALLBACK
+    # op op_shl_long_2addr FALLBACK
+    # op op_shr_long_2addr FALLBACK
+    # op op_ushr_long_2addr FALLBACK
+    # op op_add_float_2addr FALLBACK
+    # op op_sub_float_2addr FALLBACK
+    # op op_mul_float_2addr FALLBACK
+    # op op_div_float_2addr FALLBACK
+    # op op_rem_float_2addr FALLBACK
+    # op op_add_double_2addr FALLBACK
+    # op op_sub_double_2addr FALLBACK
+    # op op_mul_double_2addr FALLBACK
+    # op op_div_double_2addr FALLBACK
+    # op op_rem_double_2addr FALLBACK
+    # op op_add_int_lit16 FALLBACK
+    # op op_rsub_int FALLBACK
+    # op op_mul_int_lit16 FALLBACK
+    # op op_div_int_lit16 FALLBACK
+    # op op_rem_int_lit16 FALLBACK
+    # op op_and_int_lit16 FALLBACK
+    # op op_or_int_lit16 FALLBACK
+    # op op_xor_int_lit16 FALLBACK
+    # op op_add_int_lit8 FALLBACK
+    # op op_rsub_int_lit8 FALLBACK
+    # op op_mul_int_lit8 FALLBACK
+    # op op_div_int_lit8 FALLBACK
+    # op op_rem_int_lit8 FALLBACK
+    # op op_and_int_lit8 FALLBACK
+    # op op_or_int_lit8 FALLBACK
+    # op op_xor_int_lit8 FALLBACK
+    # op op_shl_int_lit8 FALLBACK
+    # op op_shr_int_lit8 FALLBACK
+    # op op_ushr_int_lit8 FALLBACK
+    # op op_iget_quick FALLBACK
+    # op op_iget_wide_quick FALLBACK
+    # op op_iget_object_quick FALLBACK
+    # op op_iput_quick FALLBACK
+    # op op_iput_wide_quick FALLBACK
+    # op op_iput_object_quick FALLBACK
+    # op op_invoke_virtual_quick FALLBACK
+    # op op_invoke_virtual_range_quick FALLBACK
+    # op op_iput_boolean_quick FALLBACK
+    # op op_iput_byte_quick FALLBACK
+    # op op_iput_char_quick FALLBACK
+    # op op_iput_short_quick FALLBACK
+    # op op_iget_boolean_quick FALLBACK
+    # op op_iget_byte_quick FALLBACK
+    # op op_iget_char_quick FALLBACK
+    # op op_iget_short_quick FALLBACK
+    op op_invoke_lambda FALLBACK
+    # op op_unused_f4 FALLBACK
+    op op_capture_variable FALLBACK
+    op op_create_lambda FALLBACK
+    op op_liberate_variable FALLBACK
+    op op_box_lambda FALLBACK
+    op op_unbox_lambda FALLBACK
+    # op op_unused_fa FALLBACK
+    # op op_unused_fb FALLBACK
+    # op op_unused_fc FALLBACK
+    # op op_unused_fd FALLBACK
+    # op op_unused_fe FALLBACK
+    # op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import arm/footer.S
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
new file mode 100644
index 0000000..ef3c721
--- /dev/null
+++ b/runtime/interpreter/mterp/config_arm64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARM64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# file header and basic definitions
+import arm64/header.S
+
+# arch-specific entry point to interpreter
+import arm64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub arm64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start arm64
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import arm64/footer.S
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
new file mode 100644
index 0000000..d1221f7
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_32
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips/alt_stub.S
+
+# file header and basic definitions
+import mips/header.S
+
+# arch-specific entry point to interpreter
+import mips/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op op_unused_f3 FALLBACK
+    op op_unused_f4 FALLBACK
+    op op_unused_f5 FALLBACK
+    op op_unused_f6 FALLBACK
+    op op_unused_f7 FALLBACK
+    op op_unused_f8 FALLBACK
+    op op_unused_f9 FALLBACK
+    op op_unused_fa FALLBACK
+    op op_unused_fb FALLBACK
+    op op_unused_fc FALLBACK
+    op op_unused_fd FALLBACK
+    op op_unused_fe FALLBACK
+    op op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips/footer.S
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
new file mode 100644
index 0000000..f804ce5
--- /dev/null
+++ b/runtime/interpreter/mterp/config_mips64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for MIPS_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub mips64/alt_stub.S
+
+# file header and basic definitions
+import mips64/header.S
+
+# arch-specific entry point to interpreter
+import mips64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub mips64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start mips64
+    # (override example:) op op_sub_float_2addr arm-vfp
+    # (fallback example:) op op_sub_float_2addr FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op op_unused_3e FALLBACK
+    op op_unused_3f FALLBACK
+    op op_unused_40 FALLBACK
+    op op_unused_41 FALLBACK
+    op op_unused_42 FALLBACK
+    op op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op op_unused_79 FALLBACK
+    op op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op_unused_f3 FALLBACK
+    op_unused_f4 FALLBACK
+    op_unused_f5 FALLBACK
+    op_unused_f6 FALLBACK
+    op_unused_f7 FALLBACK
+    op_unused_f8 FALLBACK
+    op_unused_f9 FALLBACK
+    op_unused_fa FALLBACK
+    op_unused_fb FALLBACK
+    op_unused_fc FALLBACK
+    op_unused_fd FALLBACK
+    op_unused_fe FALLBACK
+    op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import mips64/footer.S
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
new file mode 100644
index 0000000..277817d
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub x86/alt_stub.S
+
+# file header and basic definitions
+import x86/header.S
+
+# arch-specific entry point to interpreter
+import x86/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op_unused_3e FALLBACK
+    op_unused_3f FALLBACK
+    op_unused_40 FALLBACK
+    op_unused_41 FALLBACK
+    op_unused_42 FALLBACK
+    op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op_unused_79 FALLBACK
+    op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op_unused_f3 FALLBACK
+    op_unused_f4 FALLBACK
+    op_unused_f5 FALLBACK
+    op_unused_f6 FALLBACK
+    op_unused_f7 FALLBACK
+    op_unused_f8 FALLBACK
+    op_unused_f9 FALLBACK
+    op_unused_fa FALLBACK
+    op_unused_fb FALLBACK
+    op_unused_fc FALLBACK
+    op_unused_fd FALLBACK
+    op_unused_fe FALLBACK
+    op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86/footer.S
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
new file mode 100644
index 0000000..a002dc2
--- /dev/null
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -0,0 +1,298 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for X86_64
+#
+
+handler-style computed-goto
+handler-size 128
+
+# source for alternate entry stub
+asm-alt-stub x86_64/alt_stub.S
+
+# file header and basic definitions
+import x86_64/header.S
+
+# arch-specific entry point to interpreter
+import x86_64/entry.S
+
+# Stub to switch to alternate interpreter
+fallback-stub x86_64/fallback.S
+
+# opcode list; argument to op-start is default directory
+op-start x86_64
+    # (override example:) op OP_SUB_FLOAT_2ADDR arm-vfp
+    # (fallback example:) op OP_SUB_FLOAT_2ADDR FALLBACK
+
+    op op_nop FALLBACK
+    op op_move FALLBACK
+    op op_move_from16 FALLBACK
+    op op_move_16 FALLBACK
+    op op_move_wide FALLBACK
+    op op_move_wide_from16 FALLBACK
+    op op_move_wide_16 FALLBACK
+    op op_move_object FALLBACK
+    op op_move_object_from16 FALLBACK
+    op op_move_object_16 FALLBACK
+    op op_move_result FALLBACK
+    op op_move_result_wide FALLBACK
+    op op_move_result_object FALLBACK
+    op op_move_exception FALLBACK
+    op op_return_void FALLBACK
+    op op_return FALLBACK
+    op op_return_wide FALLBACK
+    op op_return_object FALLBACK
+    op op_const_4 FALLBACK
+    op op_const_16 FALLBACK
+    op op_const FALLBACK
+    op op_const_high16 FALLBACK
+    op op_const_wide_16 FALLBACK
+    op op_const_wide_32 FALLBACK
+    op op_const_wide FALLBACK
+    op op_const_wide_high16 FALLBACK
+    op op_const_string FALLBACK
+    op op_const_string_jumbo FALLBACK
+    op op_const_class FALLBACK
+    op op_monitor_enter FALLBACK
+    op op_monitor_exit FALLBACK
+    op op_check_cast FALLBACK
+    op op_instance_of FALLBACK
+    op op_array_length FALLBACK
+    op op_new_instance FALLBACK
+    op op_new_array FALLBACK
+    op op_filled_new_array FALLBACK
+    op op_filled_new_array_range FALLBACK
+    op op_fill_array_data FALLBACK
+    op op_throw FALLBACK
+    op op_goto FALLBACK
+    op op_goto_16 FALLBACK
+    op op_goto_32 FALLBACK
+    op op_packed_switch FALLBACK
+    op op_sparse_switch FALLBACK
+    op op_cmpl_float FALLBACK
+    op op_cmpg_float FALLBACK
+    op op_cmpl_double FALLBACK
+    op op_cmpg_double FALLBACK
+    op op_cmp_long FALLBACK
+    op op_if_eq FALLBACK
+    op op_if_ne FALLBACK
+    op op_if_lt FALLBACK
+    op op_if_ge FALLBACK
+    op op_if_gt FALLBACK
+    op op_if_le FALLBACK
+    op op_if_eqz FALLBACK
+    op op_if_nez FALLBACK
+    op op_if_ltz FALLBACK
+    op op_if_gez FALLBACK
+    op op_if_gtz FALLBACK
+    op op_if_lez FALLBACK
+    op_unused_3e FALLBACK
+    op_unused_3f FALLBACK
+    op_unused_40 FALLBACK
+    op_unused_41 FALLBACK
+    op_unused_42 FALLBACK
+    op_unused_43 FALLBACK
+    op op_aget FALLBACK
+    op op_aget_wide FALLBACK
+    op op_aget_object FALLBACK
+    op op_aget_boolean FALLBACK
+    op op_aget_byte FALLBACK
+    op op_aget_char FALLBACK
+    op op_aget_short FALLBACK
+    op op_aput FALLBACK
+    op op_aput_wide FALLBACK
+    op op_aput_object FALLBACK
+    op op_aput_boolean FALLBACK
+    op op_aput_byte FALLBACK
+    op op_aput_char FALLBACK
+    op op_aput_short FALLBACK
+    op op_iget FALLBACK
+    op op_iget_wide FALLBACK
+    op op_iget_object FALLBACK
+    op op_iget_boolean FALLBACK
+    op op_iget_byte FALLBACK
+    op op_iget_char FALLBACK
+    op op_iget_short FALLBACK
+    op op_iput FALLBACK
+    op op_iput_wide FALLBACK
+    op op_iput_object FALLBACK
+    op op_iput_boolean FALLBACK
+    op op_iput_byte FALLBACK
+    op op_iput_char FALLBACK
+    op op_iput_short FALLBACK
+    op op_sget FALLBACK
+    op op_sget_wide FALLBACK
+    op op_sget_object FALLBACK
+    op op_sget_boolean FALLBACK
+    op op_sget_byte FALLBACK
+    op op_sget_char FALLBACK
+    op op_sget_short FALLBACK
+    op op_sput FALLBACK
+    op op_sput_wide FALLBACK
+    op op_sput_object FALLBACK
+    op op_sput_boolean FALLBACK
+    op op_sput_byte FALLBACK
+    op op_sput_char FALLBACK
+    op op_sput_short FALLBACK
+    op op_invoke_virtual FALLBACK
+    op op_invoke_super FALLBACK
+    op op_invoke_direct FALLBACK
+    op op_invoke_static FALLBACK
+    op op_invoke_interface FALLBACK
+    op op_return_void_no_barrier FALLBACK
+    op op_invoke_virtual_range FALLBACK
+    op op_invoke_super_range FALLBACK
+    op op_invoke_direct_range FALLBACK
+    op op_invoke_static_range FALLBACK
+    op op_invoke_interface_range FALLBACK
+    op_unused_79 FALLBACK
+    op_unused_7a FALLBACK
+    op op_neg_int FALLBACK
+    op op_not_int FALLBACK
+    op op_neg_long FALLBACK
+    op op_not_long FALLBACK
+    op op_neg_float FALLBACK
+    op op_neg_double FALLBACK
+    op op_int_to_long FALLBACK
+    op op_int_to_float FALLBACK
+    op op_int_to_double FALLBACK
+    op op_long_to_int FALLBACK
+    op op_long_to_float FALLBACK
+    op op_long_to_double FALLBACK
+    op op_float_to_int FALLBACK
+    op op_float_to_long FALLBACK
+    op op_float_to_double FALLBACK
+    op op_double_to_int FALLBACK
+    op op_double_to_long FALLBACK
+    op op_double_to_float FALLBACK
+    op op_int_to_byte FALLBACK
+    op op_int_to_char FALLBACK
+    op op_int_to_short FALLBACK
+    op op_add_int FALLBACK
+    op op_sub_int FALLBACK
+    op op_mul_int FALLBACK
+    op op_div_int FALLBACK
+    op op_rem_int FALLBACK
+    op op_and_int FALLBACK
+    op op_or_int FALLBACK
+    op op_xor_int FALLBACK
+    op op_shl_int FALLBACK
+    op op_shr_int FALLBACK
+    op op_ushr_int FALLBACK
+    op op_add_long FALLBACK
+    op op_sub_long FALLBACK
+    op op_mul_long FALLBACK
+    op op_div_long FALLBACK
+    op op_rem_long FALLBACK
+    op op_and_long FALLBACK
+    op op_or_long FALLBACK
+    op op_xor_long FALLBACK
+    op op_shl_long FALLBACK
+    op op_shr_long FALLBACK
+    op op_ushr_long FALLBACK
+    op op_add_float FALLBACK
+    op op_sub_float FALLBACK
+    op op_mul_float FALLBACK
+    op op_div_float FALLBACK
+    op op_rem_float FALLBACK
+    op op_add_double FALLBACK
+    op op_sub_double FALLBACK
+    op op_mul_double FALLBACK
+    op op_div_double FALLBACK
+    op op_rem_double FALLBACK
+    op op_add_int_2addr FALLBACK
+    op op_sub_int_2addr FALLBACK
+    op op_mul_int_2addr FALLBACK
+    op op_div_int_2addr FALLBACK
+    op op_rem_int_2addr FALLBACK
+    op op_and_int_2addr FALLBACK
+    op op_or_int_2addr FALLBACK
+    op op_xor_int_2addr FALLBACK
+    op op_shl_int_2addr FALLBACK
+    op op_shr_int_2addr FALLBACK
+    op op_ushr_int_2addr FALLBACK
+    op op_add_long_2addr FALLBACK
+    op op_sub_long_2addr FALLBACK
+    op op_mul_long_2addr FALLBACK
+    op op_div_long_2addr FALLBACK
+    op op_rem_long_2addr FALLBACK
+    op op_and_long_2addr FALLBACK
+    op op_or_long_2addr FALLBACK
+    op op_xor_long_2addr FALLBACK
+    op op_shl_long_2addr FALLBACK
+    op op_shr_long_2addr FALLBACK
+    op op_ushr_long_2addr FALLBACK
+    op op_add_float_2addr FALLBACK
+    op op_sub_float_2addr FALLBACK
+    op op_mul_float_2addr FALLBACK
+    op op_div_float_2addr FALLBACK
+    op op_rem_float_2addr FALLBACK
+    op op_add_double_2addr FALLBACK
+    op op_sub_double_2addr FALLBACK
+    op op_mul_double_2addr FALLBACK
+    op op_div_double_2addr FALLBACK
+    op op_rem_double_2addr FALLBACK
+    op op_add_int_lit16 FALLBACK
+    op op_rsub_int FALLBACK
+    op op_mul_int_lit16 FALLBACK
+    op op_div_int_lit16 FALLBACK
+    op op_rem_int_lit16 FALLBACK
+    op op_and_int_lit16 FALLBACK
+    op op_or_int_lit16 FALLBACK
+    op op_xor_int_lit16 FALLBACK
+    op op_add_int_lit8 FALLBACK
+    op op_rsub_int_lit8 FALLBACK
+    op op_mul_int_lit8 FALLBACK
+    op op_div_int_lit8 FALLBACK
+    op op_rem_int_lit8 FALLBACK
+    op op_and_int_lit8 FALLBACK
+    op op_or_int_lit8 FALLBACK
+    op op_xor_int_lit8 FALLBACK
+    op op_shl_int_lit8 FALLBACK
+    op op_shr_int_lit8 FALLBACK
+    op op_ushr_int_lit8 FALLBACK
+    op op_iget_quick FALLBACK
+    op op_iget_wide_quick FALLBACK
+    op op_iget_object_quick FALLBACK
+    op op_iput_quick FALLBACK
+    op op_iput_wide_quick FALLBACK
+    op op_iput_object_quick FALLBACK
+    op op_invoke_virtual_quick FALLBACK
+    op op_invoke_virtual_range_quick FALLBACK
+    op op_iput_boolean_quick FALLBACK
+    op op_iput_byte_quick FALLBACK
+    op op_iput_char_quick FALLBACK
+    op op_iput_short_quick FALLBACK
+    op op_iget_boolean_quick FALLBACK
+    op op_iget_byte_quick FALLBACK
+    op op_iget_char_quick FALLBACK
+    op op_iget_short_quick FALLBACK
+    op_unused_f3 FALLBACK
+    op_unused_f4 FALLBACK
+    op_unused_f5 FALLBACK
+    op_unused_f6 FALLBACK
+    op_unused_f7 FALLBACK
+    op_unused_f8 FALLBACK
+    op_unused_f9 FALLBACK
+    op_unused_fa FALLBACK
+    op_unused_fb FALLBACK
+    op_unused_fc FALLBACK
+    op_unused_fd FALLBACK
+    op_unused_fe FALLBACK
+    op_unused_ff FALLBACK
+op-end
+
+# common subroutines for asm
+import x86_64/footer.S
diff --git a/runtime/interpreter/mterp/gen_mterp.py b/runtime/interpreter/mterp/gen_mterp.py
new file mode 100755
index 0000000..f56d8bd
--- /dev/null
+++ b/runtime/interpreter/mterp/gen_mterp.py
@@ -0,0 +1,602 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Using instructions from an architecture-specific config file, generate C
+# and assembly source files for the Dalvik interpreter.
+#
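+# A minimal sketch of typical use (paths hypothetical):
+#
+#   python gen_mterp.py x86 out
+#
+# reads "config_x86" from the current directory and writes "out/mterp_x86.S".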
+
+import sys, string, re, time
+from string import Template
+
+interp_defs_file = "../../dex_instruction_list.h" # need opcode list
+kNumPackedOpcodes = 256
+
+splitops = False
+verbose = False
+handler_size_bits = -1000
+handler_size_bytes = -1000
+in_op_start = 0             # 0=not started, 1=started, 2=ended
+in_alt_op_start = 0         # 0=not started, 1=started, 2=ended
+default_op_dir = None
+default_alt_stub = None
+opcode_locations = {}
+alt_opcode_locations = {}
+asm_stub_text = []
+fallback_stub_text = []
+label_prefix = ".L"         # use ".L" to hide labels from gdb
+alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb
+style = None                # interpreter style
+generate_alt_table = False
+
+# Exception class.
+class DataParseError(SyntaxError):
+    "Failure when parsing data file"
+
+#
+# Set any omnipresent substitution values.
+#
+def getGlobalSubDict():
+    return { "handler_size_bits":handler_size_bits,
+             "handler_size_bytes":handler_size_bytes }
+
+#
+# Parse arch config file --
+# Set interpreter style.
+#
+def setHandlerStyle(tokens):
+    global style
+    if len(tokens) != 2:
+        raise DataParseError("handler-style requires one argument")
+    style = tokens[1]
+    if style != "computed-goto":
+        raise DataParseError("handler-style (%s) invalid" % style)
+
+#
+# Parse arch config file --
+# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to
+# log2(handler_size_bytes).  Throws an exception if "bytes" is zero or not
+# a power of two.
+#
+def setHandlerSize(tokens):
+    global handler_size_bits, handler_size_bytes
+    if style != "computed-goto":
+        print "Warning: handler-size valid only for computed-goto interpreters"
+    if len(tokens) != 2:
+        raise DataParseError("handler-size requires one argument")
+    if handler_size_bits != -1000:
+        raise DataParseError("handler-size may only be set once")
+
+    # compute log2(n), and make sure n is a nonzero power of 2
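+    # For example, "handler-size 128" (as used by the configs in this change)
+    # yields handler_size_bytes = 128 and handler_size_bits = 7.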
+    handler_size_bytes = bytes = int(tokens[1])
+    bits = -1
+    while bytes > 0:
+        bytes //= 2     # halve with truncating division
+        bits += 1
+
+    if handler_size_bytes == 0 or handler_size_bytes != (1 << bits):
+        raise DataParseError("handler-size (%d) must be power of 2" \
+                % handler_size_bytes)
+    handler_size_bits = bits
+
+#
+# Parse arch config file --
+# Copy a file in to asm output file.
+#
+def importFile(tokens):
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    source = tokens[1]
+    if source.endswith(".S"):
+        appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None)
+    else:
+        raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
+                % source)
+
+#
+# Parse arch config file --
+# Read the asm stub source for later emission.
+#
+def setAsmStub(tokens):
+    global asm_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        asm_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load asm-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Read the fallback stub source for later emission.
+#
+def setFallbackStub(tokens):
+    global fallback_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        fallback_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load fallback-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Record location of default alt stub
+#
+def setAsmAltStub(tokens):
+    global default_alt_stub, generate_alt_table
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    default_alt_stub = tokens[1]
+    generate_alt_table = True
+
+#
+# Parse arch config file --
+# Start of opcode list.
+#
+def opStart(tokens):
+    global in_op_start
+    global default_op_dir
+    if len(tokens) != 2:
+        raise DataParseError("opStart takes a directory name argument")
+    if in_op_start != 0:
+        raise DataParseError("opStart can only be specified once")
+    default_op_dir = tokens[1]
+    in_op_start = 1
+
+#
+# Parse arch config file --
+# Set location of a single alt opcode's source file.
+#
+def altEntry(tokens):
+    global generate_alt_table
+    if len(tokens) != 3:
+        raise DataParseError("alt requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("alt statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if alt_opcode_locations.has_key(tokens[1]):
+        print "Note: alt overrides earlier %s (%s -> %s)" \
+                % (tokens[1], alt_opcode_locations[tokens[1]], tokens[2])
+    alt_opcode_locations[tokens[1]] = tokens[2]
+    generate_alt_table = True
+
+#
+# Parse arch config file --
+# Set location of a single opcode's source file.
+#
+def opEntry(tokens):
+    #global opcode_locations
+    if len(tokens) != 3:
+        raise DataParseError("op requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("op statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    if opcode_locations.has_key(tokens[1]):
+        print "Note: op overrides earlier %s (%s -> %s)" \
+                % (tokens[1], opcode_locations[tokens[1]], tokens[2])
+    opcode_locations[tokens[1]] = tokens[2]
+
+#
+# Parse arch config file --
+# End of opcode list; emit instruction blocks.
+#
+def opEnd(tokens):
+    global in_op_start
+    if len(tokens) != 1:
+        raise DataParseError("opEnd takes no arguments")
+    if in_op_start != 1:
+        raise DataParseError("opEnd must follow opStart, and only appear once")
+    in_op_start = 2
+
+    loadAndEmitOpcodes()
+    if splitops == False:
+        if generate_alt_table:
+            loadAndEmitAltOpcodes()
+
+def genaltop(tokens):
+    if in_op_start != 2:
+        raise DataParseError("alt-op can be specified only after op-end")
+    if len(tokens) != 1:
+        raise DataParseError("alt-ops takes no arguments")
+    if generate_alt_table:
+        loadAndEmitAltOpcodes()
+
+#
+# Extract an ordered list of instructions from the VM sources.  We use the
+# "goto table" definition macro, which has exactly kNumPackedOpcodes
+# entries.
+#
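+# For example, a list entry of the form "V(0x00, NOP, ...)" (layout assumed
+# from the regex below) yields the opcode name "op_nop".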
+def getOpcodeList():
+    opcodes = []
+    opcode_fp = open(interp_defs_file)
+    opcode_re = re.compile(r"^\s*V\((....), (\w+),.*", re.DOTALL)
+    for line in opcode_fp:
+        match = opcode_re.match(line)
+        if not match:
+            continue
+        opcodes.append("op_" + match.group(2).lower())
+    opcode_fp.close()
+
+    if len(opcodes) != kNumPackedOpcodes:
+        print "ERROR: found %d opcodes in Interp.h (expected %d)" \
+                % (len(opcodes), kNumPackedOpcodes)
+        raise SyntaxError, "bad opcode count"
+    return opcodes
+
+def emitAlign():
+    if style == "computed-goto":
+        asm_fp.write("    .balign %d\n" % handler_size_bytes)
+
+#
+# Load and emit opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitOpcodes():
+    sister_list = []
+    assert len(opcodes) == kNumPackedOpcodes
+    need_dummy_start = False
+    start_label = "artMterpAsmInstructionStart"
+    end_label = "artMterpAsmInstructionEnd"
+
+    # point MterpAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    .type   %s, %%function\n" % start_label)
+    asm_fp.write("%s = " % start_label + label_prefix + "_op_nop\n")
+    asm_fp.write("    .text\n\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+
+        if opcode_locations.has_key(op):
+            location = opcode_locations[op]
+        else:
+            location = default_op_dir
+
+        if location == "FALLBACK":
+            emitFallback(i)
+        else:
+            loadAndEmitAsm(location, i, sister_list)
+
+    # For a 100% C implementation, there are no asm handlers or stubs.  We
+    # need to have the artMterpAsmInstructionStart label point at op_nop, and
+    # it's too annoying to try to slide it in after the alignment pseudo-op, so
+    # we take the low road and just emit a dummy op_nop here.
+    if need_dummy_start:
+        emitAlign()
+        asm_fp.write(label_prefix + "_op_nop:   /* dummy */\n")
+
+    emitAlign()
+    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+    if style == "computed-goto":
+        emitSectionComment("Sister implementations", asm_fp)
+        asm_fp.write("    .global artMterpAsmSisterStart\n")
+        asm_fp.write("    .type   artMterpAsmSisterStart, %function\n")
+        asm_fp.write("    .text\n")
+        asm_fp.write("    .balign 4\n")
+        asm_fp.write("artMterpAsmSisterStart:\n")
+        asm_fp.writelines(sister_list)
+        asm_fp.write("\n    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart\n")
+        asm_fp.write("    .global artMterpAsmSisterEnd\n")
+        asm_fp.write("artMterpAsmSisterEnd:\n\n")
+
+#
+# Load an alternate entry stub
+#
+def loadAndEmitAltStub(source, opindex):
+    op = opcodes[opindex]
+    if verbose:
+        print " alt emit %s --> stub" % source
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+
+    emitAsmHeader(asm_fp, dict, alt_label_prefix)
+    appendSourceFile(source, dict, asm_fp, None)
+
+#
+# Load and emit alternate opcodes for all kNumPackedOpcodes instructions.
+#
+def loadAndEmitAltOpcodes():
+    assert len(opcodes) == kNumPackedOpcodes
+    start_label = "artMterpAsmAltInstructionStart"
+    end_label = "artMterpAsmAltInstructionEnd"
+
+    # point MterpAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global %s\n" % start_label)
+    asm_fp.write("    .type   %s, %%function\n" % start_label)
+    asm_fp.write("    .text\n\n")
+    asm_fp.write("%s = " % start_label + label_prefix + "_ALT_op_nop\n")
+
+    for i in xrange(kNumPackedOpcodes):
+        op = opcodes[i]
+        if alt_opcode_locations.has_key(op):
+            source = "%s/alt_%s.S" % (alt_opcode_locations[op], op)
+        else:
+            source = default_alt_stub
+        loadAndEmitAltStub(source, i)
+
+    emitAlign()
+    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
+    asm_fp.write("    .global %s\n" % end_label)
+    asm_fp.write("%s:\n" % end_label)
+
+#
+# Load an assembly fragment and emit it.
+#
+def loadAndEmitAsm(location, opindex, sister_list):
+    op = opcodes[opindex]
+    source = "%s/%s.S" % (location, op)
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    if verbose:
+        print " emit %s --> asm" % source
+
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    appendSourceFile(source, dict, asm_fp, sister_list)
+
+#
+# Emit fallback fragment
+#
+def emitFallback(opindex):
+    op = opcodes[opindex]
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    emitAsmHeader(asm_fp, dict, label_prefix)
+    for line in fallback_stub_text:
+        asm_fp.write(line)
+    asm_fp.write("\n")
+
+#
+# Output the alignment directive and label for an assembly piece.
+#
+def emitAsmHeader(outfp, dict, prefix):
+    outfp.write("/* ------------------------------ */\n")
+    # The alignment directive ensures that the handler occupies
+    # at least the correct amount of space.  We don't try to deal
+    # with overflow here.
+    emitAlign()
+    # Emit a label so that gdb will say the right thing.  We prepend an
+    # underscore so the symbol name doesn't clash with the Opcode enum.
+    outfp.write(prefix + "_%(opcode)s: /* 0x%(opnum)02x */\n" % dict)
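+    # With the default prefix this emits, for example,
+    # ".L_op_nop: /* 0x00 */" for opcode 0.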
+
+#
+# Output a generic instruction stub that updates the "glue" struct and
+# calls the C implementation.
+#
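+# For example, a stub line of "    call ${opcode}_helper" (hypothetical)
+# expands to "    call op_nop_helper" when emitted for op_nop.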
+def emitAsmStub(outfp, dict):
+    emitAsmHeader(outfp, dict, label_prefix)
+    for line in asm_stub_text:
+        templ = Template(line)
+        outfp.write(templ.substitute(dict))
+
+#
+# Append the file specified by "source" to the open "outfp".  Each line will
+# be template-replaced using the substitution dictionary "dict".
+#
+# Lines that start with "%" are taken as directives.
+# A "%include" line contains a filename and, optionally, a Python-style
+# dictionary declaration with substitution strings.  (This is implemented
+# with recursion.)
+#
+# If "sister_list" is provided, and we find a line that contains only "&",
+# all subsequent lines from the file will be appended to sister_list instead
+# of copied to the output.
+#
+# This may modify "dict".
+#
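+# A hypothetical fragment illustrating the directives handled below:
+#
+#   %default { "result":"%eax" }
+#   %include "x86/binop.S" { "instr":"addl" }
+#
+# The %default values fill in any keys the caller's dict omits; the %include
+# line recursively appends another fragment with extra substitutions.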
+def appendSourceFile(source, dict, outfp, sister_list):
+    outfp.write("/* File: %s */\n" % source)
+    infp = open(source, "r")
+    in_sister = False
+    for line in infp:
+        if line.startswith("%include"):
+            # Parse the "include" line
+            tokens = line.strip().split(' ', 2)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%include in %s" % source)
+
+            alt_source = tokens[1].strip("\"")
+            if alt_source == source:
+                raise DataParseError("self-referential %%include in %s"
+                        % source)
+
+            new_dict = dict.copy()
+            if len(tokens) == 3:
+                new_dict.update(eval(tokens[2]))
+            #print " including src=%s dict=%s" % (alt_source, new_dict)
+            appendSourceFile(alt_source, new_dict, outfp, sister_list)
+            continue
+
+        elif line.startswith("%default"):
+            # copy keywords into dictionary
+            tokens = line.strip().split(' ', 1)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%default in %s" % source)
+            defaultValues = eval(tokens[1])
+            for entry in defaultValues:
+                dict.setdefault(entry, defaultValues[entry])
+            continue
+
+        elif line.startswith("%break") and sister_list != None:
+            # allow more than one %break, ignoring all following the first
+            if style == "computed-goto" and not in_sister:
+                in_sister = True
+                sister_list.append("\n/* continuation for %(opcode)s */\n"%dict)
+            continue
+
+        # perform keyword substitution if a dictionary was provided
+        if dict != None:
+            templ = Template(line)
+            try:
+                subline = templ.substitute(dict)
+            except KeyError, err:
+                raise DataParseError("keyword substitution failed in %s: %s"
+                        % (source, str(err)))
+            except:
+                print "ERROR: substitution failed: " + line
+                raise
+        else:
+            subline = line
+
+        # write output to appropriate file
+        if in_sister:
+            sister_list.append(subline)
+        else:
+            outfp.write(subline)
+    outfp.write("\n")
+    infp.close()
+
+#
+# Emit a C-style section header comment.
+#
+def emitSectionComment(str, fp):
+    equals = "========================================" \
+             "==================================="
+
+    fp.write("\n/*\n * %s\n *  %s\n * %s\n */\n" %
+        (equals, str, equals))
+
+
+#
+# ===========================================================================
+# "main" code
+#
+
+#
+# Check args.
+#
+if len(sys.argv) != 3:
+    print "Usage: %s target-arch output-dir" % sys.argv[0]
+    sys.exit(2)
+
+target_arch = sys.argv[1]
+output_dir = sys.argv[2]
+
+#
+# Extract opcode list.
+#
+opcodes = getOpcodeList()
+#for op in opcodes:
+#    print "  %s" % op
+
+#
+# Open config file.
+#
+try:
+    config_fp = open("config_%s" % target_arch)
+except:
+    print "Unable to open config file 'config_%s'" % target_arch
+    sys.exit(1)
+
+#
+# Open and prepare output files.
+#
+try:
+    asm_fp = open("%s/mterp_%s.S" % (output_dir, target_arch), "w")
+except:
+    print "Unable to open output files"
+    print "Make sure directory '%s' exists and existing files are writable" \
+            % output_dir
+    # Ideally we'd remove the files to avoid confusing "make", but if they
+    # failed to open we probably won't be able to remove them either.
+    sys.exit(1)
+
+print "Generating %s" % (asm_fp.name)
+
+file_header = """/*
+ * This file was generated automatically by gen-mterp.py for '%s'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+""" % (target_arch)
+
+asm_fp.write(file_header)
+
+#
+# Process the config file.
+#
+failed = False
+try:
+    for line in config_fp:
+        line = line.strip()         # remove CRLF, leading spaces
+        tokens = line.split(' ')    # tokenize
+        #print "%d: %s" % (len(tokens), tokens)
+        if len(tokens[0]) == 0:
+            #print "  blank"
+            pass
+        elif tokens[0][0] == '#':
+            #print "  comment"
+            pass
+        else:
+            if tokens[0] == "handler-size":
+                setHandlerSize(tokens)
+            elif tokens[0] == "import":
+                importFile(tokens)
+            elif tokens[0] == "asm-stub":
+                setAsmStub(tokens)
+            elif tokens[0] == "asm-alt-stub":
+                setAsmAltStub(tokens)
+            elif tokens[0] == "op-start":
+                opStart(tokens)
+            elif tokens[0] == "op-end":
+                opEnd(tokens)
+            elif tokens[0] == "alt":
+                altEntry(tokens)
+            elif tokens[0] == "op":
+                opEntry(tokens)
+            elif tokens[0] == "handler-style":
+                setHandlerStyle(tokens)
+            elif tokens[0] == "alt-ops":
+                genaltop(tokens)
+            elif tokens[0] == "split-ops":
+                splitops = True
+            elif tokens[0] == "fallback-stub":
+                setFallbackStub(tokens)
+            else:
+                raise DataParseError, "unrecognized command '%s'" % tokens[0]
+            if style == None:
+                print "tokens[0] = %s" % tokens[0]
+                raise DataParseError, "handler-style must be first command"
+except DataParseError, err:
+    print "Failed: " + str(err)
+    # TODO: remove output files so "make" doesn't get confused
+    failed = True
+    asm_fp.close()
+    asm_fp = None
+
+config_fp.close()
+
+#
+# Done!
+#
+if asm_fp:
+    asm_fp.close()
+
+sys.exit(failed)
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
new file mode 100644
index 0000000..060fe76
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -0,0 +1,611 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Mterp entry point and support functions.
+ */
+#include "interpreter/interpreter_common.h"
+#include "entrypoints/entrypoint_utils-inl.h"
+#include "mterp.h"
+
+namespace art {
+namespace interpreter {
+/*
+ * Verify some constants used by the mterp interpreter.
+ */
+void CheckMterpAsmConstants() {
+  /*
+   * If we're using computed goto instruction transitions, make sure
+   * none of the handlers overflows the 128-byte limit.  This won't tell
+   * which one did, but if any one is too big the total size will
+   * overflow.
+   */
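+  /*
+   * For example, with kNumPackedOpcodes == 256 the expected total is
+   * 256 * 128 == 32768 bytes.
+   */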
+  const int width = 128;
+  int interp_size = (uintptr_t) artMterpAsmInstructionEnd -
+                    (uintptr_t) artMterpAsmInstructionStart;
+  if ((interp_size == 0) || (interp_size != (art::kNumPackedOpcodes * width))) {
+      LOG(art::FATAL) << "ERROR: unexpected asm interp size " << interp_size
+                      << "(did an instruction handler exceed " << width << " bytes?)";
+  }
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(artMterpAsmInstructionStart);
+  self->SetMterpAltIBase(artMterpAsmAltInstructionStart);
+  self->SetMterpCurrentIBase(artMterpAsmInstructionStart);
+}
+
+/*
+ * Find the matching case.  Returns the offset to the handler instructions.
+ *
+ * Returns 3 if we don't find a match (it's the size of the sparse-switch
+ * instruction).
+ */
+extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+  uint16_t size;
+  const int32_t* keys;
+  const int32_t* entries;
+
+  /*
+   * Sparse switch data format:
+   *  ushort ident = 0x0200   magic value
+   *  ushort size             number of entries in the table; > 0
+   *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (2+size*4) 16-bit code units.
+   */
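+  /*
+   * Hypothetical example: keys {1, 5} with targets {10, 20} encode as the
+   * code units 0x0200, 0x0002 followed by the 32-bit values 1, 5, 10, 20;
+   * a lookup with testVal == 5 returns 20.
+   */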
+
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kSparseSwitchSignature));
+
+  size = *switchData++;
+
+  /* The keys are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  keys = reinterpret_cast<const int32_t*>(switchData);
+
+  /* The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  entries = keys + size;
+
+  /*
+   * Binary-search through the array of keys, which are guaranteed to
+   * be sorted low-to-high.
+   */
+  int lo = 0;
+  int hi = size - 1;
+  while (lo <= hi) {
+    int mid = (lo + hi) >> 1;
+
+    int32_t foundVal = keys[mid];
+    if (testVal < foundVal) {
+      hi = mid - 1;
+    } else if (testVal > foundVal) {
+      lo = mid + 1;
+    } else {
+      return entries[mid];
+    }
+  }
+  return kInstrLen;
+}
+
+extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+  const int kInstrLen = 3;
+
+  /*
+   * Packed switch data format:
+   *  ushort ident = 0x0100   magic value
+   *  ushort size             number of entries in the table
+   *  int first_key           first (and lowest) switch case value
+   *  int targets[size]       branch targets, relative to switch opcode
+   *
+   * Total size is (4+size*2) 16-bit code units.
+   */
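+  /*
+   * Hypothetical example: ident 0x0100, size == 2, first_key == 3,
+   * targets {10, 20}; testVal == 4 gives index 1 and returns 20, while
+   * any value outside [3, 4] returns kInstrLen.
+   */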
+  uint16_t signature = *switchData++;
+  DCHECK_EQ(signature, static_cast<uint16_t>(art::Instruction::kPackedSwitchSignature));
+
+  uint16_t size = *switchData++;
+
+  int32_t firstKey = *switchData++;
+  firstKey |= (*switchData++) << 16;
+
+  int index = testVal - firstKey;
+  if (index < 0 || index >= size) {
+    return kInstrLen;
+  }
+
+  /*
+   * The entries are guaranteed to be aligned on a 32-bit boundary;
+   * we can treat them as a native int array.
+   */
+  const int32_t* entries = reinterpret_cast<const int32_t*>(switchData);
+  return entries[index];
+}
+
+
+extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
+                                   uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame,
+                                 uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame,
+                                     uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, false, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kVirtual, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame,
+                                      uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kSuper, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kInterface, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kDirect, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvoke<kStatic, true, false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<false>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame,
+                                             uint16_t* dex_pc_ptr,  uint16_t inst_data )
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JValue* result_register = shadow_frame->GetResultRegister();
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoInvokeVirtualQuick<true>(
+      self, *shadow_frame, inst, inst_data, result_register);
+}
+
+extern "C" void MterpThreadFenceForConstructor() {
+  QuasiAtomic::ThreadFenceForConstructor();
+}
+
+extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
+                                 Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  String* s = ResolveString(self, *shadow_frame, index);
+  if (UNLIKELY(s == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, s);
+  return false;
+}
+
+extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
+                                Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  shadow_frame->SetVRegReference(tgt_vreg, c);
+  return false;
+}
+
+extern "C" bool MterpCheckCast(uint32_t index, Object* obj, art::ArtMethod* method,
+                               Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return true;
+  }
+  if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
+    ThrowClassCastException(c, obj->GetClass());
+    return true;
+  }
+  return false;
+}
+
+extern "C" bool MterpInstanceOf(uint32_t index, Object* obj, art::ArtMethod* method,
+                                Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  if (UNLIKELY(c == nullptr)) {
+    return false;  // Caller will check for pending exception.  Return value unimportant.
+  }
+  return (obj != nullptr) && obj->InstanceOf(c);
+}
+
+extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return FillArrayData(obj, payload);
+}
+
+extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  Object* obj = nullptr;
+  Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame->GetMethod(),
+                                    self, false, false);
+  if (LIKELY(c != nullptr)) {
+    if (UNLIKELY(c->IsStringClass())) {
+      gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+      mirror::SetStringCountVisitor visitor(0);
+      obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+    } else {
+      obj = AllocObjectFromCode<false, true>(
+        inst->VRegB_21c(), shadow_frame->GetMethod(), self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+    }
+  }
+  if (UNLIKELY(obj == nullptr)) {
+    return false;
+  }
+  obj->GetClass()->AssertInitializedOrInitializingInThread(self);
+  shadow_frame->SetVRegReference(inst->VRegA_21c(inst_data), obj);
+  return true;
+}
+
+extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<StaticObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
+      (self, *shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                     uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
+}
+
+extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                uint32_t inst_data)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
+  if (UNLIKELY(a == nullptr)) {
+    return false;
+  }
+  int32_t index = shadow_frame->GetVReg(inst->VRegC_23x());
+  Object* val = shadow_frame->GetVRegReference(inst->VRegA_23x(inst_data));
+  ObjectArray<Object>* array = a->AsObjectArray<Object>();
+  if (array->CheckIsValidIndex(index) && array->CheckAssignable(val)) {
+    array->SetWithoutChecks<false>(index, val);
+    return true;
+  }
+  return false;
+}
+
+extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                    Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
+                                               shadow_frame->GetResultRegister());
+}
+
+extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                                         Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
+                                              shadow_frame->GetResultRegister());
+}
+
+extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+                              uint32_t inst_data, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
+  Object* obj = AllocArrayFromCode<false, true>(
+      inst->VRegC_22c(), length, shadow_frame->GetMethod(), self,
+      Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  if (UNLIKELY(obj == nullptr)) {
+      return false;
+  }
+  shadow_frame->SetVRegReference(inst->VRegA_22c(inst_data), obj);
+  return true;
+}
+
+extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(self->IsExceptionPending());
+  const instrumentation::Instrumentation* const instrumentation =
+      Runtime::Current()->GetInstrumentation();
+  uint32_t found_dex_pc = FindNextInstructionFollowingException(self, *shadow_frame,
+                                                                shadow_frame->GetDexPC(),
+                                                                instrumentation);
+  if (found_dex_pc == DexFile::kDexNoIndex) {
+    return false;
+  }
+  // OK - we can deal with it.  Update and continue.
+  shadow_frame->SetDexPC(found_dex_pc);
+  return true;
+}
+
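+// Debug check performed before executing an instruction: every opcode except
+// move-exception (which consumes the pending exception) must start with no
+// exception pending.  This is typically reached via the "alt" handler table.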
+extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
+    self->AssertPendingException();
+  } else {
+    self->AssertNoPendingException();
+  }
+}
+
+extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "DivideByZero: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ArrayIndex: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NegativeArraySize: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NoSuchMethod: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "ExceptionThrown: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "NullObject: " << inst->Opcode(inst_data);
+}
+
+extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  LOG(INFO) << "Fallback: " << inst->Opcode(inst_data) << ", Exception Pending?: "
+            << self->IsExceptionPending();
+}
+
+extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self);
+  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
+  uint16_t inst_data = inst->Fetch16(0);
+  if (flags & kCheckpointRequest) {
+    LOG(INFO) << "Checkpoint fallback: " << inst->Opcode(inst_data);
+  } else if (flags & kSuspendRequest) {
+    LOG(INFO) << "Suspend fallback: " << inst->Opcode(inst_data);
+  }
+}
+
+extern "C" void MterpSuspendCheck(Thread* self)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  self->AllowThreadSuspension();
+}
+
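+// The artSet*FromMterp helpers below mirror the quick entrypoint setters: they
+// try the FindFieldFast lookup first and, where a slow path is provided, fall
+// back to FindFieldFromCode, which may resolve the field and initialize its
+// declaring class.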
+extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
+                                               uint64_t* new_value, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
+                                          sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
+  if (LIKELY(field != nullptr)) {
+    // Compiled code can't use transactional mode.
+    field->Set64<false>(field->GetDeclaringClass(), *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
+                                        ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int8_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimBoolean) {
+      field->SetBoolean<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimByte, type);
+      field->SetByte<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
+                                        ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int16_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimChar) {
+      field->SetChar<false>(obj, new_value);
+    } else {
+      DCHECK_EQ(Primitive::kPrimShort, type);
+      field->SetShort<false>(obj, new_value);
+    }
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         uint32_t new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int32_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->Set32<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         uint64_t* new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
+                                          sizeof(int64_t));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->Set64<false>(obj, *new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
+                                         mirror::Object* new_value, ArtMethod* referrer)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+                                          sizeof(mirror::HeapReference<mirror::Object>));
+  if (LIKELY(field != nullptr && obj != nullptr)) {
+    field->SetObj<false>(obj, new_value);
+    return 0;  // success
+  }
+  return -1;  // failure
+}
+
+extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(arr == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  ObjectArray<Object>* array = arr->AsObjectArray<Object>();
+  if (LIKELY(array->CheckIsValidIndex(index))) {
+    return array->GetWithoutChecks(index);
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
new file mode 100644
index 0000000..90d21e9
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+#define ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
+
+/*
+ * Mterp assembly handler bases
+ */
+extern "C" void* artMterpAsmInstructionStart[];
+extern "C" void* artMterpAsmInstructionEnd[];
+extern "C" void* artMterpAsmAltInstructionStart[];
+extern "C" void* artMterpAsmAltInstructionEnd[];
+
+namespace art {
+
+class Thread;  // Forward declaration; the full definition (thread.h) is not needed here.
+
+namespace interpreter {
+
+void InitMterpTls(Thread* self);
+void CheckMterpAsmConstants();
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_MTERP_MTERP_H_
diff --git a/runtime/interpreter/mterp/mterp_stub.cc b/runtime/interpreter/mterp/mterp_stub.cc
new file mode 100644
index 0000000..7e7337e
--- /dev/null
+++ b/runtime/interpreter/mterp/mterp_stub.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "../interpreter_common.h"
+
+/*
+ * Stub definitions for targets without mterp implementations.
+ */
+
+namespace art {
+namespace interpreter {
+/*
+ * Call this during initialization to verify that the values in asm-constants.h
+ * are still correct.
+ */
+void CheckMterpAsmConstants() {
+  // Dummy version when mterp not implemented.
+}
+
+void InitMterpTls(Thread* self) {
+  self->SetMterpDefaultIBase(nullptr);
+  self->SetMterpCurrentIBase(nullptr);
+  self->SetMterpAltIBase(nullptr);
+}
+
+/*
+ * The platform-specific implementation must provide this.
+ */
+extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,
+                                 ShadowFrame* shadow_frame, JValue* result_register)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  UNUSED(self); UNUSED(shadow_frame); UNUSED(code_item); UNUSED(result_register);
+  UNIMPLEMENTED(art::FATAL);
+  return false;
+}
+
+}  // namespace interpreter
+}  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
new file mode 100644
index 0000000..33036e6
--- /dev/null
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -0,0 +1,12245 @@
+/*
+ * This file was generated automatically by gen-mterp.py for 'arm'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: arm/header.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  Art assembly interpreter notes:
+
+  First validate assembly code by implementing an ExecuteXXXImpl() style body (doesn't
+  handle invoke; allows higher-level code to create the frame & shadow frame).
+
+  Once that's working, support direct entry code & eliminate the shadow frame (and
+  excess locals allocation).
+
+  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
+  base of the vreg array within the shadow frame.  Access the other fields,
+  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
+  the shadow frame mechanism of double-storing object references - via rFP &
+  number_of_vregs_.
+
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.  If VFP
+is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+s0-s15 (d0-d7, q0-q3) do not need to be.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+Mterp and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rSELF     self (Thread) pointer
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+  r11 rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+
+/* During bringup, we'll use the shadow frame model instead of rFP */
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
+
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
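+
+/* Example: "ldr r0, [rFP, #OFF_FP_METHOD]" loads the current ArtMethod* using
+ * a negative offset from the vreg base. */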
+
+/*
+ * The reference interpreter performs explicit suspend checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
+ * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
+ * be done *before* something throws.
+ *
+ * It's okay to do this more than once.
+ *
+ * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
+ * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
+ * offset into the code_items_[] array.  For efficiency, we will "export" the
+ * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
+ * to convert to a dex pc when needed.
+ */
+.macro EXPORT_PC
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+.endm
+
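+/*
+ * Export both the direct dex pc pointer and its code-unit offset form:
+ * dex_pc = (rPC - (code_item + CODEITEM_INSNS_OFFSET)) / 2, since each dex
+ * code unit is two bytes wide.
+ */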
+.macro EXPORT_DEX_PC tmp
+    ldr  \tmp, [rFP, #OFF_FP_CODE_ITEM]
+    str  rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    add  \tmp, #CODEITEM_INSNS_OFFSET
+    sub  \tmp, rPC, \tmp
+    asr  \tmp, #1
+    str  \tmp, [rFP, #OFF_FP_DEX_PC]
+.endm
+
+/*
+ * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
+ */
+.macro FETCH_INST
+    ldrh    rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset.  Advances rPC
+ * to point to the next instruction.  "_count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss.  (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]!
+.endm
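+
+/* Note: the pre-indexed writeback ("]!") form above loads rINST and advances
+ * rPC in a single instruction. */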
+
+/*
+ * The operation performed here is similar to FETCH_ADVANCE_INST, except the
+ * src and dest registers are parameterized (not hard-wired to rPC and rINST).
+ */
+.macro PREFETCH_ADVANCE_INST dreg, sreg, count
+    ldrh    \dreg, [\sreg, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
+ * rINST ahead of possible exception point.  Be sure to manually advance rPC
+ * later.
+ */
+.macro PREFETCH_INST count
+    ldrh    rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+  add  rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by _reg.  Updates
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ *
+ * We want to write "ldrh rINST, [rPC, _reg, lsl #1]!", but some of the
+ * bits that hold the shift distance are used for the half/byte/sign flags.
+ * In some cases we can pre-double _reg for free, so we require a byte offset
+ * here.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    ldrh    rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC.  The
+ * "_count" value is in 16-bit code units.  Does not advance rPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+    ldrh    \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+    ldrsh   \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC.  Pass in the same
+ * "_count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+    ldrb     \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+    and     \reg, rINST, #255
+.endm
+
+/*
+ * Put the prefetched instruction's opcode field into the specified register.
+ */
+.macro GET_PREFETCHED_OPCODE oreg, ireg
+    and     \oreg, \ireg, #255
+.endm
+
+/*
+ * Begin executing the opcode in _reg.  Because this only jumps within the
+ * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
+ */
+.macro GOTO_OPCODE reg
+    add     pc, rIBASE, \reg, lsl #7
+.endm
+.macro GOTO_OPCODE_BASE base,reg
+    add     pc, \base, \reg, lsl #7
+.endm
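+
+/* Handlers live in fixed 128-byte slots (see the ".balign 128" directives
+ * below), hence the "lsl #7" when indexing off the handler base. */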
+
+/*
+ * Get/set the 32-bit value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+    ldr     \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    mov     \reg, #0
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg, tmpreg
+    str     \reg, [rFP, \vreg, lsl #2]
+    str     \reg, [rREFS, \vreg, lsl #2]
+.endm
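+
+/* SET_VREG also clears the parallel slot in the reference array so the GC
+ * never sees a stale reference for a register that now holds a primitive
+ * value; SET_VREG_OBJECT stores the value to both slots instead. */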
+
+/*
+ * Convert a virtual register index into an address.
+ */
+.macro VREG_INDEX_TO_ADDR reg, vreg
+    add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+.endm
+
+/*
+ * Refresh handler table.
+ */
+.macro REFRESH_IBASE
+  ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+.endm
+
+/* File: arm/entry.S */
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Interpreter entry point.
+ */
+
+    .text
+    .align  2
+    .global ExecuteMterpImpl
+    .type   ExecuteMterpImpl, %function
+
+/*
+ * On entry:
+ *  r0  Thread* self
+ *  r1  code_item
+ *  r2  ShadowFrame
+ *  r3  JValue* result_register
+ *
+ */
+
+ExecuteMterpImpl:
+    .fnstart
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align sp to 64 bits
+
+    /* Remember the return register */
+    str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
+
+    /* Remember the code_item */
+    str     r1, [r2, #SHADOWFRAME_CODE_ITEM_OFFSET]
+
+    /* set up "named" registers */
+    mov     rSELF, r0
+    ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs[] in the shadow frame.
+    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
+    add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
+    add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
+    EXPORT_PC
+
+    /* Starting ibase */
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+
+    /* start executing the instruction at rPC */
+    FETCH_INST                          @ load rINST from rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* NOTE: no fallthrough */
+
+
+    .global artMterpAsmInstructionStart
+    .type   artMterpAsmInstructionStart, %function
+artMterpAsmInstructionStart = .L_op_nop
+    .text
+
+/* ------------------------------ */
+    .balign 128
+.L_op_nop: /* 0x00 */
+/* File: arm/op_nop.S */
+    FETCH_ADVANCE_INST 1                @ advance to next instr, load rINST
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    GOTO_OPCODE ip                      @ execute it
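+/* The three instructions above form the standard mterp dispatch tail: fetch
+ * the next 16-bit code unit, extract its opcode, and branch into that
+ * opcode's 128-byte handler slot. */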
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move: /* 0x01 */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
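+/* The ".if 0/.else" sequence is template residue from gen-mterp.py: one
+ * source template expands to both move and move-object, with the constant
+ * selecting the reference-aware store. */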
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_from16: /* 0x02 */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_16: /* 0x03 */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide: /* 0x04 */
+/* File: arm/op_move_wide.S */
+    /* move-wide vA, vB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_from16: /* 0x05 */
+/* File: arm/op_move_wide_from16.S */
+    /* move-wide/from16 vAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 1                         @ r3<- BBBB
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_wide_16: /* 0x06 */
+/* File: arm/op_move_wide_16.S */
+    /* move-wide/16 vAAAA, vBBBB */
+    /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+    FETCH r3, 2                         @ r3<- BBBB
+    FETCH r2, 1                         @ r2<- AAAA
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object: /* 0x07 */
+/* File: arm/op_move_object.S */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_from16: /* 0x08 */
+/* File: arm/op_move_object_from16.S */
+/* File: arm/op_move_from16.S */
+    /* for: move/from16, move-object/from16 */
+    /* op vAA, vBBBB */
+    FETCH r1, 1                         @ r1<- BBBB
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_object_16: /* 0x09 */
+/* File: arm/op_move_object_16.S */
+/* File: arm/op_move_16.S */
+    /* for: move/16, move-object/16 */
+    /* op vAAAA, vBBBB */
+    FETCH r1, 2                         @ r1<- BBBB
+    FETCH r0, 1                         @ r0<- AAAA
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[BBBB]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r2, r0              @ fp[AAAA]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[AAAA]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result: /* 0x0a */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_wide: /* 0x0b */
+/* File: arm/op_move_result_wide.S */
+    /* move-result-wide vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_result_object: /* 0x0c */
+/* File: arm/op_move_result_object.S */
+/* File: arm/op_move_result.S */
+    /* for: move-result, move-result-object */
+    /* op vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r0, [rFP, #OFF_FP_RESULT_REGISTER]  @ get pointer to result JType.
+    ldr     r0, [r0]                    @ r0 <- result.i.
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 1
+    SET_VREG_OBJECT r0, r2, r1          @ fp[AA]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_move_exception: /* 0x0d */
+/* File: arm/op_move_exception.S */
+    /* move-exception vAA */
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov     r1, #0                      @ r1<- 0
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    SET_VREG_OBJECT r3, r2              @ fp[AA]<- exception obj
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str     r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ clear exception
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void: /* 0x0e */
+/* File: arm/op_return_void.S */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return: /* 0x0f */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_wide: /* 0x10 */
+/* File: arm/op_return_wide.S */
+    /*
+     * Return a 64-bit value.
+     */
+    /* return-wide vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    b       MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_object: /* 0x11 */
+/* File: arm/op_return_object.S */
+/* File: arm/op_return.S */
+    /*
+     * Return a 32-bit value.
+     *
+     * for: return, return-object
+     */
+    /* op vAA */
+    .extern MterpThreadFenceForConstructor
+    bl      MterpThreadFenceForConstructor
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA
+    mov     r1, #0
+    b       MterpReturn
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_4: /* 0x12 */
+/* File: arm/op_const_4.S */
+    /* const/4 vA, #+B */
+    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    SET_VREG r1, r0                     @ fp[A]<- r1
+    GOTO_OPCODE ip                      @ execute next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_16: /* 0x13 */
+/* File: arm/op_const_16.S */
+    /* const/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const: /* 0x14 */
+/* File: arm/op_const.S */
+    /* const vAA, #+BBBBbbbb */
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_high16: /* 0x15 */
+/* File: arm/op_const_high16.S */
+    /* const/high16 vAA, #+BBBB0000 */
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, r0, lsl #16             @ r0<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r0, r3                     @ vAA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_16: /* 0x16 */
+/* File: arm/op_const_wide_16.S */
+    /* const-wide/16 vAA, #+BBBB */
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_32: /* 0x17 */
+/* File: arm/op_const_wide_32.S */
+    /* const-wide/32 vAA, #+BBBBbbbb */
+    FETCH r0, 1                         @ r0<- 0000bbbb (low)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    FETCH_S r2, 2                       @ r2<- ssssBBBB (high)
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    mov     r1, r0, asr #31             @ r1<- ssssssss
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide: /* 0x18 */
+/* File: arm/op_const_wide.S */
+    /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (low middle)
+    FETCH r2, 3                         @ r2<- hhhh (high middle)
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb (low word)
+    FETCH r3, 4                         @ r3<- HHHH (high)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
+    FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_wide_high16: /* 0x19 */
+/* File: arm/op_const_wide_high16.S */
+    /* const-wide/high16 vAA, #+BBBB000000000000 */
+    FETCH r1, 1                         @ r1<- 0000BBBB (zero-extended)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    mov     r0, #0                      @ r0<- 00000000
+    mov     r1, r1, lsl #16             @ r1<- BBBB0000
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string: /* 0x1a */
+/* File: arm/op_const_string.S */
+    /* const/string vAA, String@BBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_string_jumbo: /* 0x1b */
+/* File: arm/op_const_string_jumbo.S */
+    /* const/string vAA, String@BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstString            @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 3                     @ load rINST
+    cmp     r0, #0                      @ fail?
+    bne     MterpPossibleException      @ let reference interpreter deal with it.
+    ADVANCE 3                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_const_class: /* 0x1c */
+/* File: arm/op_const_class.S */
+    /* const/class vAA, Class@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- BBBB
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    add     r2, rFP, #OFF_FP_SHADOWFRAME
+    mov     r3, rSELF
+    bl      MterpConstClass             @ (index, tgt_reg, shadow_frame, self)
+    PREFETCH_INST 2
+    cmp     r0, #0
+    bne     MterpPossibleException
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_enter: /* 0x1d */
+/* File: arm/op_monitor_enter.S */
+    /*
+     * Synchronize on an object.
+     */
+    /* monitor-enter vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r0, r2                      @ r0<- vAA (object)
+    mov      r1, rSELF                   @ r1<- self
+    bl       artLockObjectFromCode
+    cmp      r0, #0
+    bne      MterpException
+    FETCH_ADVANCE_INST 1
+    GET_INST_OPCODE ip                   @ extract opcode from rINST
+    GOTO_OPCODE ip                       @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_monitor_exit: /* 0x1e */
+/* File: arm/op_monitor_exit.S */
+    /*
+     * Unlock an object.
+     *
+     * Exceptions that occur when unlocking a monitor need to appear as
+     * if they happened at the following instruction.  See the Dalvik
+     * instruction spec.
+     */
+    /* monitor-exit vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8          @ r2<- AA
+    GET_VREG r0, r2                     @ r0<- vAA (object)
+    mov      r1, rSELF                  @ r1<- self
+    bl       artUnlockObjectFromCode    @ r0<- success for unlock(self, obj)
+    cmp     r0, #0                      @ failed?
+    bne     MterpException
+    FETCH_ADVANCE_INST 1                @ before throw: advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_check_cast: /* 0x1f */
+/* File: arm/op_check_cast.S */
+    /*
+     * Check to see if a cast from one class to another is allowed.
+     */
+    /* check-cast vAA, class@BBBB */
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- BBBB
+    mov      r1, rINST, lsr #8          @ r1<- AA
+    GET_VREG r1, r1                     @ r1<- object
+    ldr      r2, [rFP, #OFF_FP_METHOD]  @ r2<- method
+    mov      r3, rSELF                  @ r3<- self
+    bl       MterpCheckCast             @ (index, obj, method, self)
+    PREFETCH_INST 2
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_instance_of: /* 0x20 */
+/* File: arm/op_instance_of.S */
+    /*
+     * Check to see if an object reference is an instance of a class.
+     *
+     * Most common situation is a non-null object, being compared against
+     * an already-resolved class.
+     */
+    /* instance-of vA, vB, class@CCCC */
+    EXPORT_PC
+    FETCH     r0, 1                     @ r0<- CCCC
+    mov       r1, rINST, lsr #12        @ r1<- B
+    GET_VREG  r1, r1                    @ r1<- vB (object)
+    ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
+    mov       r3, rSELF                 @ r3<- self
+    mov       r9, rINST, lsr #8         @ r9<- A+
+    and       r9, r9, #15               @ r9<- A
+    bl        MterpInstanceOf           @ (index, obj, method, self)
+    ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp       r1, #0                    @ exception pending?
+    bne       MterpException
+    ADVANCE 2                           @ advance rPC
+    SET_VREG r0, r9                     @ vA<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_array_length: /* 0x21 */
+/* File: arm/op_array_length.S */
+    /*
+     * Return the length of an array.
+     */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    GET_VREG r0, r1                     @ r0<- vB (object ref)
+    cmp     r0, #0                      @ is object null?
+    beq     common_errNullObject        @ yup, fail
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- array length
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r3, r2                     @ vB<- length
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_instance: /* 0x22 */
+/* File: arm/op_new_instance.S */
+    /*
+     * Create a new instance of a class.
+     */
+    /* new-instance vAA, class@BBBB */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rSELF
+    mov     r2, rINST
+    bl      MterpNewInstance           @ (shadow_frame, self, inst_data)
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2               @ advance rPC, load rINST
+    GET_INST_OPCODE ip                 @ extract opcode from rINST
+    GOTO_OPCODE ip                     @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_new_array: /* 0x23 */
+/* File: arm/op_new_array.S */
+    /*
+     * Allocate an array of objects, specified with the array class
+     * and a count.
+     *
+     * The verifier guarantees that this is an array class, so we don't
+     * check for it here.
+     */
+    /* new-array vA, vB, class@CCCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array: /* 0x24 */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArray
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArray
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_filled_new_array_range: /* 0x25 */
+/* File: arm/op_filled_new_array_range.S */
+/* File: arm/op_filled_new_array.S */
+    /*
+     * Create a new array with elements filled from registers.
+     *
+     * for: filled-new-array, filled-new-array/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+    .extern MterpFilledNewArrayRange
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rSELF
+    bl      MterpFilledNewArrayRange
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_fill_array_data: /* 0x26 */
+/* File: arm/op_fill_array_data.S */
+    /* fill-array-data vAA, +BBBBBBBB */
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r1, r0, r1, lsl #16         @ r1<- BBBBbbbb
+    GET_VREG r0, r3                     @ r0<- vAA (array object)
+    add     r1, rPC, r1, lsl #1         @ r1<- PC + BBBBbbbb*2 (array data off.)
+    bl      MterpFillArrayData          @ (obj, payload)
+    cmp     r0, #0                      @ 0 means an exception is thrown
+    beq     MterpPossibleException      @ exception?
+    FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_throw: /* 0x27 */
+/* File: arm/op_throw.S */
+    /*
+     * Throw an exception object in the current thread.
+     */
+    /* throw vAA */
+    EXPORT_PC
+    mov      r2, rINST, lsr #8           @ r2<- AA
+    GET_VREG r1, r2                      @ r1<- vAA (exception object)
+    cmp      r1, #0                      @ null object?
+    beq      common_errNullObject        @ yes, throw an NPE instead
+    str      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ thread->exception<- obj
+    b        MterpException
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto: /* 0x28 */
+/* File: arm/op_goto.S */
+    /*
+     * Unconditional branch, 8-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto +AA */
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset (flags set by movs)
+       @ If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset (flags set by movs)
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+       @ If backwards branch refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
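+/* Note: the "movs" above sets the N flag from the sign-extended branch offset;
+ * backward branches (negative offsets) take the ldrmi/bmi path so suspend
+ * requests are honored on loop back-edges. */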
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_16: /* 0x29 */
+/* File: arm/op_goto_16.S */
+    /*
+     * Unconditional branch, 16-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     */
+    /* goto/16 +AAAA */
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_goto_32: /* 0x2a */
+/* File: arm/op_goto_32.S */
+    /*
+     * Unconditional branch, 32-bit offset.
+     *
+     * The branch distance is a signed code-unit offset, which we need to
+     * double to get a byte offset.
+     *
+     * Unlike most opcodes, this one is allowed to branch to itself, so
+     * our "backward branch" test must be "<=0" instead of "<0".  Because
+     * we need the V bit set, we'll use an adds to convert from Dalvik
+     * offset to byte offset.
+     */
+    /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+/* ------------------------------ */
+    .balign 128
+.L_op_packed_switch: /* 0x2b */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch         @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
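+    /*
+     * What the helper has to do, as a minimal C sketch (names are
+     * illustrative, not MterpDoPackedSwitch's actual code).  The dex
+     * packed-switch-payload is: ushort ident=0x0100, ushort size,
+     * int first_key, int targets[size]:
+     *
+     *   int32_t DoPackedSwitch(const uint16_t* payload, int32_t val) {
+     *       uint16_t size = payload[1];
+     *       int32_t first_key = *(const int32_t*)(payload + 2);
+     *       const int32_t* targets = (const int32_t*)(payload + 4);
+     *       uint32_t idx = (uint32_t)(val - first_key);
+     *       // default: fall through the 3-code-unit switch instruction
+     *       return (idx < size) ? targets[idx] : 3;
+     *   }
+     */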
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sparse_switch: /* 0x2c */
+/* File: arm/op_sparse_switch.S */
+/* File: arm/op_packed_switch.S */
+    /*
+     * Handle a packed-switch or sparse-switch instruction.  In both cases
+     * we decode it and hand it off to a helper function.
+     *
+     * We don't really expect backward branches in a switch statement, but
+     * they're perfectly legal, so we check for them here.
+     *
+     * for: packed-switch, sparse-switch
+     */
+    /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch         @ r0<- code-unit branch offset
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch         @ r0<- code-unit branch offset
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
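+    /*
+     * Likewise for sparse-switch, a minimal C sketch (illustrative names).
+     * The dex sparse-switch-payload is: ushort ident=0x0200, ushort size,
+     * int keys[size] (sorted ascending), int targets[size], which invites
+     * a binary search:
+     *
+     *   int32_t DoSparseSwitch(const uint16_t* payload, int32_t val) {
+     *       uint16_t size = payload[1];
+     *       const int32_t* keys = (const int32_t*)(payload + 2);
+     *       const int32_t* targets = keys + size;
+     *       int lo = 0, hi = (int)size - 1;
+     *       while (lo <= hi) {
+     *           int mid = (lo + hi) >> 1;
+     *           if (keys[mid] < val)      lo = mid + 1;
+     *           else if (keys[mid] > val) hi = mid - 1;
+     *           else                      return targets[mid];
+     *       }
+     *       return 3;                    // default: fall through
+     *   }
+     */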
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_float: /* 0x2d */
+/* File: arm/op_cmpl_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;  // unordered: at least one of x, y is NaN
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
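+    /*
+     * The NaN bias in the final "else" is the whole point of having both
+     * cmpl and cmpg: for Java "x > y" a compiler emits cmpl + if-gtz, and
+     * for "x < y" cmpg + if-ltz, so a NaN operand makes both tests fail,
+     * as the language requires.
+     */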
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_float: /* 0x2e */
+/* File: arm/op_cmpg_float.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;   // unordered: at least one of x, y is NaN
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    flds    s0, [r2]                    @ s0<- vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpl_double: /* 0x2f */
+/* File: arm/op_cmpl_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;  // unordered: at least one of x, y is NaN
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mvn     r0, #0                      @ r0<- -1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmpg_double: /* 0x30 */
+/* File: arm/op_cmpg_double.S */
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;   // unordered: at least one of x, y is NaN
+     *     }
+     * }
+     */
+    /* op vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, #1                      @ r0<- 1 (default)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_cmp_long: /* 0x31 */
+/* File: arm/op_cmp_long.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .Lop_cmp_long_less            @ signed compare on high part
+    bgt     .Lop_cmp_long_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .Lop_cmp_long_greater         @ unsigned compare on low part
+    bne     .Lop_cmp_long_less
+    b       .Lop_cmp_long_finish          @ equal; r1 already holds 0
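+    /*
+     * The multi-compare above, as a C sketch: signed compare on the high
+     * words first, unsigned compare on the low words only if they match:
+     *
+     *   int CmpLong(int64_t x, int64_t y) {
+     *       int32_t xh = (int32_t)(x >> 32), yh = (int32_t)(y >> 32);
+     *       if (xh != yh) return (xh < yh) ? -1 : 1;
+     *       uint32_t xl = (uint32_t)x, yl = (uint32_t)y;
+     *       if (xl == yl) return 0;
+     *       return (xl < yl) ? -1 : 1;   // note: unsigned on the low half
+     *   }
+     */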
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eq: /* 0x32 */
+/* File: arm/op_if_eq.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movne   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movne   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
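+    /*
+     * The "revcmp" condition selects the not-taken move above; the six
+     * instantiations in this file are:
+     *
+     *   if-eq -> movne    if-ne -> moveq    if-lt -> movge
+     *   if-ge -> movlt    if-gt -> movle    if-le -> movgt
+     *
+     * i.e. r1 keeps the fetched branch offset only when the comparison
+     * holds, and is otherwise overwritten with 2, the width of the if-cmp
+     * instruction in code units.
+     */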
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ne: /* 0x33 */
+/* File: arm/op_if_ne.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    moveq   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    moveq   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lt: /* 0x34 */
+/* File: arm/op_if_lt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movge   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movge   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ge: /* 0x35 */
+/* File: arm/op_if_ge.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movlt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movlt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gt: /* 0x36 */
+/* File: arm/op_if_gt.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movle   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movle   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_le: /* 0x37 */
+/* File: arm/op_if_le.S */
+/* File: arm/bincmp.S */
+    /*
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+     */
+    /* if-cmp vA, vB, +CCCC */
+#if MTERP_SUSPEND
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movgt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    GET_VREG r3, r1                     @ r3<- vB
+    GET_VREG r2, r0                     @ r2<- vA
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, r3                      @ compare (vA, vB)
+    movgt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_eqz: /* 0x38 */
+/* File: arm/op_if_eqz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movne   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movne   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
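+    /*
+     * Branch arithmetic shared by these handlers, as a C sketch
+     * (illustrative names):
+     *
+     *   int32_t off = taken ? (int16_t)BBBB : 2;   // code units
+     *   pc += off * 2;                             // code units -> bytes
+     *   if (off < 0) CheckSuspendAndContinue();    // backward branch
+     */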
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_nez: /* 0x39 */
+/* File: arm/op_if_nez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    moveq   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    moveq   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_ltz: /* 0x3a */
+/* File: arm/op_if_ltz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movge   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movge   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gez: /* 0x3b */
+/* File: arm/op_if_gez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movlt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movlt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_gtz: /* 0x3c */
+/* File: arm/op_if_gtz.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movle   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movle   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_if_lez: /* 0x3d */
+/* File: arm/op_if_lez.S */
+/* File: arm/zcmp.S */
+    /*
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
+     *
+     * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+     */
+    /* if-cmp vAA, +BBBB */
+#if MTERP_SUSPEND
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movgt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    GET_VREG r2, r0                     @ r2<- vAA
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vAA, 0)
+    movgt   r1, #2                      @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3e: /* 0x3e */
+/* File: arm/op_unused_3e.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_3f: /* 0x3f */
+/* File: arm/op_unused_3f.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_40: /* 0x40 */
+/* File: arm/op_unused_40.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_41: /* 0x41 */
+/* File: arm/op_unused_41.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_42: /* 0x42 */
+/* File: arm/op_unused_42.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_43: /* 0x43 */
+/* File: arm/op_unused_43.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget: /* 0x44 */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldr   r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
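+    /*
+     * The null and range checks above, as a C sketch (field names are
+     * illustrative; the real layout comes from the MIRROR_* offsets):
+     *
+     *   if (array == NULL) goto common_errNullObject;
+     *   if ((uint32_t)index >= (uint32_t)array->length)  // bcs: unsigned
+     *       goto common_errArrayIndex;
+     *   vAA = array->data[index];                        // width 1 << shift
+     */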
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_wide: /* 0x45 */
+/* File: arm/op_aget_wide.S */
+    /*
+     * Array get, 64 bits.  vAA <- vBB[vCC].
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use LDRD.
+     */
+    /* aget-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_object: /* 0x46 */
+/* File: arm/op_aget_object.S */
+    /*
+     * Array object get.  vAA <- vBB[vCC].
+     *
+     * for: aget-object
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    EXPORT_PC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    bl       artAGetObjectFromMterp     @ (array, index)
+    ldr      r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    PREFETCH_INST 2
+    cmp      r1, #0
+    bne      MterpException
+    SET_VREG_OBJECT r0, r9
+    ADVANCE 2
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_boolean: /* 0x47 */
+/* File: arm/op_aget_boolean.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrb   r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_byte: /* 0x48 */
+/* File: arm/op_aget_byte.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsb   r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_char: /* 0x49 */
+/* File: arm/op_aget_char.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrh   r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aget_short: /* 0x4a */
+/* File: arm/op_aget_short.S */
+/* File: arm/op_aget.S */
+    /*
+     * Array get, 32 bits or less.  vAA <- vBB[vCC].
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     *
+     * NOTE: assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldrsh   r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r9              @ vAA<- r2
+    .else
+    SET_VREG r2, r9                     @ vAA<- r2
+    .endif
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput: /* 0x4b */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #2     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    str  r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_wide: /* 0x4c */
+/* File: arm/op_aput_wide.S */
+    /*
+     * Array put, 64 bits.  vBB[vCC] <- vAA.
+     *
+     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
+     */
+    /* aput-wide vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ vBB[vCC]<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_object: /* 0x4d */
+/* File: arm/op_aput_object.S */
+    /*
+     * Store an object into an array.  vBB[vCC] <- vAA.
+     */
+    /* op vAA, vBB, vCC */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpAputObject
+    cmp     r0, #0
+    beq     MterpPossibleException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
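+    /*
+     * aput-object cannot be a plain store: besides the null and bounds
+     * checks it needs the array-store type check (is vAA assignable to
+     * the array's component type?) and, presumably, a GC write barrier,
+     * so the whole operation is delegated to the MterpAputObject helper;
+     * a zero return means an exception is pending.
+     */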
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_boolean: /* 0x4e */
+/* File: arm/op_aput_boolean.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_byte: /* 0x4f */
+/* File: arm/op_aput_byte.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #0     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strb  r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_char: /* 0x50 */
+/* File: arm/op_aput_char.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_aput_short: /* 0x51 */
+/* File: arm/op_aput_short.S */
+/* File: arm/op_aput.S */
+    /*
+     * Array put, 32 bits or less.  vBB[vCC] <- vAA.
+     *
+     * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
+     * instructions.  We use a pair of FETCH_Bs instead.
+     *
+     * for: aput, aput-boolean, aput-byte, aput-char, aput-short
+     *
+     * NOTE: this assumes data offset for arrays is the same for all non-wide types.
+     * If this changes, specialize.
+     */
+    /* op vAA, vBB, vCC */
+    FETCH_B r2, 1, 0                    @ r2<- BB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    FETCH_B r3, 1, 1                    @ r3<- CC
+    GET_VREG r0, r2                     @ r0<- vBB (array object)
+    GET_VREG r1, r3                     @ r1<- vCC (requested index)
+    cmp     r0, #0                      @ null array object?
+    beq     common_errNullObject        @ yes, bail
+    ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]     @ r3<- arrayObj->length
+    add     r0, r0, r1, lsl #1     @ r0<- arrayObj + index*width
+    cmp     r1, r3                      @ compare unsigned index, length
+    bcs     common_errArrayIndex        @ index >= length, bail
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_VREG r2, r9                     @ r2<- vAA
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    strh  r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ vBB[vCC]<- r2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget: /* 0x52 */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet32InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
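+    /*
+     * Annotation (a sketch of the assumed runtime-side prototype; the
+     * runtime sources are authoritative): the call above marshals AAPCS
+     * arguments as
+     *   artGet32InstanceFromCode(uint32_t field_idx,   // r0, CCCC
+     *                            mirror::Object* obj,  // r1, fp[B]
+     *                            ArtMethod* referrer,  // r2
+     *                            Thread* self)         // r3
+     * with the 32-bit result returned in r0.
+     */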
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide: /* 0x53 */
+/* File: arm/op_iget_wide.S */
+    /*
+     * 64-bit instance field get.
+     *
+     * for: iget-wide
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGet64InstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpException                @ bail out
+    add     r3, rFP, r2, lsl #2            @ r3<- &fp[A]
+    stmia   r3, {r0-r1}                    @ fp[A]<- r0/r1
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object: /* 0x54 */
+/* File: arm/op_iget_object.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetObjInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 1
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean: /* 0x55 */
+/* File: arm/op_iget_boolean.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetBooleanInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte: /* 0x56 */
+/* File: arm/op_iget_byte.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetByteInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char: /* 0x57 */
+/* File: arm/op_iget_char.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetCharInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short: /* 0x58 */
+/* File: arm/op_iget_short.S */
+/* File: arm/op_iget.S */
+    /*
+     * General instance field get.
+     *
+     * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
+     */
+    EXPORT_PC
+    FETCH    r0, 1                         @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12            @ r1<- B
+    GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
+    ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
+    mov      r3, rSELF                     @ r3<- self
+    bl       artGetShortInstanceFromCode
+    ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx     r2, rINST, #8, #4             @ r2<- A
+    PREFETCH_INST 2
+    cmp      r3, #0
+    bne      MterpPossibleException        @ bail out
+    .if 0
+    SET_VREG_OBJECT r0, r2                 @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                        @ fp[A]<- r0
+    .endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                     @ extract opcode from rINST
+    GOTO_OPCODE ip                         @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput: /* 0x59 */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet32InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet32InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
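+    /*
+     * Annotation (assumed helper shape, not generated code):
+     *   artSet32InstanceFromMterp(uint32_t field_idx,   // r0, CCCC
+     *                             mirror::Object* obj,  // r1, fp[B]
+     *                             uint32_t value,       // r2, fp[A]
+     *                             ArtMethod* referrer)  // r3
+     * returning 0 on success; a non-zero result routes the handler to
+     * MterpPossibleException.
+     */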
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide: /* 0x5a */
+/* File: arm/op_iput_wide.S */
+    /* iput-wide vA, vB, field@CCCC */
+    .extern artSet64InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet64InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object: /* 0x5b */
+/* File: arm/op_iput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpIputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean: /* 0x5c */
+/* File: arm/op_iput_boolean.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte: /* 0x5d */
+/* File: arm/op_iput_byte.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet8InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet8InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char: /* 0x5e */
+/* File: arm/op_iput_char.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short: /* 0x5f */
+/* File: arm/op_iput_short.S */
+/* File: arm/op_iput.S */
+    /*
+     * General 32-bit instance field put.
+     *
+     * for: iput, iput-object, iput-boolean, iput-byte, iput-char, iput-short
+     */
+    /* op vA, vB, field@CCCC */
+    .extern artSet16InstanceFromMterp
+    EXPORT_PC
+    FETCH    r0, 1                      @ r0<- field ref CCCC
+    mov      r1, rINST, lsr #12         @ r1<- B
+    GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
+    ubfx     r2, rINST, #8, #4          @ r2<- A
+    GET_VREG r2, r2                     @ r2<- fp[A]
+    ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
+    PREFETCH_INST 2
+    bl       artSet16InstanceFromMterp
+    cmp      r0, #0
+    bne      MterpPossibleException
+    ADVANCE  2                          @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget: /* 0x60 */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGet32StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet32StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
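+    /*
+     * Annotation (assumed entrypoint shape; the runtime sources are
+     * authoritative):
+     *   artGet32StaticFromCode(uint32_t field_idx,  // r0, BBBB
+     *                          ArtMethod* referrer, // r1
+     *                          Thread* self)        // r2
+     * Since the result in r0 cannot signal failure, the handler re-reads
+     * self->exception afterwards to detect a failed resolution.
+     */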
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_wide: /* 0x61 */
+/* File: arm/op_sget_wide.S */
+    /*
+     * SGET_WIDE handler wrapper.
+     */
+    /* sget-wide vAA, field@BBBB */
+
+    .extern artGet64StaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGet64StaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r9, rINST, lsr #8             @ r9<- AA
+    add   r9, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_object: /* 0x62 */
+/* File: arm/op_sget_object.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetObjStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetObjStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 1
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_boolean: /* 0x63 */
+/* File: arm/op_sget_boolean.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetBooleanStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetBooleanStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_byte: /* 0x64 */
+/* File: arm/op_sget_byte.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetByteStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetByteStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_char: /* 0x65 */
+/* File: arm/op_sget_char.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetCharStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetCharStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sget_short: /* 0x66 */
+/* File: arm/op_sget_short.S */
+/* File: arm/op_sget.S */
+    /*
+     * General SGET handler wrapper.
+     *
+     * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
+     */
+    /* op vAA, field@BBBB */
+
+    .extern artGetShortStaticFromCode
+    EXPORT_PC
+    FETCH r0, 1                         @ r0<- field ref BBBB
+    ldr   r1, [rFP, #OFF_FP_METHOD]
+    mov   r2, rSELF
+    bl    artGetShortStaticFromCode
+    ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    mov   r2, rINST, lsr #8             @ r2<- AA
+    PREFETCH_INST 2
+    cmp   r3, #0                        @ Failed to resolve?
+    bne   MterpException                @ bail out
+.if 0
+    SET_VREG_OBJECT r0, r2              @ fp[AA]<- r0
+.else
+    SET_VREG r0, r2                     @ fp[AA]<- r0
+.endif
+    ADVANCE 2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput: /* 0x67 */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet32StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
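+    /*
+     * Annotation (assumed entrypoint shape, not generated code):
+     *   artSet32StaticFromCode(uint32_t field_idx,  // r0, BBBB
+     *                          uint32_t new_value,  // r1, fp[AA]
+     *                          ArtMethod* referrer, // r2
+     *                          Thread* self)        // r3
+     * returning 0 on success and non-zero on failure, as the cmp/bne above
+     * expects.
+     */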
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_wide: /* 0x68 */
+/* File: arm/op_sput_wide.S */
+    /*
+     * SPUT_WIDE handler wrapper.
+     */
+    /* sput-wide vAA, field@BBBB */
+    .extern artSet64IndirectStaticFromMterp
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    ldr     r1, [rFP, #OFF_FP_METHOD]
+    mov     r2, rINST, lsr #8           @ r2<- AA
+    add     r2, rFP, r2, lsl #2
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet64IndirectStaticFromMterp
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_object: /* 0x69 */
+/* File: arm/op_sput_object.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    mov     r3, rSELF
+    bl      MterpSputObject
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_boolean: /* 0x6a */
+/* File: arm/op_sput_boolean.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_byte: /* 0x6b */
+/* File: arm/op_sput_byte.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet8StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_char: /* 0x6c */
+/* File: arm/op_sput_char.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sput_short: /* 0x6d */
+/* File: arm/op_sput_short.S */
+/* File: arm/op_sput.S */
+    /*
+     * General SPUT handler wrapper.
+     *
+     * for: sput, sput-boolean, sput-byte, sput-char, sput-short
+     */
+    /* op vAA, field@BBBB */
+    EXPORT_PC
+    FETCH   r0, 1                       @ r0<- field ref BBBB
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    GET_VREG r1, r3                     @ r1<- fp[AA]
+    ldr     r2, [rFP, #OFF_FP_METHOD]
+    mov     r3, rSELF
+    PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
+    bl      artSet16StaticFromCode
+    cmp     r0, #0                      @ 0 on success, -1 on failure
+    bne     MterpException
+    ADVANCE 2                           @ Past exception point - now advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual: /* 0x6e */
+/* File: arm/op_invoke_virtual.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtual
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtual
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a virtual method call.
+     *
+     * for: invoke-virtual, invoke-virtual/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
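+    /*
+     * Annotation (assumed C-side shape shared by every MterpInvoke* helper
+     * below; the runtime sources are authoritative):
+     *   MterpInvokeVirtual(Thread* self,              // r0
+     *                      ShadowFrame* shadow_frame, // r1, rFP + OFF_FP_SHADOWFRAME
+     *                      uint16_t* dex_pc_ptr,      // r2, rPC
+     *                      uint16_t inst_data)        // r3, rINST
+     * A zero return means an exception is pending and control transfers to
+     * MterpException.
+     */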
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super: /* 0x6f */
+/* File: arm/op_invoke_super.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuper
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuper
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle a "super" method call.
+     *
+     * for: invoke-super, invoke-super/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op vAA, {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct: /* 0x70 */
+/* File: arm/op_invoke_direct.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirect
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirect
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static: /* 0x71 */
+/* File: arm/op_invoke_static.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStatic
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStatic
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface: /* 0x72 */
+/* File: arm/op_invoke_interface.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterface
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterface
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+    /*
+     * Handle an interface method call.
+     *
+     * for: invoke-interface, invoke-interface/range
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/op_return_void_no_barrier.S */
+    mov    r0, #0
+    mov    r1, #0
+    b      MterpReturn
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/op_invoke_virtual_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_super_range: /* 0x75 */
+/* File: arm/op_invoke_super_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeSuperRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeSuperRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_direct_range: /* 0x76 */
+/* File: arm/op_invoke_direct_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeDirectRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeDirectRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_static_range: /* 0x77 */
+/* File: arm/op_invoke_static_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeStaticRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeStaticRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_interface_range: /* 0x78 */
+/* File: arm/op_invoke_interface_range.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeInterfaceRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeInterfaceRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_79: /* 0x79 */
+/* File: arm/op_unused_79.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_7a: /* 0x7a */
+/* File: arm/op_unused_7a.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_int: /* 0x7b */
+/* File: arm/op_neg_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsb     r0, r0, #0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
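+    /*
+     * Worked example (annotation only): "rsb r0, r0, #0" computes 0 - r0,
+     * so vB = 5 yields -5; with two's-complement wraparound INT_MIN negates
+     * to itself, which matches Dalvik neg-int semantics.
+     */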
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_int: /* 0x7c */
+/* File: arm/op_not_int.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_long: /* 0x7d */
+/* File: arm/op_neg_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    rsbs    r0, r0, #0                           @ optional op; may set condition codes
+    rsc     r1, r1, #0                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
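+    /*
+     * Worked example (annotation only): the rsbs/rsc pair above is a 64-bit
+     * negate -- rsbs produces lo = 0 - lo and sets the carry (borrow) flag,
+     * rsc then produces hi = 0 - hi - (1 - carry). For instance,
+     * 0x00000000_00000001 negates to 0xFFFFFFFF_FFFFFFFF (-1).
+     */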
+
+/* ------------------------------ */
+    .balign 128
+.L_op_not_long: /* 0x7e */
+/* File: arm/op_not_long.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mvn     r0, r0                           @ optional op; may set condition codes
+    mvn     r1, r1                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_float: /* 0x7f */
+/* File: arm/op_neg_float.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r0, r0, #0x80000000                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_neg_double: /* 0x80 */
+/* File: arm/op_neg_double.S */
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r1, r1, #0x80000000                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_long: /* 0x81 */
+/* File: arm/op_int_to_long.S */
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    mov     r1, r0, asr #31                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
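+    /*
+     * Worked example (annotation only): "mov r1, r0, asr #31" replicates
+     * the sign bit into the high word, e.g. vB = 0xFFFFFFFE (-2) gives
+     * r1 = 0xFFFFFFFF, i.e. the 64-bit value 0xFFFFFFFF_FFFFFFFE.
+     */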
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_float: /* 0x82 */
+/* File: arm/op_int_to_float.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fsitos  s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_double: /* 0x83 */
+/* File: arm/op_int_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fsitod  d0, s0                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_int: /* 0x84 */
+/* File: arm/op_long_to_int.S */
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B from 15:12
+    ubfx    r0, rINST, #8, #4           @ r0<- A from 11:8
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    GET_VREG r2, r1                     @ r2<- fp[B]
+    GET_INST_OPCODE ip                  @ ip<- opcode from rINST
+    .if 0
+    SET_VREG_OBJECT r2, r0              @ fp[A]<- r2
+    .else
+    SET_VREG r2, r0                     @ fp[A]<- r2
+    .endif
+    GOTO_OPCODE ip                      @ execute next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_float: /* 0x85 */
+/* File: arm/op_long_to_float.S */
+/* File: arm/unopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0/r1", where
+     * "result" is a 32-bit quantity in r0.
+     *
+     * For: long-to-float, double-to-int, double-to-float
+     *
+     * (This would work for long-to-int, but that instruction is actually
+     * an exact match for op_move.)
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      __aeabi_l2f                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_long_to_double: /* 0x86 */
+/* File: arm/op_long_to_double.S */
+    /*
+     * Specialised 64-bit floating point operation.
+     *
+     * Note: The result will be returned in d2.
+     *
+     * For: long-to-double
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    vldr    d0, [r3]                    @ d0<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    vcvt.f64.s32    d1, s1              @ d1<- (double)(vAAh)
+    vcvt.f64.u32    d2, s0              @ d2<- (double)(vAAl)
+    vldr            d3, constvalop_long_to_double
+    vmla.f64        d2, d1, d3          @ d2<- vAAh*2^32 + vAAl
+
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    vstr.64 d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+    /* literal pool helper */
+constvalop_long_to_double:
+    .8byte          0x41f0000000000000
+
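+    /*
+     * Annotation: 0x41f0000000000000 is the IEEE-754 double encoding of
+     * 2^32 (sign 0, biased exponent 0x41f = 1023 + 32, zero mantissa), so
+     * the vmla above computes (double)vAAh * 2^32 + (double)(unsigned)vAAl;
+     * the power-of-two multiply is exact, leaving only the final add to
+     * round.
+     */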
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_int: /* 0x87 */
+/* File: arm/op_float_to_int.S */
+/* File: arm/funop.S */
+    /*
+     * Generic 32-bit unary floating-point operation.  Provide an "instr"
+     * line that specifies an instruction that performs "s1 = op s0".
+     *
+     * for: int-to-float, float-to-int
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    ftosizs s1, s0                              @ s1<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s1, [r9]                    @ vA<- s1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_long: /* 0x88 */
+/* File: arm/op_float_to_long.S */
+@include "arm/unopWider.S" {"instr":"bl      __aeabi_f2lz"}
+/* File: arm/unopWider.S */
+    /*
+     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = op r0", where
+     * "result" is a 64-bit quantity in r0/r1.
+     *
+     * For: int-to-long, int-to-double, float-to-long, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    bl      f2l_doconv                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 9-10 instructions */
+
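+    /*
+     * Annotation: f2l_doconv is assumed to be a local helper (defined with
+     * the other out-of-line stubs in this file) wrapping __aeabi_f2lz with
+     * Java float-to-long semantics -- NaN converts to 0 and out-of-range
+     * values clamp to Long.MIN_VALUE/Long.MAX_VALUE -- guarantees the bare
+     * EABI routine does not make.
+     */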
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_float_to_double: /* 0x89 */
+/* File: arm/op_float_to_double.S */
+/* File: arm/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    flds    s0, [r3]                    @ s0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fcvtds  d0, s0                              @ d0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fstd    d0, [r9]                    @ vA<- d0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_int: /* 0x8a */
+/* File: arm/op_double_to_int.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    ftosizd  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_long: /* 0x8b */
+/* File: arm/op_double_to_long.S */
+@include "arm/unopWide.S" {"instr":"bl      __aeabi_d2lz"}
+/* File: arm/unopWide.S */
+    /*
+     * Generic 64-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0/r1".
+     * This could be an ARM instruction or a function call.
+     *
+     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      d2l_doconv                              @ r0/r1<- op, r2-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-11 instructions */
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_double_to_float: /* 0x8c */
+/* File: arm/op_double_to_float.S */
+/* File: arm/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit unary floating point operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    fldd    d0, [r3]                    @ d0<- vB
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    and     r9, r9, #15                 @ r9<- A
+    fcvtsd  s0, d0                              @ s0<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    fsts    s0, [r9]                    @ vA<- s0
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_byte: /* 0x8d */
+/* File: arm/op_int_to_byte.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxtb    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_char: /* 0x8e */
+/* File: arm/op_int_to_char.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    uxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_int_to_short: /* 0x8f */
+/* File: arm/op_int_to_short.S */
+/* File: arm/unop.S */
+    /*
+     * Generic 32-bit unary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = op r0".
+     * This could be an ARM instruction or a function call.
+     *
+     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+     *      int-to-byte, int-to-char, int-to-short
+     */
+    /* unop vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r3                     @ r0<- vB
+                               @ optional op; may set condition codes
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    sxth    r0, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 8-9 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int: /* 0x90 */
+/* File: arm/op_add_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
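For orientation, here is what one binop expansion computes per instruction:
decode BB and CC from the second 16-bit code unit, read the two vregs,
apply the op, write vAA. A hedged C model (names are illustrative, not
ART's):

    #include <stdint.h>

    static void binop_add_int(uint32_t *vregs, uint8_t aa, uint16_t ccbb) {
        uint8_t bb = ccbb & 0xff;        /* and r2, r0, #255   */
        uint8_t cc = ccbb >> 8;          /* mov r3, r0, lsr #8 */
        /* chkzero is 0 for add-int, so no divide-by-zero branch. */
        vregs[aa] = vregs[bb] + vregs[cc];
    }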
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int: /* 0x91 */
+/* File: arm/op_sub_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int: /* 0x92 */
+/* File: arm/op_mul_int.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int: /* 0x93 */
+/* File: arm/op_div_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl    __aeabi_idiv                  @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
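The sdiv/helper split above mirrors what a C compiler does for the same
target; a sketch of the selection under the same preprocessor test (inline
asm shown only for the hardware path):

    #include <stdint.h>

    /* Caller has already rejected den == 0, as the handler does. */
    static int32_t div_int(int32_t num, int32_t den) {
    #ifdef __ARM_ARCH_EXT_IDIV__
        int32_t q;
        __asm__("sdiv %0, %1, %2" : "=r"(q) : "r"(num), "r"(den));
        return q;
    #else
        return num / den;                /* lowers to __aeabi_idiv */
    #endif
    }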
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int: /* 0x94 */
+/* File: arm/op_rem_int.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between the sdiv block and the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int
+     *
+     */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls  r1, r1, r2, r0                 @ r1<- op, r0-r2 changed
+#else
+    bl   __aeabi_idivmod                @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
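On the hardware path the remainder is derived from the quotient: sdiv
produces q = a / b and mls then computes a - q * b in one instruction.
Equivalent C sketch:

    #include <stdint.h>

    static int32_t rem_int(int32_t a, int32_t b) {
        int32_t q = a / b;     /* sdiv r2, r0, r1     */
        return a - q * b;      /* mls  r1, r1, r2, r0 */
    }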
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int: /* 0x95 */
+/* File: arm/op_and_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int: /* 0x96 */
+/* File: arm/op_or_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int: /* 0x97 */
+/* File: arm/op_xor_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int: /* 0x98 */
+/* File: arm/op_shl_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int: /* 0x99 */
+/* File: arm/op_shr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int: /* 0x9a */
+/* File: arm/op_ushr_int.S */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long: /* 0x9b */
+/* File: arm/op_add_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
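The adds/adc pair above is the usual 64-bit add over 32-bit halves: add the
low words, then add the high words plus the carry out of the low add. A C
model with the carry made explicit (sketch only):

    #include <stdint.h>

    static void add64(uint32_t *lo, uint32_t *hi,
                      uint32_t blo, uint32_t bhi) {
        uint32_t old = *lo;
        *lo += blo;                          /* adds r0, r0, r2 */
        *hi += bhi + (*lo < old ? 1u : 0u);  /* adc  r1, r1, r3 */
    }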
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long: /* 0x9c */
+/* File: arm/op_sub_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long: /* 0x9d */
+/* File: arm/op_mul_long.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
+
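The WXxYZ diagram in the comment reduces, for a = W:X and b = Y:Z, to
(a * b) mod 2^64 = Z*X + ((Z*W + Y*X) << 32). A C rendering of the same
partial products (sketch; halves passed separately for clarity):

    #include <stdint.h>

    static uint64_t mul64_halves(uint32_t X, uint32_t W,   /* a = W:X */
                                 uint32_t Z, uint32_t Y) { /* b = Y:Z */
        uint64_t zx = (uint64_t)Z * X;        /* umull r9, r10, r2, r0 */
        uint32_t cross = Z * W + Y * X;       /* mul ip + mla r2       */
        return zx + ((uint64_t)cross << 32);  /* add r10, r2, r10      */
    }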
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long: /* 0x9e */
+/* File: arm/op_div_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long: /* 0x9f */
+/* File: arm/op_rem_long.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
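As the note above says, __aeabi_ldivmod hands back quotient (r0/r1) and
remainder (r2/r3) from one call; the portable C analogue is lldiv(), which
also returns both at once:

    #include <stdlib.h>

    /* Caller checks b != 0 first, as the handler does. */
    static long long rem_long(long long a, long long b) {
        lldiv_t r = lldiv(a, b);
        return r.rem;          /* the handler stores r2/r3 */
    }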
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long: /* 0xa0 */
+/* File: arm/op_and_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long: /* 0xa1 */
+/* File: arm/op_or_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long: /* 0xa2 */
+/* File: arm/op_xor_long.S */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long: /* 0xa3 */
+/* File: arm/op_shl_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shl-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
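The shift sequence composes a 64-bit shift from 32-bit pieces, with the
subs/movpl pair patching the distance >= 32 case; shr-long and ushr-long
below are the mirror images. A C model of shl-long, including the 6-bit
mask Dalvik requires (sketch only):

    #include <stdint.h>

    static void shl64(uint32_t *lo, uint32_t *hi, uint32_t dist) {
        uint32_t s = dist & 63;              /* and r2, r2, #63 */
        if (s == 0) return;                  /* avoid UB on 32-bit shifts */
        if (s >= 32) {                       /* movpl fixup path */
            *hi = *lo << (s - 32);
            *lo = 0;
        } else {
            *hi = (*hi << s) | (*lo >> (32 - s));
            *lo <<= s;
        }
    }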
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long: /* 0xa4 */
+/* File: arm/op_shr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* shr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long: /* 0xa5 */
+/* File: arm/op_ushr_long.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to mask off the low
+     * 6 bits of the shift distance.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r3, r0, #255                @ r3<- BB
+    mov     r0, r0, lsr #8              @ r0<- CC
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    GET_VREG r2, r0                     @ r2<- vCC
+    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float: /* 0xa6 */
+/* File: arm/op_add_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
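The "must be an instruction, not a function call" constraint comes from the
softfp ABI: a helper would expect and return float values as raw bit
patterns in core registers, costing extra moves, whereas fadds works on the
s-registers already loaded. A standard-C sketch of that raw-bits view:

    #include <stdint.h>
    #include <string.h>

    /* Under softfp, r0 carries a float's bit pattern across calls. */
    static uint32_t float_bits(float f) {
        uint32_t u;
        memcpy(&u, &f, sizeof u);  /* well-defined reinterpretation */
        return u;
    }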
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float: /* 0xa7 */
+/* File: arm/op_sub_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float: /* 0xa8 */
+/* File: arm/op_mul_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float: /* 0xa9 */
+/* File: arm/op_div_float.S */
+/* File: arm/fbinop.S */
+    /*
+     * Generic 32-bit floating-point operation.  Provide an "instr" line that
+     * specifies an instruction that performs "s2 = s0 op s1".  Because we
+     * use the "softfp" ABI, this must be an instruction, not a function call.
+     *
+     * For: add-float, sub-float, mul-float, div-float
+     */
+    /* floatop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    flds    s1, [r3]                    @ s1<- vCC
+    flds    s0, [r2]                    @ s0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float: /* 0xaa */
+/* File: arm/op_rem_float.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm/binop.S */
+    /*
+     * Generic 32-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.  Note that we
+     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+     * handles it correctly.
+     *
+     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+     *      mul-float, div-float, rem-float
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    GET_VREG r1, r3                     @ r1<- vCC
+    GET_VREG r0, r2                     @ r0<- vBB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 11-14 instructions */
+
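Since the EABI defines no float remainder helper, the handler calls libm's
fmodf through the integer-register (softfp) convention; rem-double, further
down, does the same with fmod. The C-level equivalent is simply:

    #include <math.h>

    static float rem_float(float a, float b) {
        return fmodf(a, b);   /* same routine the handler reaches via bl */
    }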
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double: /* 0xab */
+/* File: arm/op_add_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    faddd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double: /* 0xac */
+/* File: arm/op_sub_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fsubd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double: /* 0xad */
+/* File: arm/op_mul_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fmuld   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double: /* 0xae */
+/* File: arm/op_div_double.S */
+/* File: arm/fbinopWide.S */
+    /*
+     * Generic 64-bit double-precision floating point binary operation.
+     * Provide an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * for: add-double, sub-double, mul-double, div-double
+     */
+    /* doubleop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    mov     r3, r0, lsr #8              @ r3<- CC
+    and     r2, r0, #255                @ r2<- BB
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
+    fldd    d1, [r3]                    @ d1<- vCC
+    fldd    d0, [r2]                    @ d0<- vBB
+
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    fdivd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double: /* 0xaf */
+/* File: arm/op_rem_double.S */
+/* EABI doesn't define a double remainder function, but libm does */
+/* File: arm/binopWide.S */
+    /*
+     * Generic 64-bit binary operation.  Provide an "instr" line that
+     * specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r2-r3).  Useful for integer division and modulus.
+     *
+     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+     *      xor-long, add-double, sub-double, mul-double, div-double,
+     *      rem-double
+     *
+     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH r0, 1                         @ r0<- CCBB
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r0, #255                @ r2<- BB
+    mov     r3, r0, lsr #8              @ r3<- CC
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
+    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 14-17 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_2addr: /* 0xb0 */
+/* File: arm/op_add_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
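The /2addr encodings reuse the destination as the first source: vA <- vA op
vB, with A and B packed as nibbles in the opcode unit itself. A C sketch of
the decode that ubfx/lsr perform above (illustrative names):

    #include <stdint.h>

    static void add_int_2addr(uint32_t *vregs, uint16_t inst) {
        uint8_t a = (inst >> 8) & 0xf;   /* ubfx r9, rINST, #8, #4  */
        uint8_t b = inst >> 12;          /* mov  r3, rINST, lsr #12 */
        vregs[a] = vregs[a] + vregs[b];
    }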
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/op_sub_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    sub     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/op_mul_int_2addr.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_2addr: /* 0xb3 */
+/* File: arm/op_div_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
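+/*
+ * The #ifdef split above, sketched in C (div_int_2addr_impl is a
+ * hypothetical name, not an ART function):
+ *
+ *   int32_t div_int_2addr_impl(int32_t vA, int32_t vB) {
+ *       // vB == 0 has already branched to common_errDivideByZero
+ *       return vA / vB;  // sdiv with __ARM_ARCH_EXT_IDIV__, else __aeabi_idiv
+ *   }
+ */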
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/op_rem_int_2addr.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/2addr
+     *
+     */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
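+/*
+ * On the sdiv path the remainder is reconstructed without a helper call:
+ * mls r1, r1, r2, r0 computes r0 - r1*r2, i.e. vA - vB*(vA/vB).  A C
+ * sketch of the same arithmetic (rem_int_2addr_impl is a hypothetical
+ * name):
+ *
+ *   int32_t rem_int_2addr_impl(int32_t vA, int32_t vB) {
+ *       int32_t q = vA / vB;  // sdiv r2, r0, r1
+ *       return vA - q * vB;   // mls r1, r1, r2, r0
+ *   }
+ */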
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_2addr: /* 0xb5 */
+/* File: arm/op_and_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_2addr: /* 0xb6 */
+/* File: arm/op_or_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/op_xor_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/op_shl_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/op_shr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_2addr: /* 0xba */
+/* File: arm/op_ushr_int_2addr.S */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_long_2addr: /* 0xbb */
+/* File: arm/op_add_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    adds    r0, r0, r2                           @ optional op; may set condition codes
+    adc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_long_2addr: /* 0xbc */
+/* File: arm/op_sub_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    subs    r0, r0, r2                           @ optional op; may set condition codes
+    sbc     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_long_2addr: /* 0xbd */
+/* File: arm/op_mul_long_2addr.S */
+    /*
+     * Signed 64-bit integer multiply, "/2addr" version.
+     *
+     * See op_mul_long for an explanation.
+     *
+     * We get a little tight on registers, so to avoid looking up &fp[A]
+     * again we stuff it into rINST.
+     */
+    /* mul-long/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    GOTO_OPCODE ip                      @ jump to next instruction
+
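+/*
+ * Writing vA as W:X (r1:r0) and vB as Y:Z (r3:r2), the low 64 bits of
+ * the product are X*Z + ((X*Y + W*Z) << 32), which is what the
+ * umull/mul/mla sequence computes.  A C sketch of the decomposition
+ * (mul_long_impl is a hypothetical name):
+ *
+ *   uint64_t mul_long_impl(uint64_t a, uint64_t b) {
+ *       uint32_t x = (uint32_t)a, w = (uint32_t)(a >> 32);
+ *       uint32_t z = (uint32_t)b, y = (uint32_t)(b >> 32);
+ *       uint64_t lo = (uint64_t)x * z;     // umull r9, r10, r2, r0
+ *       uint32_t hi = x * y + w * z;       // mul ip, r2, r1 / mla r2, r0, r3, ip
+ *       return lo + ((uint64_t)hi << 32);  // add r10, r2, r10
+ *   }
+ */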
+/* ------------------------------ */
+    .balign 128
+.L_op_div_long_2addr: /* 0xbe */
+/* File: arm/op_div_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
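+/*
+ * The enabled chkzero block tests the full 64-bit divisor in one step:
+ * orrs ip, r2, r3 ORs the two halves and sets Z only if both are zero.
+ * In C terms:
+ *
+ *   if (((uint32_t)vB | (uint32_t)(vB >> 32)) == 0)  // orrs ip, r2, r3
+ *       goto common_errDivideByZero;
+ */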
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_long_2addr: /* 0xbf */
+/* File: arm/op_rem_long_2addr.S */
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 1
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      __aeabi_ldivmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r2,r3}     @ vAA/vAA+1<- r2/r3
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
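+/*
+ * div-long/2addr and rem-long/2addr share one __aeabi_ldivmod call,
+ * which returns the quotient in r0/r1 and the remainder in r2/r3; the
+ * two handlers differ only in which pair they store back.  A C sketch
+ * (ldivmod64 is a hypothetical wrapper name):
+ *
+ *   typedef struct { int64_t quot, rem; } ldivmod64_t;
+ *   ldivmod64_t ldivmod64(int64_t a, int64_t b) {  // b != 0 checked above
+ *       ldivmod64_t r = { a / b, a % b };          // one helper call yields both
+ *       return r;
+ *   }
+ */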
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_long_2addr: /* 0xc0 */
+/* File: arm/op_and_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    and     r0, r0, r2                           @ optional op; may set condition codes
+    and     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_long_2addr: /* 0xc1 */
+/* File: arm/op_or_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    orr     r0, r0, r2                           @ optional op; may set condition codes
+    orr     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/op_xor_long_2addr.S */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+    eor     r0, r0, r2                           @ optional op; may set condition codes
+    eor     r1, r1, r3                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/op_shl_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shl-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
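+/*
+ * The sequence above builds a 64-bit shift from 32-bit pieces: for
+ * r2 < 32 the high word becomes (hi << r2) | (lo >> (32 - r2)), and for
+ * r2 >= 32 the movpl (taken when r2 - 32 >= 0) replaces it with
+ * lo << (r2 - 32).  A C sketch (shl_long_impl is a hypothetical name):
+ *
+ *   uint64_t shl_long_impl(uint64_t v, uint32_t shift) {
+ *       uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
+ *       shift &= 63;                         // and r2, r2, #63
+ *       if (shift >= 32) {
+ *           hi = lo << (shift - 32);         // movpl r1, r0, asl ip
+ *           lo = 0;
+ *       } else {
+ *           hi = (hi << shift) | (shift ? lo >> (32 - shift) : 0);
+ *           lo <<= shift;                    // ARM register shifts by 32
+ *       }                                    // yield 0, so asm needs no guard
+ *       return ((uint64_t)hi << 32) | lo;
+ *   }
+ */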
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/op_shr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* shr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<- r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/op_ushr_long_2addr.S */
+    /*
+     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
+     * 32-bit shift distance.
+     */
+    /* ushr-long/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r2, r3                     @ r2<- vB
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<- r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_float_2addr: /* 0xc6 */
+/* File: arm/op_add_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fadds   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
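+/*
+ * The VFP "/2addr" template works on the vreg file in place: it turns
+ * the A and B indexes into addresses, loads s0/s1, and stores the
+ * result back through the vA pointer.  In C terms (hypothetical name):
+ *
+ *   void fbinop2addr_impl(float *vA, const float *vB) {
+ *       *vA = *vA + *vB;  // fadds s2, s0, s1 / fsts s2, [r9]
+ *   }
+ */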
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/op_sub_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fsubs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/op_mul_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fmuls   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_float_2addr: /* 0xc9 */
+/* File: arm/op_div_float_2addr.S */
+/* File: arm/fbinop2addr.S */
+    /*
+     * Generic 32-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "s2 = s0 op s1".
+     *
+     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    flds    s1, [r3]                    @ s1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    flds    s0, [r9]                    @ s0<- vA
+
+    fdivs   s2, s0, s1                              @ s2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fsts    s2, [r9]                    @ vAA<- s2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_float_2addr: /* 0xca */
+/* File: arm/op_rem_float_2addr.S */
+/* EABI doesn't define a float remainder function, but libm does */
+/* File: arm/binop2addr.S */
+    /*
+     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r1, r3                     @ r1<- vB
+    GET_VREG r0, r9                     @ r0<- vA
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmodf                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
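+/*
+ * With no EABI float remainder helper, the handler leans on libm's
+ * fmodf, whose result carries the sign of the dividend, matching the
+ * rem-float bytecode.  Sketch in C:
+ *
+ *   #include <math.h>
+ *   float rem_float_impl(float vA, float vB) {  // hypothetical name
+ *       return fmodf(vA, vB);                   // bl fmodf
+ *   }
+ */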
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_double_2addr: /* 0xcb */
+/* File: arm/op_add_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    faddd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_sub_double_2addr: /* 0xcc */
+/* File: arm/op_sub_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fsubd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_double_2addr: /* 0xcd */
+/* File: arm/op_mul_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fmuld   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_double_2addr: /* 0xce */
+/* File: arm/op_div_double_2addr.S */
+/* File: arm/fbinopWide2addr.S */
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation.  Provide
+     * an "instr" line that specifies an instruction that performs
+     * "d2 = d0 op d1".
+     *
+     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+     *      div-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r3, rINST, lsr #12          @ r3<- B
+    mov     r9, rINST, lsr #8           @ r9<- A+
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
+    and     r9, r9, #15                 @ r9<- A
+    fldd    d1, [r3]                    @ d1<- vB
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+    fldd    d0, [r9]                    @ d0<- vA
+
+    fdivd   d2, d0, d1                              @ d2<- op
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    fstd    d2, [r9]                    @ vAA<- d2
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_double_2addr: /* 0xcf */
+/* File: arm/op_rem_double_2addr.S */
+/* EABI doesn't define a double remainder function, but libm does */
+/* File: arm/binopWide2addr.S */
+    /*
+     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vB (r2-r3).  Useful for integer division and modulus.
+     *
+     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
+     *      rem-double/2addr
+     */
+    /* binop/2addr vA, vB */
+    mov     r1, rINST, lsr #12          @ r1<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
+    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    .if 0
+    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    bl      fmod                              @ result<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r9, {r0,r1}     @ vAA/vAA+1<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 12-15 instructions */
+
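+/*
+ * rem-double can reuse the integer wide template because, under the
+ * soft-float AAPCS, fmod receives its two doubles in r0/r1 and r2/r3
+ * and returns in r0/r1, exactly the registers the template already
+ * loads and stores.  Sketch in C:
+ *
+ *   #include <math.h>
+ *   double rem_double_impl(double vA, double vB) {  // hypothetical name
+ *       return fmod(vA, vB);                        // bl fmod
+ *   }
+ */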
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit16: /* 0xd0 */
+/* File: arm/op_add_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
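+/*
+ * binop/lit16 decoding in brief: the second code unit is the literal,
+ * fetched sign-extended (ssssCCCC), so the operation is
+ * vA = vB op (int16_t)CCCC.  A C sketch (hypothetical name):
+ *
+ *   int32_t add_int_lit16_impl(int32_t vB, uint16_t cccc) {
+ *       return vB + (int16_t)cccc;  // FETCH_S does the sign-extension
+ *   }
+ */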
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int: /* 0xd1 */
+/* File: arm/op_rsub_int.S */
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/op_mul_int_lit16.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit16: /* 0xd3 */
+/* File: arm/op_div_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl       __aeabi_idiv               @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/op_rem_int_lit16.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit16
+     *
+     */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1
+    mls     r1, r1, r2, r0              @ r1<- op
+#else
+    bl     __aeabi_idivmod              @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit16: /* 0xd5 */
+/* File: arm/op_and_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit16: /* 0xd6 */
+/* File: arm/op_or_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * the literal CCCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/op_xor_int_lit16.S */
+/* File: arm/binopLit16.S */
+    /*
+     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+     */
+    /* binop/lit16 vA, vB, #+CCCC */
+    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
+    mov     r2, rINST, lsr #12          @ r2<- B
+    ubfx    r9, rINST, #8, #4           @ r9<- A
+    GET_VREG r0, r2                     @ r0<- vB
+    .if 0
+    cmp     r1, #0                      @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-13 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_add_int_lit8: /* 0xd8 */
+/* File: arm/op_add_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    add     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
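+/*
+ * Worked decode example for the binop/lit8 format (illustrative comment
+ * only, no code is emitted): "add-int/lit8 v0, v1, #-2" encodes as the code
+ * units 0x00d8 0xfe01, so r9 = AA = 0, FETCH_S sign-extends the second unit
+ * to 0xfffffe01, the "and #255" extracts BB = 1, and "asr #8" yields
+ * r1 = -2; the handler then stores v1 + (-2) into v0.
+ */
+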
+/* ------------------------------ */
+    .balign 128
+.L_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/op_rsub_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    rsb     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_mul_int_lit8: /* 0xda */
+/* File: arm/op_mul_int_lit8.S */
+/* must be "mul r0, r1, r0" -- "r0, r0, r1" is illegal */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    mul     r0, r1, r0                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_div_int_lit8: /* 0xdb */
+/* File: arm/op_div_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * div-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r0, r0, r1                  @ r0<- op
+#else
+    bl   __aeabi_idiv                   @ r0<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                     @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
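+/*
+ * Note (a property of the ARM ISA, not of this template): when
+ * __ARM_ARCH_EXT_IDIV__ is defined, sdiv of 0x80000000 by -1 yields
+ * 0x80000000, which already matches the Dalvik overflow rule for div-int,
+ * so no explicit overflow check is needed here.
+ */
+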
+/* ------------------------------ */
+    .balign 128
+.L_op_rem_int_lit8: /* 0xdc */
+/* File: arm/op_rem_int_lit8.S */
+    /*
+     * Specialized 32-bit binary operation
+     *
+     * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+     * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+     * ARMv7 CPUs that have hardware division support).
+     *
+     * NOTE: idivmod returns quotient in r0 and remainder in r1
+     *
+     * rem-int/lit8
+     *
+     */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+    sdiv    r2, r0, r1                  @ r2<- r0 / r1 (quotient)
+    mls     r1, r1, r2, r0              @ r1<- r0 - (r1 * r2), the remainder
+#else
+    bl       __aeabi_idivmod            @ r1<- op, r0-r3 changed
+#endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+/* ------------------------------ */
+    .balign 128
+.L_op_and_int_lit8: /* 0xdd */
+/* File: arm/op_and_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    and     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_or_int_lit8: /* 0xde */
+/* File: arm/op_or_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    orr     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_xor_int_lit8: /* 0xdf */
+/* File: arm/op_xor_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+                               @ optional op; may set condition codes
+    eor     r0, r0, r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/op_shl_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asl r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/op_shr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, asr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/op_ushr_int_lit8.S */
+/* File: arm/binopLit8.S */
+    /*
+     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
+     * that specifies an instruction that performs "result = r0 op r1".
+     * This could be an ARM instruction or a function call.  (If the result
+     * comes back in a register other than r0, you can override "result".)
+     *
+     * If "chkzero" is set to 1, we perform a divide-by-zero check on
+     * vCC (r1).  Useful for integer division and modulus.
+     *
+     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
+     */
+    /* binop/lit8 vAA, vBB, #+CC */
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
+    mov     r9, rINST, lsr #8           @ r9<- AA
+    and     r2, r3, #255                @ r2<- BB
+    GET_VREG r0, r2                     @ r0<- vBB
+    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended)
+    .if 0
+    @cmp     r1, #0                     @ is second operand zero?
+    beq     common_errDivideByZero
+    .endif
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+
+    and     r1, r1, #31                           @ optional op; may set condition codes
+    mov     r0, r0, lsr r1                              @ r0<- op, r0-r3 changed
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    SET_VREG r0, r9                @ vAA<- r0
+    GOTO_OPCODE ip                      @ jump to next instruction
+    /* 10-12 instructions */
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_quick: /* 0xe3 */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldr   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
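+/*
+ * Note (a sketch of how the generator works, not part of the template
+ * itself): the ".if 0/.else/.endif" above is the template's is-object flag,
+ * resolved when this file is generated; the object variant below selects
+ * SET_VREG_OBJECT so the reference is also recorded in the shadow frame's
+ * reference array for the GC.
+ */
+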
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/op_iget_wide_quick.S */
+    /* iget-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH ip, 1                         @ ip<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_object_quick: /* 0xe5 */
+/* File: arm/op_iget_object_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldr   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 1
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_quick: /* 0xe6 */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    str     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/op_iput_wide_quick.S */
+    /* iput-wide-quick vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r3, 1                         @ r3<- field byte offset
+    GET_VREG r2, r2                     @ r2<- fp[B], the object pointer
+    ubfx    r0, rINST, #8, #4           @ r0<- A
+    cmp     r2, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strd    r0, [r2, r3]                @ obj.field<- r0/r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_object_quick: /* 0xe8 */
+/* File: arm/op_iput_object_quick.S */
+    EXPORT_PC
+    add     r0, rFP, #OFF_FP_SHADOWFRAME
+    mov     r1, rPC
+    mov     r2, rINST
+    bl      MterpIputObjectQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/op_invoke_virtual_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuick
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuick
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
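+/*
+ * Note: the invoke formats occupy three 16-bit code units, hence
+ * FETCH_ADVANCE_INST 3 above; a zero result from the helper signals a
+ * pending exception.
+ */
+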
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/op_invoke_virtual_range_quick.S */
+/* File: arm/invoke.S */
+    /*
+     * Generic invoke handler wrapper.
+     */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
+    .extern MterpInvokeVirtualQuickRange
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rPC
+    mov     r3, rINST
+    bl      MterpInvokeVirtualQuickRange
+    cmp     r0, #0
+    beq     MterpException
+    FETCH_ADVANCE_INST 3
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/op_iput_boolean_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_byte_quick: /* 0xec */
+/* File: arm/op_iput_byte_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strb     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_char_quick: /* 0xed */
+/* File: arm/op_iput_char_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iput_short_quick: /* 0xee */
+/* File: arm/op_iput_short_quick.S */
+/* File: arm/op_iput_quick.S */
+    /* For: iput-quick, iput-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- fp[B], the object pointer
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    GET_VREG r0, r2                     @ r0<- fp[A]
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    strh     r0, [r3, r1]             @ obj.field<- r0
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_boolean_quick: /* 0xef */
+/* File: arm/op_iget_boolean_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrb   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/op_iget_byte_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsb   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_char_quick: /* 0xf1 */
+/* File: arm/op_iget_char_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrh   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_iget_short_quick: /* 0xf2 */
+/* File: arm/op_iget_short_quick.S */
+/* File: arm/op_iget_quick.S */
+    /* For: iget-quick, iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r3, r2                     @ r3<- object we're operating on
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    cmp     r3, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    ldrsh   r0, [r3, r1]                @ r0<- obj.field
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    .if 0
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    .else
+    SET_VREG r0, r2                     @ fp[A]<- r0
+    .endif
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_invoke_lambda: /* 0xf3 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_f4: /* 0xf4 */
+/* File: arm/op_unused_f4.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_capture_variable: /* 0xf5 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_create_lambda: /* 0xf6 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_liberate_variable: /* 0xf7 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_box_lambda: /* 0xf8 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unbox_lambda: /* 0xf9 */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fa: /* 0xfa */
+/* File: arm/op_unused_fa.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fb: /* 0xfb */
+/* File: arm/op_unused_fb.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fc: /* 0xfc */
+/* File: arm/op_unused_fc.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fd: /* 0xfd */
+/* File: arm/op_unused_fd.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_fe: /* 0xfe */
+/* File: arm/op_unused_fe.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+/* ------------------------------ */
+    .balign 128
+.L_op_unused_ff: /* 0xff */
+/* File: arm/op_unused_ff.S */
+/* File: arm/unused.S */
+/*
+ * Bail to reference interpreter to throw.
+ */
+  b MterpFallback
+
+
+    .balign 128
+    .size   artMterpAsmInstructionStart, .-artMterpAsmInstructionStart
+    .global artMterpAsmInstructionEnd
+artMterpAsmInstructionEnd:
+
+/*
+ * ===========================================================================
+ *  Sister implementations
+ * ===========================================================================
+ */
+    .global artMterpAsmSisterStart
+    .type   artMterpAsmSisterStart, %function
+    .text
+    .balign 4
+artMterpAsmSisterStart:
+
+/* continuation for op_cmp_long */
+
+.Lop_cmp_long_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ We would like to predicate the following mov and fall through to avoid
+    @ the branch, but there is no clean way to do it; instead, we just
+    @ replicate the tail end.
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.Lop_cmp_long_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.Lop_cmp_long_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/* continuation for op_float_to_long */
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+f2l_doconv:
+    stmfd   sp!, {r4, lr}
+    mov     r1, #0x5f000000             @ (float)maxlong
+    mov     r4, r0
+    bl      __aeabi_fcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, #0xdf000000             @ (float)minlong
+    bl      __aeabi_fcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    ldmnefd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r4
+    bl      __aeabi_fcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    ldmeqfd sp!, {r4, pc}
+
+    mov     r0, r4                      @ recover arg
+    bl      __aeabi_f2lz                @ convert float to long
+    ldmfd   sp!, {r4, pc}
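+
+/*
+ * Note on the constants above (IEEE-754 encoding, stated for clarity):
+ * 0x5f000000 is 2^63 as a single-precision float (sign 0, biased exponent
+ * 190 = 63 + 127, zero mantissa), i.e. (float)maxlong after rounding, and
+ * 0xdf000000 is -2^63, which is exactly minlong.
+ */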
+
+/* continuation for op_double_to_long */
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification.  The
+ * expected common case is a "reasonable" value that converts directly
+ * to modest integer.  The EABI convert function isn't doing this for us.
+ */
+d2l_doconv:
+    stmfd   sp!, {r4, r5, lr}           @ save regs
+    mov     r3, #0x43000000             @ maxlong, as a double (high word)
+    add     r3, #0x00e00000             @  0x43e00000
+    mov     r2, #0                      @ maxlong, as a double (low word)
+    sub     sp, sp, #4                  @ align for EABI
+    mov     r4, r0                      @ save a copy of r0
+    mov     r5, r1                      @  and r1
+    bl      __aeabi_dcmpge              @ is arg >= maxlong?
+    cmp     r0, #0                      @ nonzero == yes
+    mvnne   r0, #0                      @ return maxlong (7fffffffffffffff)
+    mvnne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r3, #0xc3000000             @ minlong, as a double (high word)
+    add     r3, #0x00e00000             @  0xc3e00000
+    mov     r2, #0                      @ minlong, as a double (low word)
+    bl      __aeabi_dcmple              @ is arg <= minlong?
+    cmp     r0, #0                      @ nonzero == yes
+    movne   r0, #0                      @ return minlong (8000000000000000)
+    movne   r1, #0x80000000
+    bne     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    mov     r2, r4                      @ compare against self
+    mov     r3, r5
+    bl      __aeabi_dcmpeq              @ is arg == self?
+    cmp     r0, #0                      @ zero == no
+    moveq   r1, #0                      @ return zero for NaN
+    beq     1f
+
+    mov     r0, r4                      @ recover arg
+    mov     r1, r5
+    bl      __aeabi_d2lz                @ convert double to long
+
+1:
+    add     sp, sp, #4
+    ldmfd   sp!, {r4, r5, pc}
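+
+/*
+ * Note on the constants above (IEEE-754 encoding, stated for clarity): the
+ * double with high word 0x43e00000 and low word 0 is 2^63 (sign 0, biased
+ * exponent 1086 = 63 + 1023, zero mantissa), and 0xc3e00000:00000000 is
+ * -2^63, exactly minlong.
+ */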
+
+    .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
+    .global artMterpAsmSisterEnd
+artMterpAsmSisterEnd:
+
+
+    .global artMterpAsmAltInstructionStart
+    .type   artMterpAsmAltInstructionStart, %function
+    .text
+
+artMterpAsmAltInstructionStart = .L_ALT_op_nop
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_nop: /* 0x00 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (0 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
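+/*
+ * Note (applies to every .L_ALT_ stub that follows): lr is preloaded with
+ * the primary handler's address, so when MterpCheckBefore returns, control
+ * lands directly in the real instruction handler; the stubs differ only in
+ * the "opcode * 128" offset into artMterpAsmInstructionStart.
+ */
+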
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move: /* 0x01 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (1 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_from16: /* 0x02 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (2 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_16: /* 0x03 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (3 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide: /* 0x04 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (4 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_from16: /* 0x05 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (5 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_wide_16: /* 0x06 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (6 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object: /* 0x07 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (7 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_from16: /* 0x08 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (8 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_object_16: /* 0x09 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (9 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result: /* 0x0a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (10 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_wide: /* 0x0b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (11 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_result_object: /* 0x0c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (12 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_move_exception: /* 0x0d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (13 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void: /* 0x0e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (14 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return: /* 0x0f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (15 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_wide: /* 0x10 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (16 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_object: /* 0x11 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (17 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_4: /* 0x12 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (18 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_16: /* 0x13 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (19 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const: /* 0x14 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (20 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_high16: /* 0x15 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (21 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_16: /* 0x16 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (22 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_32: /* 0x17 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (23 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide: /* 0x18 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (24 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_wide_high16: /* 0x19 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (25 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string: /* 0x1a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (26 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_string_jumbo: /* 0x1b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (27 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_const_class: /* 0x1c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (28 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_enter: /* 0x1d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (29 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_monitor_exit: /* 0x1e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (30 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_check_cast: /* 0x1f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (31 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_instance_of: /* 0x20 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (32 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_array_length: /* 0x21 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (33 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_instance: /* 0x22 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (34 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_new_array: /* 0x23 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (35 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array: /* 0x24 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (36 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_filled_new_array_range: /* 0x25 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (37 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_fill_array_data: /* 0x26 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (38 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_throw: /* 0x27 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (39 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto: /* 0x28 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (40 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_16: /* 0x29 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (41 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_goto_32: /* 0x2a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (42 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_packed_switch: /* 0x2b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (43 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sparse_switch: /* 0x2c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (44 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_float: /* 0x2d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (45 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_float: /* 0x2e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (46 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpl_double: /* 0x2f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (47 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmpg_double: /* 0x30 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (48 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_cmp_long: /* 0x31 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (49 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eq: /* 0x32 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (50 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ne: /* 0x33 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (51 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lt: /* 0x34 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (52 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ge: /* 0x35 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (53 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gt: /* 0x36 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (54 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_le: /* 0x37 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (55 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_eqz: /* 0x38 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (56 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_nez: /* 0x39 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (57 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_ltz: /* 0x3a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (58 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gez: /* 0x3b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (59 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_gtz: /* 0x3c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (60 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_if_lez: /* 0x3d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (61 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3e: /* 0x3e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (62 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
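+/*
+ * Note: unused opcodes such as 0x3e-0x43 still get full 128-byte stubs so
+ * that the alternate handler table stays dense and the opcode * 128 indexing
+ * remains valid for every possible opcode value.
+ */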
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_3f: /* 0x3f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (63 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_40: /* 0x40 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (64 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_41: /* 0x41 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (65 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_42: /* 0x42 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (66 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_43: /* 0x43 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (67 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget: /* 0x44 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (68 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_wide: /* 0x45 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (69 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_object: /* 0x46 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (70 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_boolean: /* 0x47 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (71 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_byte: /* 0x48 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (72 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_char: /* 0x49 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (73 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aget_short: /* 0x4a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (74 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput: /* 0x4b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (75 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_wide: /* 0x4c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (76 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_object: /* 0x4d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (77 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_boolean: /* 0x4e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (78 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_byte: /* 0x4f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (79 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_char: /* 0x50 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (80 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_aput_short: /* 0x51 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (81 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget: /* 0x52 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (82 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide: /* 0x53 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (83 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object: /* 0x54 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (84 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean: /* 0x55 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (85 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte: /* 0x56 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (86 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char: /* 0x57 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (87 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short: /* 0x58 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (88 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput: /* 0x59 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (89 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide: /* 0x5a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (90 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object: /* 0x5b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (91 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean: /* 0x5c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (92 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte: /* 0x5d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (93 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char: /* 0x5e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (94 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short: /* 0x5f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (95 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget: /* 0x60 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (96 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_wide: /* 0x61 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (97 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_object: /* 0x62 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (98 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_boolean: /* 0x63 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (99 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_byte: /* 0x64 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (100 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_char: /* 0x65 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (101 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sget_short: /* 0x66 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (102 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput: /* 0x67 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (103 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_wide: /* 0x68 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (104 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_object: /* 0x69 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (105 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_boolean: /* 0x6a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (106 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_byte: /* 0x6b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (107 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_char: /* 0x6c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (108 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sput_short: /* 0x6d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (109 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual: /* 0x6e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (110 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super: /* 0x6f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (111 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct: /* 0x70 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (112 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static: /* 0x71 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (113 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface: /* 0x72 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (114 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_return_void_no_barrier: /* 0x73 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (115 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range: /* 0x74 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (116 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_super_range: /* 0x75 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (117 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_direct_range: /* 0x76 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (118 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_static_range: /* 0x77 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (119 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_interface_range: /* 0x78 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (120 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_79: /* 0x79 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (121 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_7a: /* 0x7a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (122 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
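+/*
+ * Note (editorial, inferred): unused opcodes such as 0x79/0x7a above still
+ * get full ALT stubs, presumably so the 128-byte-per-opcode ALT table stays
+ * dense and the computed dispatch (opcode * 128) needs no validity check
+ * before jumping.
+ */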
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_int: /* 0x7b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (123 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_int: /* 0x7c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (124 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_long: /* 0x7d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (125 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_not_long: /* 0x7e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (126 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_float: /* 0x7f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (127 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_neg_double: /* 0x80 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (128 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_long: /* 0x81 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (129 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_float: /* 0x82 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (130 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_double: /* 0x83 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (131 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_int: /* 0x84 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (132 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_float: /* 0x85 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (133 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_long_to_double: /* 0x86 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (134 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_int: /* 0x87 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (135 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_long: /* 0x88 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (136 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_float_to_double: /* 0x89 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (137 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_int: /* 0x8a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (138 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_long: /* 0x8b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (139 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_double_to_float: /* 0x8c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (140 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_byte: /* 0x8d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (141 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_char: /* 0x8e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (142 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_int_to_short: /* 0x8f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (143 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int: /* 0x90 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (144 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int: /* 0x91 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (145 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int: /* 0x92 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (146 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int: /* 0x93 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (147 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int: /* 0x94 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (148 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int: /* 0x95 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (149 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int: /* 0x96 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (150 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int: /* 0x97 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (151 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int: /* 0x98 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (152 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int: /* 0x99 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (153 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int: /* 0x9a */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (154 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long: /* 0x9b */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (155 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long: /* 0x9c */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (156 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long: /* 0x9d */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (157 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long: /* 0x9e */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (158 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long: /* 0x9f */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (159 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long: /* 0xa0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (160 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long: /* 0xa1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (161 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long: /* 0xa2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (162 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long: /* 0xa3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (163 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long: /* 0xa4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (164 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long: /* 0xa5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (165 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float: /* 0xa6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (166 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float: /* 0xa7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (167 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float: /* 0xa8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (168 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float: /* 0xa9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (169 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float: /* 0xaa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (170 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double: /* 0xab */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (171 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double: /* 0xac */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (172 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double: /* 0xad */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (173 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double: /* 0xae */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (174 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double: /* 0xaf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (175 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_2addr: /* 0xb0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (176 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_int_2addr: /* 0xb1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (177 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_2addr: /* 0xb2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (178 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_2addr: /* 0xb3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (179 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_2addr: /* 0xb4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (180 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_2addr: /* 0xb5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (181 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_2addr: /* 0xb6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (182 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_2addr: /* 0xb7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (183 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_2addr: /* 0xb8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (184 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_2addr: /* 0xb9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (185 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_2addr: /* 0xba */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (186 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_long_2addr: /* 0xbb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (187 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_long_2addr: /* 0xbc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (188 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_long_2addr: /* 0xbd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (189 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_long_2addr: /* 0xbe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (190 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_long_2addr: /* 0xbf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (191 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_long_2addr: /* 0xc0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (192 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_long_2addr: /* 0xc1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (193 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_long_2addr: /* 0xc2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (194 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_long_2addr: /* 0xc3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (195 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_long_2addr: /* 0xc4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (196 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_long_2addr: /* 0xc5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (197 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_float_2addr: /* 0xc6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (198 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_float_2addr: /* 0xc7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (199 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_float_2addr: /* 0xc8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (200 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_float_2addr: /* 0xc9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (201 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_float_2addr: /* 0xca */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (202 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_double_2addr: /* 0xcb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (203 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_sub_double_2addr: /* 0xcc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (204 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_double_2addr: /* 0xcd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (205 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_double_2addr: /* 0xce */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (206 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_double_2addr: /* 0xcf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (207 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit16: /* 0xd0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (208 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int: /* 0xd1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (209 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit16: /* 0xd2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (210 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit16: /* 0xd3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (211 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit16: /* 0xd4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (212 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit16: /* 0xd5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (213 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit16: /* 0xd6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (214 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit16: /* 0xd7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (215 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_add_int_lit8: /* 0xd8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (216 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rsub_int_lit8: /* 0xd9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (217 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_mul_int_lit8: /* 0xda */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (218 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_div_int_lit8: /* 0xdb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (219 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_rem_int_lit8: /* 0xdc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (220 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_and_int_lit8: /* 0xdd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (221 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_or_int_lit8: /* 0xde */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (222 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_xor_int_lit8: /* 0xdf */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (223 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shl_int_lit8: /* 0xe0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (224 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_shr_int_lit8: /* 0xe1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (225 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_ushr_int_lit8: /* 0xe2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (226 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_quick: /* 0xe3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (227 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_wide_quick: /* 0xe4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (228 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_object_quick: /* 0xe5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (229 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_quick: /* 0xe6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (230 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_wide_quick: /* 0xe7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (231 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_object_quick: /* 0xe8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (232 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_quick: /* 0xe9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (233 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_virtual_range_quick: /* 0xea */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (234 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_boolean_quick: /* 0xeb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (235 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_byte_quick: /* 0xec */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (236 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_char_quick: /* 0xed */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (237 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iput_short_quick: /* 0xee */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (238 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_boolean_quick: /* 0xef */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (239 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_byte_quick: /* 0xf0 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (240 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_char_quick: /* 0xf1 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (241 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_iget_short_quick: /* 0xf2 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (242 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_invoke_lambda: /* 0xf3 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (243 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_f4: /* 0xf4 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (244 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_capture_variable: /* 0xf5 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (245 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_create_lambda: /* 0xf6 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (246 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_liberate_variable: /* 0xf7 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (247 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_box_lambda: /* 0xf8 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (248 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unbox_lambda: /* 0xf9 */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (249 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fa: /* 0xfa */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (250 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fb: /* 0xfb */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (251 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fc: /* 0xfc */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (252 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fd: /* 0xfd */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (253 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_fe: /* 0xfe */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (254 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+/* ------------------------------ */
+    .balign 128
+.L_ALT_op_unused_ff: /* 0xff */
+/* File: arm/alt_stub.S */
+/*
+ * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
+ * any interesting requests and then jump to the real instruction
+ * handler.  Note that the call to MterpCheckBefore is done as a tail call.
+ */
+    .extern MterpCheckBefore
+    EXPORT_PC
+    ldr    rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]            @ refresh IBASE.
+    adrl   lr, artMterpAsmInstructionStart + (255 * 128)       @ Addr of primary handler.
+    mov    r0, rSELF
+    add    r1, rFP, #OFF_FP_SHADOWFRAME
+    b      MterpCheckBefore     @ (self, shadow_frame)              @ Tail call.
+
+    .balign 128
+    .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
+    .global artMterpAsmAltInstructionEnd
+artMterpAsmAltInstructionEnd:
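+/*
+ * Note: each alternative stub above is padded to a fixed 128 bytes
+ * (.balign 128), so this table can be indexed exactly like the primary one:
+ * artMterpAsmAltInstructionStart + (opcode * 128).
+ */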
+/* File: arm/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+#define MTERP_LOGGING 0
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback
+
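+/*
+ * The error stubs below all follow the same shape: export the PC, optionally
+ * log the condition (only when MTERP_LOGGING is enabled at build time), then
+ * branch to MterpCommonFallback so the reference interpreter raises the
+ * actual exception.
+ */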
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    ldr  r2, [rSELF, #THREAD_FLAGS_OFFSET]
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     r0, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    cmp     r0, #0                                  @ Exception pending?
+    beq     MterpFallback                           @ If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here, or do we need to bail out to the caller?
+ *
+ */
+MterpException:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    @ (self, shadow_frame)
+    cmp     r0, #0
+    beq     MterpExceptionReturn                    @ no local catch, back to caller.
+    ldr     r0, [rFP, #OFF_FP_CODE_ITEM]
+    ldr     r1, [rFP, #OFF_FP_DEX_PC]
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
+    add     rPC, r0, #CODEITEM_INSNS_OFFSET
+    add     rPC, rPC, r1, lsl #1                    @ generate new dex_pc_ptr
+    str     rPC, [rFP, #OFF_FP_DEX_PC_PTR]
+    /* resume execution at catch block */
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+
+/*
+ * Check for a pending suspend request.  Assumes rINST is already loaded and rPC has
+ * advanced; we still need to fetch the opcode and branch to it.  Thread flags are in lr.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+    EXPORT_PC
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck           @ (self)
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+    mov  r0, rSELF
+    add  r1, rFP, #OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+MterpCommonFallback:
+    mov     r0, #0                                  @ signal retry with reference interpreter.
+    b       MterpDone
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* rFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, #1                                  @ signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    str     r0, [r2]
+    str     r1, [r2, #4]
+    mov     r0, rSELF
+    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    blne    MterpSuspendCheck                       @ (self)
+    mov     r0, #1                                  @ signal return to caller.
+MterpDone:
+    add     sp, sp, #4                              @ un-align 64
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+
+
+    .fnend
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
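A note on the alt stubs above before the rebuild script: when instrumentation is active, rIBASE is pointed at artMterpAsmAltInstructionStart, so every opcode first funnels through MterpCheckBefore before reaching its real handler. A rough C++ model of one 128-byte stub (explanatory sketch only; the names come from the assembly, the function shape is an assumption):

    extern "C" char artMterpAsmInstructionStart[];
    extern "C" void MterpCheckBefore(void* self, void* shadow_frame);

    // Conceptual behavior of the alt stub for `opcode`: each stub sits in a
    // .balign'ed 128-byte slot, so handler N lives at table_start + N * 128.
    void AltStubModel(void* self, void* shadow_frame, int opcode) {
      // The assembly loads lr with the primary handler's address...
      void* primary = artMterpAsmInstructionStart + opcode * 128;
      // ...then tail-calls MterpCheckBefore, which resumes at the primary
      // handler via lr rather than returning here.
      MterpCheckBefore(self, shadow_frame);
      (void)primary;
    }
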
diff --git a/runtime/interpreter/mterp/rebuild.sh b/runtime/interpreter/mterp/rebuild.sh
new file mode 100755
index 0000000..a325fff
--- /dev/null
+++ b/runtime/interpreter/mterp/rebuild.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Rebuild for all known targets.  Necessary until the files in "out" are
+# generated as part of the build.
+#
+set -e
+
+# for arch in arm x86 mips arm64 x86_64 mips64; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
+for arch in arm; do TARGET_ARCH_EXT=$arch make -f Makefile_mterp; done
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
new file mode 100644
index 0000000..3c2898b
--- /dev/null
+++ b/runtime/jit/debugger_interface.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debugger_interface.h"
+
+namespace art {
+
+// -------------------------------------------------------------------
+// Binary GDB JIT Interface as described in
+//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
+// -------------------------------------------------------------------
+extern "C" {
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } JITAction;
+
+  struct JITCodeEntry {
+    JITCodeEntry* next_;
+    JITCodeEntry* prev_;
+    const uint8_t* symfile_addr_;
+    uint64_t symfile_size_;
+  };
+
+  struct JITDescriptor {
+    uint32_t version_;
+    uint32_t action_flag_;
+    JITCodeEntry* relevant_entry_;
+    JITCodeEntry* first_entry_;
+  };
+
+  // GDB will place a breakpoint in this function.
+  // To prevent GCC from inlining or removing it, we add the noinline attribute
+  // and an inline assembler statement inside.
+  void __attribute__((noinline)) __jit_debug_register_code();
+  void __attribute__((noinline)) __jit_debug_register_code() {
+    __asm__("");
+  }
+
+  // GDB will inspect the contents of this descriptor.
+  // Static initialization is necessary to prevent GDB from seeing an
+  // uninitialized descriptor.
+  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
+}
+
+JITCodeEntry* CreateJITCodeEntry(const uint8_t* symfile_addr, uintptr_t symfile_size) {
+  JITCodeEntry* entry = new JITCodeEntry;
+  entry->symfile_addr_ = symfile_addr;
+  entry->symfile_size_ = symfile_size;
+  entry->prev_ = nullptr;
+
+  // TODO: Do we need a lock here?
+  entry->next_ = __jit_debug_descriptor.first_entry_;
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry;
+  }
+  __jit_debug_descriptor.first_entry_ = entry;
+  __jit_debug_descriptor.relevant_entry_ = entry;
+
+  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
+  __jit_debug_register_code();
+  return entry;
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  // TODO: Do we need a lock here?
+  if (entry->prev_ != nullptr) {
+    entry->prev_->next_ = entry->next_;
+  } else {
+    __jit_debug_descriptor.first_entry_ = entry->next_;
+  }
+
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry->prev_;
+  }
+
+  __jit_debug_descriptor.relevant_entry_ = entry;
+  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
+  __jit_debug_register_code();
+  delete entry;
+}
+
+}  // namespace art
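To make the registration flow concrete: a hypothetical caller (the JIT, after producing an in-memory ELF image) would pair the two entry points as below; elf_image and elf_size are assumed names for the JIT compiler's output:

    // Sketch: registering an in-memory symfile with an attached debugger.
    const uint8_t* elf_image = nullptr;  // stand-in: in-memory ELF from the JIT
    uintptr_t elf_size = 0;              // stand-in: its size in bytes

    JITCodeEntry* entry = CreateJITCodeEntry(elf_image, elf_size);
    // GDB can now symbolize the JITed code...
    DeleteJITCodeEntry(entry);           // ...until the code is discarded.
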
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
new file mode 100644
index 0000000..a784ef5
--- /dev/null
+++ b/runtime/jit/debugger_interface.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+#define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+
+#include <inttypes.h>
+
+namespace art {
+
+extern "C" {
+  struct JITCodeEntry;
+}
+
+// Notify native debugger about new JITed code by passing in-memory ELF.
+JITCodeEntry* CreateJITCodeEntry(const uint8_t* symfile_addr, uintptr_t symfile_size);
+
+// Notify native debugger that JITed code has been removed.
+void DeleteJITCodeEntry(JITCodeEntry* entry);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index b2fc74d..ab70f4c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -26,6 +26,7 @@
 #include "jit_instrumentation.h"
 #include "oat_file_manager.h"
 #include "offline_profiling_info.h"
+#include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "utils.h"
@@ -66,7 +67,7 @@
 Jit::Jit()
     : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr),
       jit_compile_method_(nullptr), dump_info_on_shutdown_(false),
-      cumulative_timings_("JIT timings") {
+      cumulative_timings_("JIT timings"), save_profiling_info_(false) {
 }
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
@@ -80,14 +81,12 @@
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
-  jit->offline_profile_info_.reset(nullptr);
-  if (options->GetSaveProfilingInfo()) {
-    jit->offline_profile_info_.reset(new OfflineProfilingInfo());
-  }
+  jit->save_profiling_info_ = options->GetSaveProfilingInfo();
   LOG(INFO) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
-      << ", compile_threshold=" << options->GetCompileThreshold();
+      << ", compile_threshold=" << options->GetCompileThreshold()
+      << ", save_profiling_info=" << options->GetSaveProfilingInfo();
   return jit.release();
 }
 
@@ -173,25 +172,21 @@
   }
 }
 
-void Jit::SaveProfilingInfo(const std::string& filename) {
-  if (offline_profile_info_ == nullptr) {
-    return;
+void Jit::StartProfileSaver(const std::string& filename,
+                            const std::vector<std::string>& code_paths) {
+  if (save_profiling_info_) {
+    ProfileSaver::Start(filename, code_cache_.get(), code_paths);
   }
-  uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
-  if (offline_profile_info_->NeedsSaving(last_update_ns)) {
-    VLOG(profiler) << "Initiate save profiling information to: " << filename;
-    std::set<ArtMethod*> methods;
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      code_cache_->GetCompiledArtMethods(offline_profile_info_->GetTrackedDexLocations(), methods);
-    }
-    offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods);
-  } else {
-    VLOG(profiler) << "No need to save profiling information to: " << filename;
+}
+
+void Jit::StopProfileSaver() {
+  if (save_profiling_info_ && ProfileSaver::IsStarted()) {
+    ProfileSaver::Stop();
   }
 }
 
 Jit::~Jit() {
+  DCHECK(!save_profiling_info_ || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
   }
@@ -210,12 +205,5 @@
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
 }
 
-void Jit::SetDexLocationsForProfiling(const std::vector<std::string>& dex_base_locations) {
-  if (offline_profile_info_ == nullptr) {
-    return;
-  }
-  offline_profile_info_->SetTrackedDexLocations(dex_base_locations);
-}
-
 }  // namespace jit
 }  // namespace art
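The new pair of entry points replaces the one-shot SaveProfilingInfo(). A hypothetical runtime call site (the actual hook points live in runtime.cc, outside this hunk; filename and code_paths are the values delivered by registerAppInfo):

    jit::Jit* jit = Runtime::Current()->GetJit();
    if (jit != nullptr) {
      jit->StartProfileSaver(filename, code_paths);  // spawns the saver thread
    }
    // ...later, during shutdown, before the JIT is torn down:
    if (jit != nullptr) {
      jit->StopProfileSaver();  // joins the saver thread
    }
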
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 7a2db31..0edce2f 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -72,8 +72,8 @@
     return instrumentation_cache_.get();
   }
 
-  void SetDexLocationsForProfiling(const std::vector<std::string>& dex_locations);
-  void SaveProfilingInfo(const std::string& filename);
+  void StartProfileSaver(const std::string& filename, const std::vector<std::string>& code_paths);
+  void StopProfileSaver();
 
   void DumpForSigQuit(std::ostream& os) {
     DumpInfo(os);
@@ -98,7 +98,8 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
-  std::unique_ptr<OfflineProfilingInfo> offline_profile_info_;
+  bool save_profiling_info_;
+
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 033a8f0..c260ca4 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -317,7 +317,7 @@
       // code.
       GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
     }
-    last_update_time_ns_ = NanoTime();
+    last_update_time_ns_.StoreRelease(NanoTime());
     VLOG(jit)
         << "JIT added "
         << PrettyMethod(method) << "@" << method
@@ -668,6 +668,11 @@
     return nullptr;
   }
   info = new (data) ProfilingInfo(method, entries);
+
+  // Make sure other threads see the data in the profiling info object before the
+  // store in the ArtMethod's ProfilingInfo pointer.
+  QuasiAtomic::ThreadFenceRelease();
+
   method->SetProfilingInfo(info);
   profiling_infos_.push_back(info);
   return info;
@@ -689,18 +694,17 @@
 }
 
 void JitCodeCache::GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
-                                         std::set<ArtMethod*>& methods) {
+                                         std::vector<ArtMethod*>& methods) {
   MutexLock mu(Thread::Current(), lock_);
   for (auto it : method_code_map_) {
     if (ContainsElement(dex_base_locations, it.second->GetDexFile()->GetBaseLocation())) {
-      methods.insert(it.second);
+      methods.push_back(it.second);
     }
   }
 }
 
-uint64_t JitCodeCache::GetLastUpdateTimeNs() {
-  MutexLock mu(Thread::Current(), lock_);
-  return last_update_time_ns_;
+uint64_t JitCodeCache::GetLastUpdateTimeNs() const {
+  return last_update_time_ns_.LoadAcquire();
 }
 
 bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
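Two memory-ordering details in this file deserve a note: the StoreRelease/LoadAcquire pair lets the profile saver read last_update_time_ns_ without taking lock_, and ThreadFenceRelease publishes a fully constructed ProfilingInfo before its pointer becomes visible through the ArtMethod. A minimal standalone model of the release/acquire pattern (plain std::atomic instead of ART's Atomic<> wrapper):

    #include <atomic>
    #include <cstdint>

    std::atomic<uint64_t> last_update_time_ns{0};

    void Writer() {
      // Release store: all writes before this are visible to a reader that
      // observes the new value with an acquire load.
      last_update_time_ns.store(42, std::memory_order_release);
    }

    uint64_t Reader() {
      // Acquire load pairs with the release store above; no mutex needed.
      return last_update_time_ns.load(std::memory_order_acquire);
    }
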
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 0ceb17a..1c842e4 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -148,11 +148,11 @@
 
   // Adds to `methods` all the compiled ArtMethods which are part of any of the given dex locations.
   void GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
-                             std::set<ArtMethod*>& methods)
+                             std::vector<ArtMethod*>& methods)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_);
+  uint64_t GetLastUpdateTimeNs() const;
 
   size_t GetCurrentCapacity() REQUIRES(!lock_) {
     MutexLock lock(Thread::Current(), lock_);
@@ -249,7 +249,8 @@
   bool has_done_one_collection_ GUARDED_BY(lock_);
 
   // Last time the code_cache was updated.
-  uint64_t last_update_time_ns_ GUARDED_BY(lock_);
+  // It is atomic to avoid locking when reading it.
+  Atomic<uint64_t> last_update_time_ns_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 511b53d..a132701 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -17,7 +17,7 @@
 #include "offline_profiling_info.h"
 
 #include <fstream>
-#include <set>
+#include <vector>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -30,66 +30,40 @@
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 500ms for now.
-static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMilisecondsToNano;
-
-void OfflineProfilingInfo::SetTrackedDexLocations(
-      const std::vector<std::string>& dex_base_locations) {
-  tracked_dex_base_locations_.clear();
-  tracked_dex_base_locations_.insert(dex_base_locations.begin(), dex_base_locations.end());
-  VLOG(profiler) << "Tracking dex locations: " << Join(dex_base_locations, ':');
-}
-
-const std::set<const std::string>& OfflineProfilingInfo::GetTrackedDexLocations() const {
-  return tracked_dex_base_locations_;
-}
-
-bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const {
-  return !tracked_dex_base_locations_.empty() &&
-      (last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs);
-}
-
-void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename,
-                                             uint64_t last_update_time_ns,
-                                             const std::set<ArtMethod*>& methods) {
-  if (!NeedsSaving(last_update_time_ns)) {
-    VLOG(profiler) << "No need to saved profile info to " << filename;
-    return;
-  }
-
+bool ProfileCompilationInfo::SaveProfilingInfo(const std::string& filename,
+                                               const std::vector<ArtMethod*>& methods) {
   if (methods.empty()) {
     VLOG(profiler) << "No info to save to " << filename;
-    return;
+    return true;
   }
 
-  DexFileToMethodsMap info;
+  ProfileCompilationInfo info;
+  if (!info.Load(filename)) {
+    LOG(WARNING) << "Could not load previous profile data from file " << filename;
+    return false;
+  }
   {
     ScopedObjectAccess soa(Thread::Current());
     for (auto it = methods.begin(); it != methods.end(); it++) {
-      DCHECK(ContainsElement(tracked_dex_base_locations_, (*it)->GetDexFile()->GetBaseLocation()));
-      AddMethodInfo(*it, &info);
+      const DexFile* dex_file = (*it)->GetDexFile();
+      if (!info.AddData(dex_file->GetLocation(),
+                        dex_file->GetLocationChecksum(),
+                        (*it)->GetDexMethodIndex())) {
+        return false;
+      }
     }
   }
 
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  if (Serialize(filename, info)) {
-    last_update_time_ns_.StoreRelaxed(last_update_time_ns);
-    VLOG(profiler) << "Successfully saved profile info to "
-                   << filename << " with time stamp: " << last_update_time_ns;
+  bool result = info.Save(filename);
+  if (result) {
+    VLOG(profiler) << "Successfully saved profile info to " << filename
+        << " Size: " << GetFileSizeBytes(filename);
+  } else {
+    VLOG(profiler) << "Failed to save profile info to " << filename;
   }
-}
-
-void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
-  DCHECK(method != nullptr);
-  const DexFile* dex_file = method->GetDexFile();
-
-  auto info_it = info->find(dex_file);
-  if (info_it == info->end()) {
-    info_it = info->Put(dex_file, std::set<uint32_t>());
-  }
-  info_it->second.insert(method->GetDexMethodIndex());
+  return result;
 }
 
 enum OpenMode {
@@ -105,9 +79,7 @@
       break;
     case READ_WRITE:
       // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(),
-                    O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
-                    S_IRUSR | S_IWUSR);
+      fd = open(filename.c_str(), O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
       break;
   }
 
@@ -165,8 +137,7 @@
  *    /system/priv-app/app/app.apk,131232145,11,23,454,54
  *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool OfflineProfilingInfo::Serialize(const std::string& filename,
-                                     const DexFileToMethodsMap& info) const {
+bool ProfileCompilationInfo::Save(const std::string& filename) {
   int fd = OpenFile(filename, READ_WRITE);
   if (fd == -1) {
     return false;
@@ -176,14 +147,12 @@
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
-  for (auto it : info) {
-    const DexFile* dex_file = it.first;
-    const std::set<uint32_t>& method_dex_ids = it.second;
+  for (const auto& it : info_) {
+    const std::string& dex_location = it.first;
+    const DexFileData& dex_data = it.second;
 
-    os << dex_file->GetLocation()
-        << kFieldSeparator
-        << dex_file->GetLocationChecksum();
-    for (auto method_it : method_dex_ids) {
+    os << dex_location << kFieldSeparator << dex_data.checksum;
+    for (auto method_it : dex_data.method_set) {
       os << kFieldSeparator << method_it;
     }
     os << kLineSeparator;
@@ -218,8 +187,22 @@
   }
 }
 
-bool ProfileCompilationInfo::ProcessLine(const std::string& line,
-                                         const std::vector<const DexFile*>& dex_files) {
+bool ProfileCompilationInfo::AddData(const std::string& dex_location,
+                                     uint32_t checksum,
+                                     uint16_t method_idx) {
+  auto info_it = info_.find(dex_location);
+  if (info_it == info_.end()) {
+    info_it = info_.Put(dex_location, DexFileData(checksum));
+  }
+  if (info_it->second.checksum != checksum) {
+    LOG(WARNING) << "Checksum mismatch for dex " << dex_location;
+    return false;
+  }
+  info_it->second.method_set.insert(method_idx);
+  return true;
+}
+
+bool ProfileCompilationInfo::ProcessLine(const std::string& line) {
   std::vector<std::string> parts;
   SplitString(line, kFieldSeparator, &parts);
   if (parts.size() < 3) {
@@ -233,39 +216,15 @@
     return false;
   }
 
-  const DexFile* current_dex_file = nullptr;
-  for (auto dex_file : dex_files) {
-    if (dex_file->GetLocation() == dex_location) {
-      if (checksum != dex_file->GetLocationChecksum()) {
-        LOG(WARNING) << "Checksum mismatch for "
-            << dex_file->GetLocation() << " when parsing " << filename_;
-        return false;
-      }
-      current_dex_file = dex_file;
-      break;
-    }
-  }
-  if (current_dex_file == nullptr) {
-    return true;
-  }
-
   for (size_t i = 2; i < parts.size(); i++) {
     uint32_t method_idx;
     if (!ParseInt(parts[i].c_str(), &method_idx)) {
       LOG(WARNING) << "Cannot parse method_idx " << parts[i];
       return false;
     }
-    uint16_t class_idx = current_dex_file->GetMethodId(method_idx).class_idx_;
-    auto info_it = info_.find(current_dex_file);
-    if (info_it == info_.end()) {
-      info_it = info_.Put(current_dex_file, ClassToMethodsMap());
-    }
-    ClassToMethodsMap& class_map = info_it->second;
-    auto class_it = class_map.find(class_idx);
-    if (class_it == class_map.end()) {
-      class_it = class_map.Put(class_idx, std::set<uint32_t>());
-    }
-    class_it->second.insert(method_idx);
+    if (!AddData(dex_location, checksum, method_idx)) {
+      return false;
+    }
   }
   return true;
 }
@@ -292,25 +249,8 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(const std::vector<const DexFile*>& dex_files) {
-  if (dex_files.empty()) {
-    return true;
-  }
-  if (kIsDebugBuild) {
-    // In debug builds verify that the locations are unique.
-    std::set<std::string> locations;
-    for (auto dex_file : dex_files) {
-      const std::string& location = dex_file->GetLocation();
-      DCHECK(locations.find(location) == locations.end())
-          << "DexFiles appear to belong to different apks."
-          << " There are multiple dex files with the same location: "
-          << location;
-      locations.insert(location);
-    }
-  }
-  info_.clear();
-
-  int fd = OpenFile(filename_, READ);
+bool ProfileCompilationInfo::Load(const std::string& filename) {
+  int fd = OpenFile(filename, READ);
   if (fd == -1) {
     return false;
   }
@@ -323,7 +263,7 @@
   while (success) {
     int n = read(fd, buffer, kBufferSize);
     if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename_;
+      PLOG(WARNING) << "Error when reading profile file " << filename;
       success = false;
       break;
     } else if (n == 0) {
@@ -337,7 +277,7 @@
       if (current_start_pos == -1) {
         break;
       }
-      if (!ProcessLine(current_line, dex_files)) {
+      if (!ProcessLine(current_line)) {
         success = false;
         break;
       }
@@ -348,25 +288,50 @@
   if (!success) {
     info_.clear();
   }
-  return CloseDescriptorForFile(fd, filename_) && success;
+  return CloseDescriptorForFile(fd, filename) && success;
+}
+
+bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
+  for (const auto& other_it : other.info_) {
+    const std::string& other_dex_location = other_it.first;
+    const DexFileData& other_dex_data = other_it.second;
+
+    auto info_it = info_.find(other_dex_location);
+    if (info_it == info_.end()) {
+      info_it = info_.Put(other_dex_location, DexFileData(other_dex_data.checksum));
+    }
+    if (info_it->second.checksum != other_dex_data.checksum) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_dex_location;
+      return false;
+    }
+    info_it->second.method_set.insert(other_dex_data.method_set.begin(),
+                                      other_dex_data.method_set.end());
+  }
+  return true;
 }
 
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
-  auto info_it = info_.find(method_ref.dex_file);
+  auto info_it = info_.find(method_ref.dex_file->GetLocation());
   if (info_it != info_.end()) {
-    uint16_t class_idx = method_ref.dex_file->GetMethodId(method_ref.dex_method_index).class_idx_;
-    const ClassToMethodsMap& class_map = info_it->second;
-    auto class_it = class_map.find(class_idx);
-    if (class_it != class_map.end()) {
-      const std::set<uint32_t>& methods = class_it->second;
-      return methods.find(method_ref.dex_method_index) != methods.end();
+    if (method_ref.dex_file->GetLocationChecksum() != info_it->second.checksum) {
+      return false;
     }
-    return false;
+    const std::set<uint16_t>& methods = info_it->second.method_set;
+    return methods.find(method_ref.dex_method_index) != methods.end();
   }
   return false;
 }
 
-std::string ProfileCompilationInfo::DumpInfo(bool print_full_dex_location) const {
+uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
+  uint32_t total = 0;
+  for (const auto& it : info_) {
+    total += it.second.method_set.size();
+  }
+  return total;
+}
+
+std::string ProfileCompilationInfo::DumpInfo(const std::vector<const DexFile*>* dex_files,
+                                             bool print_full_dex_location) const {
   std::ostringstream os;
   if (info_.empty()) {
     return "ProfileInfo: empty";
@@ -374,17 +339,11 @@
 
   os << "ProfileInfo:";
 
-  // Use an additional map to achieve a predefined order based on the dex locations.
-  SafeMap<const std::string, const DexFile*> dex_locations_map;
-  for (auto info_it : info_) {
-    dex_locations_map.Put(info_it.first->GetLocation(), info_it.first);
-  }
-
   const std::string kFirstDexFileKeySubstitute = ":classes.dex";
-  for (auto dex_file_it : dex_locations_map) {
+  for (const auto& it : info_) {
     os << "\n";
-    const std::string& location = dex_file_it.first;
-    const DexFile* dex_file = dex_file_it.second;
+    const std::string& location = it.first;
+    const DexFileData& dex_data = it.second;
     if (print_full_dex_location) {
       os << location;
     } else {
@@ -392,10 +351,20 @@
       std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
-    for (auto class_it : info_.find(dex_file)->second) {
-      for (auto method_it : class_it.second) {
-        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+    const DexFile* dex_file = nullptr;
+    if (dex_files != nullptr) {
+      for (size_t i = 0; i < dex_files->size(); i++) {
+        if (location == (*dex_files)[i]->GetLocation()) {
+          dex_file = (*dex_files)[i];
+        }
+      }
+    }
+    for (const auto method_it : dex_data.method_set) {
+      if (dex_file != nullptr) {
+        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+      } else {
+        os << "\n  " << method_it;
+      }
     }
   }
   return os.str();
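For reference, the on-disk format that Save() emits (per the serialization comment earlier in this file) is one line per dex file: location, checksum, then the profiled method indices, all comma-separated. A hypothetical two-dex profile would look like:

    base.apk,131232145,11,23,454
    base.apk:classes2.dex,218490184,39,13

Load() and ProcessLine() parse exactly this shape back into info_, which is why SaveProfilingInfo() can Load() first and then Save(): new methods merge into the existing file instead of clobbering it.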
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 8c5ffbe..26e1ac3 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
 
 #include <set>
+#include <vector>
 
 #include "atomic.h"
 #include "dex_file.h"
@@ -28,71 +29,50 @@
 
 class ArtMethod;
 
+// TODO: rename file.
 /**
- * Profiling information in a format that can be serialized to disk.
- * It is a serialize-friendly format based on information collected
- * by the interpreter (ProfileInfo).
+ * Profile information in a format suitable to be queried by the compiler and
+ * used for profile-guided compilation.
+ * It is a serialize-friendly format based on information collected by the
+ * interpreter (ProfileInfo).
  * Currently it stores only the hot compiled methods.
  */
-class OfflineProfilingInfo {
- public:
-  bool NeedsSaving(uint64_t last_update_time_ns) const;
-  void SaveProfilingInfo(const std::string& filename,
-                         uint64_t last_update_time_ns,
-                         const std::set<ArtMethod*>& methods);
-  void SetTrackedDexLocations(const std::vector<std::string>& dex_locations);
-  const std::set<const std::string>& GetTrackedDexLocations() const;
-
- private:
-  // Map identifying the location of the profiled methods.
-  // dex_file_ -> [dex_method_index]+
-  using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>;
-
-  void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
-
-  // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
-  // single thread?)
-  Atomic<uint64_t> last_update_time_ns_;
-
-  std::set<const std::string> tracked_dex_base_locations_;
-};
-
-/**
- * Profile information in a format suitable to be queried by the compiler and performing
- * profile guided compilation.
- */
 class ProfileCompilationInfo {
  public:
-  // Constructs a ProfileCompilationInfo backed by the provided file.
-  explicit ProfileCompilationInfo(const std::string& filename) : filename_(filename) {}
+  static bool SaveProfilingInfo(const std::string& filename,
+                                const std::vector<ArtMethod*>& methods);
 
-  // Loads profile information corresponding to the provided dex files.
-  // The dex files' multidex suffixes must be unique.
-  // This resets the state of the profiling information
-  // (i.e. all previously loaded info are cleared).
-  bool Load(const std::vector<const DexFile*>& dex_files);
+  // Loads profile information from the given file.
+  bool Load(const std::string& profile_filename);
+  // Loads the data from another ProfileCompilationInfo object.
+  bool Load(const ProfileCompilationInfo& info);
+  // Saves the profile data to the given file.
+  bool Save(const std::string& profile_filename);
+  // Returns the number of methods that were profiled.
+  uint32_t GetNumberOfMethods() const;
 
   // Returns true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
 
-  const std::string& GetFilename() const { return filename_; }
-
   // Dumps all the loaded profile info into a string and returns it.
+  // If dex_files is not null then the method indices will be resolved to their
+  // names.
   // This is intended for testing and debugging.
-  std::string DumpInfo(bool print_full_dex_location = true) const;
+  std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
+                       bool print_full_dex_location = true) const;
 
  private:
-  bool ProcessLine(const std::string& line,
-                   const std::vector<const DexFile*>& dex_files);
+  bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
+  bool ProcessLine(const std::string& line);
 
-  using ClassToMethodsMap = SafeMap<uint32_t, std::set<uint32_t>>;
-  // Map identifying the location of the profiled methods.
-  // dex_file -> class_index -> [dex_method_index]+
-  using DexFileToProfileInfoMap = SafeMap<const DexFile*, ClassToMethodsMap>;
+  struct DexFileData {
+    explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
+    uint32_t checksum;
+    std::set<uint16_t> method_set;
+  };
 
-  const std::string filename_;
+  using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
+
   DexFileToProfileInfoMap info_;
 };
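On the consumer side, the compiler can now load a profile by file name and test individual methods. A hypothetical query (dex_file, method_idx, and the profile path are illustrative):

    // Sketch: deciding whether to AOT-compile a method based on the profile.
    ProfileCompilationInfo profile;
    bool use_profile = profile.Load("/data/misc/profiles/primary.prof");
    MethodReference ref(dex_file, method_idx);
    if (use_profile && profile.ContainsMethod(ref)) {
      // The method was hot at runtime; prioritize it for compilation.
    }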
 
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
new file mode 100644
index 0000000..ec289ea
--- /dev/null
+++ b/runtime/jit/profile_saver.cc
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_saver.h"
+
+#include "art_method-inl.h"
+#include "scoped_thread_state_change.h"
+#include "oat_file_manager.h"
+
+namespace art {
+
+// An arbitrary value to throttle save requests. Set to 500ms for now.
+static constexpr const uint64_t kMillisecondsToNano = 1000000;
+static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 500 * kMillisecondsToNano;
+
+// TODO: read the constants from ProfileOptions.
+// Add a random delay each time we go to sleep so that we don't hammer the CPU
+// with all profile savers running at the same time.
+static constexpr const uint64_t kRandomDelayMaxMs = 10 * 1000;  // 10 seconds
+static constexpr const uint64_t kMaxBackoffMs = 4 * 60 * 1000;  // 4 minutes
+static constexpr const uint64_t kSavePeriodMs = 4 * 1000;  // 4 seconds
+static constexpr const double kBackoffCoef = 1.5;
+
+static constexpr const uint32_t kMinimumNrOfMethodsToSave = 10;
+
+ProfileSaver* ProfileSaver::instance_ = nullptr;
+pthread_t ProfileSaver::profiler_pthread_ = 0U;
+
+ProfileSaver::ProfileSaver(const std::string& output_filename,
+                           jit::JitCodeCache* jit_code_cache,
+                           const std::vector<std::string>& code_paths)
+    : output_filename_(output_filename),
+      jit_code_cache_(jit_code_cache),
+      tracked_dex_base_locations_(code_paths.begin(), code_paths.end()),
+      code_cache_last_update_time_ns_(0),
+      shutting_down_(false),
+      wait_lock_("ProfileSaver wait lock"),
+      period_condition_("ProfileSaver period condition", wait_lock_) {
+}
+
+void ProfileSaver::Run() {
+  srand(MicroTime() * getpid());
+  Thread* self = Thread::Current();
+
+  uint64_t save_period_ms = kSavePeriodMs;
+  VLOG(profiler) << "Save profiling information every " << save_period_ms << " ms";
+  while (true) {
+    if (ShuttingDown(self)) {
+      break;
+    }
+
+    uint64_t random_sleep_delay_ms = rand() % kRandomDelayMaxMs;
+    uint64_t sleep_time_ms = save_period_ms + random_sleep_delay_ms;
+    {
+      MutexLock mu(self, wait_lock_);
+      period_condition_.TimedWait(self, sleep_time_ms, 0);
+    }
+
+    if (ShuttingDown(self)) {
+      break;
+    }
+
+    if (!ProcessProfilingInfo()) {
+      // If we didn't need to save now it is less likely that we will need to do so soon.
+      // Increase the time between saves by kBackoffCoef, but cap it at kMaxBackoffMs.
+      save_period_ms = static_cast<uint64_t>(kBackoffCoef * save_period_ms);
+      if (save_period_ms > kMaxBackoffMs) {
+        save_period_ms = kMaxBackoffMs;
+      }
+    } else {
+      // Reset the period to the initial value as it's highly likely to JIT again.
+      save_period_ms = kSavePeriodMs;
+    }
+  }
+}
+
+bool ProfileSaver::ProcessProfilingInfo() {
+  VLOG(profiler) << "Initiating save profiling information to: " << output_filename_;
+
+  uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
+  if (last_update_time_ns - code_cache_last_update_time_ns_
+      < kMinimumTimeBetweenCodeCacheUpdatesNs) {
+    VLOG(profiler) << "Not enough time has passed since the last code cache update.";
+    return false;
+  }
+
+  uint64_t start = NanoTime();
+  code_cache_last_update_time_ns_ = last_update_time_ns;
+  std::vector<ArtMethod*> methods;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    jit_code_cache_->GetCompiledArtMethods(tracked_dex_base_locations_, methods);
+  }
+  if (methods.size() < kMinimumNrOfMethodsToSave) {
+    VLOG(profiler) << "Not enough information to save. Nr of methods: " << methods.size();
+    return false;
+  }
+
+  ProfileCompilationInfo::SaveProfilingInfo(output_filename_, methods);
+  VLOG(profiler) << "Profile process time: " << PrettyDuration(NanoTime() - start);
+  return true;
+}
+
+void* ProfileSaver::RunProfileSaverThread(void* arg) {
+  Runtime* runtime = Runtime::Current();
+  ProfileSaver* profile_saver = reinterpret_cast<ProfileSaver*>(arg);
+
+  CHECK(runtime->AttachCurrentThread("Profile Saver",
+                                     /*as_daemon*/true,
+                                     runtime->GetSystemThreadGroup(),
+                                     /*create_peer*/true));
+  profile_saver->Run();
+
+  runtime->DetachCurrentThread();
+  VLOG(profiler) << "Profile saver shutdown";
+  return nullptr;
+}
+
+void ProfileSaver::Start(const std::string& output_filename,
+                         jit::JitCodeCache* jit_code_cache,
+                         const std::vector<std::string>& code_paths) {
+  DCHECK(Runtime::Current()->UseJit());
+  DCHECK(!output_filename.empty());
+  DCHECK(jit_code_cache != nullptr);
+
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  // Don't start two profile saver threads.
+  if (instance_ != nullptr) {
+    DCHECK(false) << "Tried to start two profile savers";
+    return;
+  }
+
+  VLOG(profiler) << "Starting profile saver using output file: " << output_filename
+      << ". Tracking: " << Join(code_paths, ':');
+
+  instance_ = new ProfileSaver(output_filename, jit_code_cache, code_paths);
+
+  // Create a new thread which does the saving.
+  CHECK_PTHREAD_CALL(
+      pthread_create,
+      (&profiler_pthread_, nullptr, &RunProfileSaverThread, reinterpret_cast<void*>(instance_)),
+      "Profile saver thread");
+}
+
+void ProfileSaver::Stop() {
+  ProfileSaver* profile_saver = nullptr;
+  pthread_t profiler_pthread = 0U;
+
+  {
+    MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ == nullptr) {
+      DCHECK(false) << "Tried to stop a profile saver which was not started";
+      return;
+    }
+    if (instance_->shutting_down_) {
+      DCHECK(false) << "Tried to stop the profile saver twice";
+      return;
+    }
+    VLOG(profiler) << "Stopping profile saver thread for file: " << instance_->output_filename_;
+    profile_saver = instance_;
+    profiler_pthread = profiler_pthread_;
+    instance_->shutting_down_ = true;
+  }
+
+  {
+    // Wake up the saver thread if it is sleeping to allow for a clean exit.
+    MutexLock wait_mutex(Thread::Current(), profile_saver->wait_lock_);
+    profile_saver->period_condition_.Signal(Thread::Current());
+  }
+
+  // Wait for the saver thread to stop.
+  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profile saver thread shutdown");
+
+  {
+    MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
+    instance_ = nullptr;
+    profiler_pthread_ = 0U;
+  }
+  delete profile_saver;
+}
+
+bool ProfileSaver::ShuttingDown(Thread* self) {
+  MutexLock mu(self, *Locks::profiler_lock_);
+  return shutting_down_;
+}
+
+bool ProfileSaver::IsStarted() {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  return instance_ != nullptr;
+}
+
+}  // namespace art
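With the constants above, the saver's idle behavior is a geometric backoff: 4 s, 6 s, 9 s, 13.5 s, ... capped at 4 minutes, with up to 10 s of random jitter added to each sleep. A self-contained check of the schedule (mirrors the capped backoff in Run()):

    #include <algorithm>
    #include <cstdint>

    int main() {
      uint64_t period_ms = 4000;  // kSavePeriodMs
      for (int i = 0; i < 12; ++i) {
        // Multiply by kBackoffCoef (1.5), cap at kMaxBackoffMs (240000).
        period_ms = std::min<uint64_t>(240000, static_cast<uint64_t>(1.5 * period_ms));
      }
      return period_ms == 240000 ? 0 : 1;  // 4000 -> 6000 -> 9000 -> ... -> 240000
    }
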
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
new file mode 100644
index 0000000..d60142b
--- /dev/null
+++ b/runtime/jit/profile_saver.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_SAVER_H_
+#define ART_RUNTIME_JIT_PROFILE_SAVER_H_
+
+#include "base/mutex.h"
+#include "jit_code_cache.h"
+#include "offline_profiling_info.h"
+
+namespace art {
+
+class ProfileSaver {
+ public:
+  // Starts the profile saver thread.
+  static void Start(const std::string& output_filename,
+                    jit::JitCodeCache* jit_code_cache,
+                    const std::vector<std::string>& code_paths)
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+
+  // Stops the profile saver thread.
+  // NO_THREAD_SAFETY_ANALYSIS: static function calling into a member function that excludes locks.
+  static void Stop()
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // Returns true if the profile saver is started.
+  static bool IsStarted() REQUIRES(!Locks::profiler_lock_);
+
+ private:
+  ProfileSaver(const std::string& output_filename,
+               jit::JitCodeCache* jit_code_cache,
+               const std::vector<std::string>& code_paths);
+
+  // NO_THREAD_SAFETY_ANALYSIS: static function calling into a member function that excludes locks.
+  static void* RunProfileSaverThread(void* arg)
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // The run loop for the saver.
+  void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+  // Processes the existing profiling info from the jit code cache and returns
+  // true if it needed to be saved to disk.
+  bool ProcessProfilingInfo();
+  // Returns true if the saver is shutting down (ProfileSaver::Stop() has been called).
+  bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
+
+  // The only instance of the saver.
+  static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
+  // Profile saver thread.
+  static pthread_t profiler_pthread_ GUARDED_BY(Locks::profiler_lock_);
+
+  const std::string output_filename_;
+  jit::JitCodeCache* jit_code_cache_;
+  const std::set<const std::string> tracked_dex_base_locations_;
+  uint64_t code_cache_last_update_time_ns_;
+  bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
+
+  // Save period condition support.
+  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_SAVER_H_
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index aa25f67..1ee1611 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -59,6 +59,7 @@
       local_ref_cookie(IRT_FIRST_SEGMENT),
       locals(kLocalsInitial, kLocalsMax, kLocal, false),
       check_jni(false),
+      runtime_deleted(false),
       critical(0),
       monitors("monitors", kMonitorsInitial, kMonitorsMax) {
   functions = unchecked_functions = GetJniNativeInterface();
@@ -67,6 +68,11 @@
   }
 }
 
+void JNIEnvExt::SetFunctionsToRuntimeShutdownFunctions() {
+  functions = GetRuntimeShutdownNativeInterface();
+  runtime_deleted = true;
+}
+
 JNIEnvExt::~JNIEnvExt() {
 }
 
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 2f8decf..d4accc3 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -74,6 +74,9 @@
   // Frequently-accessed fields cached from JavaVM.
   bool check_jni;
 
+  // If we are a JNI env for a daemon thread with a deleted runtime.
+  bool runtime_deleted;
+
   // How many nested "critical" JNI calls are we in?
   int critical;
 
@@ -95,6 +98,9 @@
   // Check that no monitors are held that have been acquired in this JNI "segment."
   void CheckNoHeldMonitors() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Set the functions to the runtime shutdown functions.
+  void SetFunctionsToRuntimeShutdownFunctions();
+
  private:
   // The constructor should not be called directly. It may leave the object in an erroneous state,
   // and the result needs to be checked.
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index cb67ee3..c893a0f 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2734,6 +2734,246 @@
   return &gJniNativeInterface;
 }
 
+void (*gJniSleepForeverStub[])() = {
+  nullptr,  // reserved0.
+  nullptr,  // reserved1.
+  nullptr,  // reserved2.
+  nullptr,  // reserved3.
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+  SleepForever,
+};
+
+const JNINativeInterface* GetRuntimeShutdownNativeInterface() {
+  return reinterpret_cast<JNINativeInterface*>(&gJniSleepForeverStub);
+}
+
 void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
                            jint method_count) {
   ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
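The block of SleepForever entries above mirrors the layout of JNINativeInterface: the first four slots are the reserved pointers, and every functional slot is replaced by a stub that never returns. The intended use, in sketch form (the caller in the shutdown path is assumed, not shown in this hunk):

    // Sketch: parking a daemon thread's JNI env once the runtime is destroyed.
    // Any JNI call the daemon makes afterwards sleeps forever instead of
    // touching freed runtime state.
    void ParkDaemonEnv(art::JNIEnvExt* env) {
      env->SetFunctionsToRuntimeShutdownFunctions();  // swap in the stub table
    }
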
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 48b10f5..3429962 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -30,6 +30,7 @@
 namespace art {
 
 const JNINativeInterface* GetJniNativeInterface();
+const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
 // Similar to RegisterNatives except its passed a descriptor for a class name and failures are
 // fatal.
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index b41d16b..c718466 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -2091,8 +2091,7 @@
   MakeExecutable(nullptr, "java.lang.Class");
   MakeExecutable(nullptr, "java.lang.Object");
   MakeExecutable(nullptr, "java.nio.DirectByteBuffer");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock$UnmanagedBlock");
+  MakeExecutable(nullptr, "java.nio.Bits");
   MakeExecutable(nullptr, "java.nio.MappedByteBuffer");
   MakeExecutable(nullptr, "java.nio.ByteBuffer");
   MakeExecutable(nullptr, "java.nio.Buffer");
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index ce879ba..489c269 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -560,8 +560,8 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(size_t pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 65;
-    return ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 0, pointer_size);
+    uint32_t vtable_entries = Object::kVTableLength + 69;
+    return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
   // The size of a java.lang.Class representing a primitive such as int.class.
@@ -1275,6 +1275,8 @@
   // 'Class' Object Fields
   // Order governed by java field ordering. See art::ClassLinker::LinkFields.
 
+  HeapReference<Object> annotation_type_;
+
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1320,6 +1322,9 @@
   // virtual_ methods_ for miranda methods.
   HeapReference<PointerArray> vtable_;
 
+  // Access flags; low 16 bits are defined by VM spec.
+  uint32_t access_flags_;
+
   // Short cuts to dex_cache_ member for fast compiled code access.
   uint64_t dex_cache_strings_;
 
@@ -1352,9 +1357,6 @@
   // Static fields length-prefixed array.
   uint64_t sfields_;
 
-  // Access flags; low 16 bits are defined by VM spec.
-  uint32_t access_flags_;
-
   // Class flags to help speed up visiting object references.
   uint32_t class_flags_;
 
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 01e99b9..bd4a9c1 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -23,7 +23,7 @@
 namespace mirror {
 
 inline uint32_t Reference::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 5;
+  uint32_t vtable_entries = Object::kVTableLength + 4;
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 28a830d..cdf468c 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -33,8 +33,8 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 52;
-  return Class::ComputeClassSize(true, vtable_entries, 0, 1, 0, 1, 2, pointer_size);
+  uint32_t vtable_entries = Object::kVTableLength + 53;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 2, 0, 1, 2, pointer_size);
 }
 
 // Sets string count in the allocation code path to ensure it is guarded by a CAS.
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e215994..f068b3e 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -56,9 +56,9 @@
 void Throwable::SetStackState(Object* state) SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(state != nullptr);
   if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   } else {
-    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   }
 }
 
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0f488dc..6aacc8d 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -60,16 +60,16 @@
 
  private:
   Object* GetStackState() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
   Object* GetStackTrace() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_trace_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<Object> backtrace_;  // Note this is Java volatile:
   HeapReference<Throwable> cause_;
   HeapReference<String> detail_message_;
-  HeapReference<Object> stack_state_;  // Note this is Java volatile:
   HeapReference<Object> stack_trace_;
   HeapReference<Object> suppressed_exceptions_;
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 424cc11..da4a891 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -224,7 +224,6 @@
 static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) {
   Runtime* runtime = Runtime::Current();
   runtime->GetHeap()->UpdateProcessState(static_cast<gc::ProcessState>(process_state));
-  runtime->UpdateProfilerState(process_state);
 }
 
 static void VMRuntime_trimHeap(JNIEnv* env, jobject) {
@@ -566,8 +565,8 @@
  */
 static void VMRuntime_registerAppInfo(JNIEnv* env,
                                       jclass clazz ATTRIBUTE_UNUSED,
-                                      jstring pkg_name,
-                                      jstring app_dir,
+                                      jstring profile_file,
+                                      jstring app_dir ATTRIBUTE_UNUSED,  // TODO: remove argument
                                       jobjectArray code_paths) {
   std::vector<std::string> code_paths_vec;
   int code_paths_length = env->GetArrayLength(code_paths);
@@ -578,13 +577,11 @@
     env->ReleaseStringUTFChars(code_path, raw_code_path);
   }
 
-  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
-  const char* raw_pkg_name = env->GetStringUTFChars(pkg_name, nullptr);
-  std::string profile_file = StringPrintf("%s/code_cache/%s.prof", raw_app_dir, raw_pkg_name);
-  env->ReleaseStringUTFChars(pkg_name, raw_pkg_name);
-  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
+  const char* raw_profile_file = env->GetStringUTFChars(profile_file, nullptr);
+  std::string profile_file_str(raw_profile_file);
+  env->ReleaseStringUTFChars(profile_file, raw_profile_file);
 
-  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file);
+  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file_str);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
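
Note: the GetStringUTFChars/ReleaseStringUTFChars pair above is balanced by hand; ScopedUtfChars, the RAII helper already used elsewhere in these native bindings, would keep any early return added later from leaking the chars. A minimal sketch of the equivalent extraction:

    ScopedUtfChars raw_profile_file(env, profile_file);
    if (raw_profile_file.c_str() == nullptr) {
      return;  // An exception (e.g. OutOfMemoryError) is already pending.
    }
    std::string profile_file_str(raw_profile_file.c_str());
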
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index ae1a4d7..67d825e 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -129,7 +129,11 @@
   return reinterpret_cast<jlong>(ThreadForEnv(env));
 }
 
-static void ZygoteHooks_nativePostForkChild(JNIEnv* env, jclass, jlong token, jint debug_flags,
+static void ZygoteHooks_nativePostForkChild(JNIEnv* env,
+                                            jclass,
+                                            jlong token,
+                                            jint debug_flags,
+                                            jboolean is_system_server,
                                             jstring instruction_set) {
   Thread* thread = reinterpret_cast<Thread*>(token);
   // Our system thread ID, etc, has changed so reset Thread state.
@@ -174,22 +178,24 @@
     }
   }
 
-  if (instruction_set != nullptr) {
+  if (instruction_set != nullptr && !is_system_server) {
     ScopedUtfChars isa_string(env, instruction_set);
     InstructionSet isa = GetInstructionSetFromString(isa_string.c_str());
     Runtime::NativeBridgeAction action = Runtime::NativeBridgeAction::kUnload;
     if (isa != kNone && isa != kRuntimeISA) {
       action = Runtime::NativeBridgeAction::kInitialize;
     }
-    Runtime::Current()->InitNonZygoteOrPostFork(env, action, isa_string.c_str());
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, action, isa_string.c_str());
   } else {
-    Runtime::Current()->InitNonZygoteOrPostFork(env, Runtime::NativeBridgeAction::kUnload, nullptr);
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, Runtime::NativeBridgeAction::kUnload, nullptr);
   }
 }
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(ZygoteHooks, nativePreFork, "()J"),
-  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JILjava/lang/String;)V"),
+  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JIZLjava/lang/String;)V"),
 };
 
 void register_dalvik_system_ZygoteHooks(JNIEnv* env) {
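
Note: the registration descriptor has to mirror the native parameter list exactly, so the new jboolean surfaces as the 'Z' between the jint and the String. Decoded, assuming the standard JNI type mappings:

    // "(JIZLjava/lang/String;)V" reads as:
    //   J                  -> jlong    token
    //   I                  -> jint     debug_flags
    //   Z                  -> jboolean is_system_server
    //   Ljava/lang/String; -> jstring  instruction_set
    //   V                  -> void     (return type)
    // A descriptor/signature mismatch is rejected when the methods are
    // registered rather than at call time.
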
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 1977481..e89c74d 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -288,13 +288,6 @@
       GetPublicFieldRecursive(soa.Self(), DecodeClass(soa, javaThis), name_string));
 }
 
-static jobject Class_getDeclaredFieldInternal(JNIEnv* env, jobject javaThis, jstring name) {
-  ScopedFastNativeObjectAccess soa(env);
-  auto* name_string = soa.Decode<mirror::String*>(name);
-  return soa.AddLocalReference<jobject>(
-      GetDeclaredField(soa.Self(), DecodeClass(soa, javaThis), name_string));
-}
-
 static jobject Class_getDeclaredField(JNIEnv* env, jobject javaThis, jstring name) {
   ScopedFastNativeObjectAccess soa(env);
   auto* name_string = soa.Decode<mirror::String*>(name);
@@ -306,6 +299,12 @@
   mirror::Field* result = GetDeclaredField(soa.Self(), klass, name_string);
   if (result == nullptr) {
     std::string name_str = name_string->ToModifiedUtf8();
+    if (name_str == "value" && klass->IsStringClass()) {
+      // We log the error for this specific case, as the user might just swallow the exception.
+      // This helps diagnose crashes when applications rely on the String#value field being
+      // there.
+      LOG(ERROR) << "The String#value field is not present on Android versions >= 6.0";
+    }
     // We may have a pending exception if we failed to resolve.
     if (!soa.Self()->IsExceptionPending()) {
       ThrowNoSuchFieldException(DecodeClass(soa, javaThis), name_str.c_str());
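
Note: the new LOG(ERROR) targets apps that reflect on String's backing char array, which no longer exists as a Java field. The JNI route fails the same way; a minimal sketch of the failing probe and a graceful fallback:

    // Sketch: on Android 6.0+ this returns nullptr with NoSuchFieldError pending.
    jclass string_class = env->FindClass("java/lang/String");
    jfieldID value_field = env->GetFieldID(string_class, "value", "[C");
    if (value_field == nullptr) {
      env->ExceptionClear();  // Fall back instead of crashing later.
    }
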
@@ -723,7 +722,6 @@
   NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"),
   NATIVE_METHOD(Class, getDeclaredField, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getPublicFieldRecursive, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
-  NATIVE_METHOD(Class, getDeclaredFieldInternal, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "!(Z)[Ljava/lang/reflect/Field;"),
   NATIVE_METHOD(Class, getDeclaredMethodInternal,
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index f42a17d..c177f19 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -80,7 +80,7 @@
   // Starting with N nativeLoad uses classloader local
   // linker namespace instead of global LD_LIBRARY_PATH
   // (23 is Marshmallow)
-  if (target_sdk_version <= INT_MAX) {
+  if (target_sdk_version == 0) {
     SetLdLibraryPath(env, javaLibrarySearchPath);
   }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 45b9484..ddcaade 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -86,7 +86,7 @@
  * with an interface, array, or primitive class. If this is coming from
  * native, it is OK to avoid access checks since JNI does not enforce them.
  */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
+static jobject Constructor_newInstance0(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Constructor* m = soa.Decode<mirror::Constructor*>(javaMethod);
   StackHandleScope<1> hs(soa.Self());
@@ -99,7 +99,9 @@
   }
   // Verify that we can access the class.
   if (!m->IsAccessible() && !c->IsPublic()) {
-    auto* caller = GetCallingClass(soa.Self(), 1);
+    // Go 2 frames back, this method is always called from newInstance0, which is called from
+    // Constructor.newInstance(Object... args).
+    auto* caller = GetCallingClass(soa.Self(), 2);
     // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
     // access checks anyways. TODO: Investigate if this the correct behavior.
     if (caller != nullptr && !caller->CanAccess(c.Get())) {
@@ -127,7 +129,7 @@
 
   // String constructor is replaced by a StringFactory method in InvokeMethod.
   if (c->IsStringClass()) {
-    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 1);
+    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 2);
   }
 
   mirror::Object* receiver =
@@ -136,11 +138,18 @@
     return nullptr;
   }
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 1);
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 2);
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
+static jobject Constructor_newInstanceFromSerialization(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED,
+                                                        jclass ctorClass, jclass allocClass) {
+  jmethodID ctor = env->GetMethodID(ctorClass, "<init>", "()V");
+  DCHECK(ctor != nullptr);
+  return env->NewObject(allocClass, ctor);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Constructor, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
@@ -149,7 +158,8 @@
   NATIVE_METHOD(Constructor, getParameterAnnotationsNative,
                 "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Constructor, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
-  NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstance0, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstanceFromSerialization, "!(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
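
Note: the depth argument to GetCallingClass()/InvokeMethod() is the number of managed frames to skip before the real caller, so the Java-side newInstance/newInstance0 split bumps it from 1 to 2. The expected stack when the native code runs:

    // Managed stack, top first:
    //   [0] java.lang.reflect.Constructor.newInstance0  (this native method)
    //   [1] java.lang.reflect.Constructor.newInstance   (varargs wrapper)
    //   [2] the actual caller  <- resolved by GetCallingClass(soa.Self(), 2)
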
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 83125ce..8a2c7e4 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -15,7 +15,7 @@
  */
 
 #include "sun_misc_Unsafe.h"
-
+#include "common_throws.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/array.h"
@@ -23,6 +23,10 @@
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
 
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
 namespace art {
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
@@ -196,6 +200,284 @@
   return Primitive::ComponentSize(primitive_type);
 }
 
+static jint Unsafe_addressSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sizeof(void*);
+}
+
+static jint Unsafe_pageSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sysconf(_SC_PAGESIZE);
+}
+
+static jlong Unsafe_allocateMemory(JNIEnv* env, jobject, jlong bytes) {
+  ScopedFastNativeObjectAccess soa(env);
+  // Check that bytes is non-negative and fits into a size_t.
+  if (bytes < 0 || bytes != (jlong)(size_t) bytes) {
+    ThrowIllegalAccessException("wrong number of bytes");
+    return 0;
+  }
+  void* mem = malloc(bytes);
+  if (mem == nullptr) {
+    soa.Self()->ThrowOutOfMemoryError("native alloc");
+    return 0;
+  }
+  return (uintptr_t) mem;
+}
+
+static void Unsafe_freeMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  free(reinterpret_cast<void*>(static_cast<uintptr_t>(address)));
+}
+
+static void Unsafe_setMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong bytes, jbyte value) {
+  memset(reinterpret_cast<void*>(static_cast<uintptr_t>(address)), value, bytes);
+}
+
+static jbyte Unsafe_getByte$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jbyte*>(address);
+}
+
+static void Unsafe_putByte$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jbyte value) {
+  *reinterpret_cast<jbyte*>(address) = value;
+}
+
+static jshort Unsafe_getShort$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jshort*>(address);
+}
+
+static void Unsafe_putShort$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jshort value) {
+  *reinterpret_cast<jshort*>(address) = value;
+}
+
+static jchar Unsafe_getChar$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jchar*>(address);
+}
+
+static void Unsafe_putChar$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jchar value) {
+  *reinterpret_cast<jchar*>(address) = value;
+}
+
+static jint Unsafe_getInt$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jint*>(address);
+}
+
+static void Unsafe_putInt$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jint value) {
+  *reinterpret_cast<jint*>(address) = value;
+}
+
+static jlong Unsafe_getLong$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jlong*>(address);
+}
+
+static void Unsafe_putLong$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong value) {
+  *reinterpret_cast<jlong*>(address) = value;
+}
+
+static jfloat Unsafe_getFloat$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jfloat*>(address);
+}
+
+static void Unsafe_putFloat$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jfloat value) {
+  *reinterpret_cast<jfloat*>(address) = value;
+}
+
+static jdouble Unsafe_getDouble$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jdouble*>(address);
+}
+
+static void Unsafe_putDouble$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jdouble value) {
+  *reinterpret_cast<jdouble*>(address) = value;
+}
+
+static void Unsafe_copyMemory(JNIEnv *env, jobject unsafe ATTRIBUTE_UNUSED, jlong src,
+                              jlong dst, jlong size) {
+    if (size == 0) {
+        return;
+    }
+    // Check that size is non-negative and fits into a size_t; bail out before
+    // the memcpy below runs with a truncated size.
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ScopedFastNativeObjectAccess soa(env);
+        ThrowIllegalAccessException("wrong number of bytes");
+        return;
+    }
+    size_t sz = (size_t)size;
+    memcpy(reinterpret_cast<void *>(dst), reinterpret_cast<void *>(src), sz);
+}
+
+template<typename T>
+static void copyToArray(jlong srcAddr, mirror::PrimitiveArray<T>* array,
+                        size_t array_offset,
+                        size_t size)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+    const T* src = reinterpret_cast<T*>(srcAddr);
+    size_t sz = size / sizeof(T);
+    size_t of = array_offset / sizeof(T);
+    for (size_t i = 0; i < sz; ++i) {
+        array->Set(i + of, *(src + i));
+    }
+}
+
+template<typename T>
+static void copyFromArray(jlong dstAddr, mirror::PrimitiveArray<T>* array,
+                          size_t array_offset,
+                          size_t size)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+    T* dst = reinterpret_cast<T*>(dstAddr);
+    size_t sz = size / sizeof(T);
+    size_t of = array_offset / sizeof(T);
+    for (size_t i = 0; i < sz; ++i) {
+        *(dst + i) = array->Get(i + of);
+    }
+}
+
+static void Unsafe_copyMemoryToPrimitiveArray(JNIEnv *env,
+                                              jobject unsafe ATTRIBUTE_UNUSED,
+                                              jlong srcAddr,
+                                              jobject dstObj,
+                                              jlong dstOffset,
+                                              jlong size) {
+    ScopedObjectAccess soa(env);
+    if (size == 0) {
+        return;
+    }
+    // Check that size is non-negative and fits into a size_t.
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ThrowIllegalAccessException("wrong number of bytes");
+        return;
+    }
+    size_t sz = (size_t)size;
+    size_t dst_offset = (size_t)dstOffset;
+    mirror::Object* dst = soa.Decode<mirror::Object*>(dstObj);
+    mirror::Class* component_type = dst->GetClass()->GetComponentType();
+    if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+        copyToArray(srcAddr, dst->AsByteSizedArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+        copyToArray(srcAddr, dst->AsShortSizedArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+        copyToArray(srcAddr, dst->AsIntArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+        copyToArray(srcAddr, dst->AsLongArray(), dst_offset, sz);
+    } else {
+        ThrowIllegalAccessException("not a primitive array");
+    }
+}
+
+static void Unsafe_copyMemoryFromPrimitiveArray(JNIEnv *env,
+                                                jobject unsafe ATTRIBUTE_UNUSED,
+                                                jobject srcObj,
+                                                jlong srcOffset,
+                                                jlong dstAddr,
+                                                jlong size) {
+    ScopedObjectAccess soa(env);
+    if (size == 0) {
+        return;
+    }
+    // Check that size is non-negative and fits into a size_t.
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ThrowIllegalAccessException("wrong number of bytes");
+        return;
+    }
+    size_t sz = (size_t)size;
+    size_t src_offset = (size_t)srcOffset;
+    mirror::Object* src = soa.Decode<mirror::Object*>(srcObj);
+    mirror::Class* component_type = src->GetClass()->GetComponentType();
+    if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+        copyFromArray(dstAddr, src->AsByteSizedArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+        copyFromArray(dstAddr, src->AsShortSizedArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+        copyFromArray(dstAddr, src->AsIntArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+        copyFromArray(dstAddr, src->AsLongArray(), src_offset, sz);
+    } else {
+        ThrowIllegalAccessException("not a primitive array");
+    }
+}
+
+static jboolean Unsafe_getBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldBoolean(MemberOffset(offset));
+}
+
+static void Unsafe_putBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset, jboolean newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode (SetField8 is non-transactional).
+    obj->SetFieldBoolean<false>(MemberOffset(offset), newValue);
+}
+
+static jbyte Unsafe_getByte(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldByte(MemberOffset(offset));
+}
+
+static void Unsafe_putByte(JNIEnv* env, jobject, jobject javaObj, jlong offset, jbyte newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldByte<false>(MemberOffset(offset), newValue);
+}
+
+static jchar Unsafe_getChar(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldChar(MemberOffset(offset));
+}
+
+static void Unsafe_putChar(JNIEnv* env, jobject, jobject javaObj, jlong offset, jchar newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldChar<false>(MemberOffset(offset), newValue);
+}
+
+static jshort Unsafe_getShort(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldShort(MemberOffset(offset));
+}
+
+static void Unsafe_putShort(JNIEnv* env, jobject, jobject javaObj, jlong offset, jshort newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldShort<false>(MemberOffset(offset), newValue);
+}
+
+static jfloat Unsafe_getFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int32_t val; jfloat converted;} conv;
+  conv.val = obj->GetField32(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset, jfloat newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int32_t converted; jfloat val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField32<false>(MemberOffset(offset), conv.converted);
+}
+
+static jdouble Unsafe_getDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t val; jdouble converted;} conv;
+  conv.val = obj->GetField64(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset, jdouble newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t converted; jdouble val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField64<false>(MemberOffset(offset), conv.converted);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -217,6 +499,40 @@
   NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
   NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
   NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
+  NATIVE_METHOD(Unsafe, addressSize, "!()I"),
+  NATIVE_METHOD(Unsafe, pageSize, "!()I"),
+  NATIVE_METHOD(Unsafe, allocateMemory, "!(J)J"),
+  NATIVE_METHOD(Unsafe, freeMemory, "!(J)V"),
+  NATIVE_METHOD(Unsafe, setMemory, "!(JJB)V"),
+  NATIVE_METHOD(Unsafe, getByte$, "!(J)B"),
+  NATIVE_METHOD(Unsafe, putByte$, "!(JB)V"),
+  NATIVE_METHOD(Unsafe, getShort$, "!(J)S"),
+  NATIVE_METHOD(Unsafe, putShort$, "!(JS)V"),
+  NATIVE_METHOD(Unsafe, getChar$, "!(J)C"),
+  NATIVE_METHOD(Unsafe, putChar$, "!(JC)V"),
+  NATIVE_METHOD(Unsafe, getInt$, "!(J)I"),
+  NATIVE_METHOD(Unsafe, putInt$, "!(JI)V"),
+  NATIVE_METHOD(Unsafe, getLong$, "!(J)J"),
+  NATIVE_METHOD(Unsafe, putLong$, "!(JJ)V"),
+  NATIVE_METHOD(Unsafe, getFloat$, "!(J)F"),
+  NATIVE_METHOD(Unsafe, putFloat$, "!(JF)V"),
+  NATIVE_METHOD(Unsafe, getDouble$, "!(J)D"),
+  NATIVE_METHOD(Unsafe, putDouble$, "!(JD)V"),
+  NATIVE_METHOD(Unsafe, copyMemory, "!(JJJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryToPrimitiveArray, "!(JLjava/lang/Object;JJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryFromPrimitiveArray, "!(Ljava/lang/Object;JJJ)V"),
+  NATIVE_METHOD(Unsafe, getBoolean, "!(Ljava/lang/Object;J)Z"),
+  NATIVE_METHOD(Unsafe, getByte, "!(Ljava/lang/Object;J)B"),
+  NATIVE_METHOD(Unsafe, getChar, "!(Ljava/lang/Object;J)C"),
+  NATIVE_METHOD(Unsafe, getShort, "!(Ljava/lang/Object;J)S"),
+  NATIVE_METHOD(Unsafe, getFloat, "!(Ljava/lang/Object;J)F"),
+  NATIVE_METHOD(Unsafe, getDouble, "!(Ljava/lang/Object;J)D"),
+  NATIVE_METHOD(Unsafe, putBoolean, "!(Ljava/lang/Object;JZ)V"),
+  NATIVE_METHOD(Unsafe, putByte, "!(Ljava/lang/Object;JB)V"),
+  NATIVE_METHOD(Unsafe, putChar, "!(Ljava/lang/Object;JC)V"),
+  NATIVE_METHOD(Unsafe, putShort, "!(Ljava/lang/Object;JS)V"),
+  NATIVE_METHOD(Unsafe, putFloat, "!(Ljava/lang/Object;JF)V"),
+  NATIVE_METHOD(Unsafe, putDouble, "!(Ljava/lang/Object;JD)V"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
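
Note: the float/double field accessors go through GetField32/GetField64, so the unions above exist purely to reinterpret the raw bits; no int-to-float conversion takes place. A memcpy-based helper expresses the same intent without relying on union type punning; a self-contained sketch (the helper name is illustrative, not an ART API):

    #include <cstring>

    // Sketch: bit-exact reinterpretation, equivalent to the unions above.
    template <typename Dst, typename Src>
    Dst BitCopy(const Src& src) {
      static_assert(sizeof(Dst) == sizeof(Src), "Dst and Src must match in size");
      Dst dst;
      memcpy(&dst, &src, sizeof(dst));
      return dst;
    }

    // e.g. jfloat f = BitCopy<jfloat>(obj->GetField32(MemberOffset(offset)));
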
diff --git a/runtime/oat.h b/runtime/oat.h
index 5ed1977..13fd6a4 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -39,6 +39,7 @@
   static constexpr const char* kPicKey = "pic";
   static constexpr const char* kDebuggableKey = "debuggable";
   static constexpr const char* kClassPathKey = "classpath";
+  static constexpr const char* kBootClassPath = "bootclasspath";
 
   static constexpr const char kTrueValue[] = "true";
   static constexpr const char kFalseValue[] = "false";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 0f3a013..d6b0868 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -846,11 +846,12 @@
 
 std::string OatFileAssistant::ImageLocation() {
   Runtime* runtime = Runtime::Current();
-  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
-  if (image_space == nullptr) {
+  const std::vector<gc::space::ImageSpace*>& image_spaces =
+      runtime->GetHeap()->GetBootImageSpaces();
+  if (image_spaces.empty()) {
     return "";
   }
-  return image_space->GetImageLocation();
+  return image_spaces[0]->GetImageLocation();
 }
 
 const uint32_t* OatFileAssistant::GetRequiredDexChecksum() {
@@ -949,12 +950,12 @@
     image_info_load_attempted_ = true;
 
     Runtime* runtime = Runtime::Current();
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
-    if (image_space != nullptr) {
-      cached_image_info_.location = image_space->GetImageLocation();
+    std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces();
+    if (!image_spaces.empty()) {
+      cached_image_info_.location = image_spaces[0]->GetImageLocation();
 
       if (isa_ == kRuntimeISA) {
-        const ImageHeader& image_header = image_space->GetImageHeader();
+        const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
         cached_image_info_.oat_checksum = image_header.GetOatChecksum();
         cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
             image_header.GetOatDataBegin());
@@ -969,7 +970,7 @@
         cached_image_info_.patch_delta = image_header->GetPatchDelta();
       }
     }
-    image_info_load_succeeded_ = (image_space != nullptr);
+    image_info_load_succeeded_ = (!image_spaces.empty());
   }
   return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
 }
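
Note: with the move to multiple boot image spaces (for example a framework image stacked on the core image), the heap now reports a vector, and callers that need a single location take index 0 since the primary image heads the list. If every location were wanted, the natural shape is a joined list; a sketch:

    // Sketch: collecting all boot image locations, not just the primary one.
    std::string locations;
    for (gc::space::ImageSpace* space : runtime->GetHeap()->GetBootImageSpaces()) {
      if (!locations.empty()) {
        locations += ":";
      }
      locations += space->GetImageLocation();
    }
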
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 8c7efb2..f994f0c 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -223,9 +223,10 @@
         false, dex_location.c_str(), &error_msg));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
 
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
-    ASSERT_TRUE(image_space != nullptr);
-    const ImageHeader& image_header = image_space->GetImageHeader();
+    const std::vector<gc::space::ImageSpace*> image_spaces =
+        runtime->GetHeap()->GetBootImageSpaces();
+    ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
+    const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
     const OatHeader& oat_header = odex_file->GetOatHeader();
     EXPECT_FALSE(odex_file->IsPic());
     EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
@@ -1025,7 +1026,7 @@
 
   // We use the lib core dex file, because it's large, and hopefully should
   // take a while to generate.
-  Copy(GetLibCoreDexFileName(), dex_location);
+  Copy(GetLibCoreDexFileNames()[0], dex_location);
 
   const int kNumThreads = 32;
   Thread* self = Thread::Current();
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index ea6d3ff..36a967f 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -78,17 +78,23 @@
   return nullptr;
 }
 
-const OatFile* OatFileManager::GetBootOatFile() const {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-  return (image_space == nullptr) ? nullptr : image_space->GetOatFile();
+std::vector<const OatFile*> OatFileManager::GetBootOatFiles() const {
+  std::vector<const OatFile*> oat_files;
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  for (gc::space::ImageSpace* image_space : image_spaces) {
+    oat_files.push_back(image_space->GetOatFile());
+  }
+  return oat_files;
 }
 
 const OatFile* OatFileManager::GetPrimaryOatFile() const {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
-  const OatFile* boot_oat_file = GetBootOatFile();
-  if (boot_oat_file != nullptr) {
+  std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
+  if (!boot_oat_files.empty()) {
     for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
-      if (oat_file.get() != boot_oat_file) {
+      if (std::find(boot_oat_files.begin(), boot_oat_files.end(), oat_file.get()) ==
+          boot_oat_files.end()) {
         return oat_file.get();
       }
     }
@@ -102,8 +108,13 @@
   oat_files_.clear();
 }
 
-const OatFile* OatFileManager::RegisterImageOatFile(gc::space::ImageSpace* space) {
-  return RegisterOatFile(space->ReleaseOatFile());
+std::vector<const OatFile*> OatFileManager::RegisterImageOatFiles(
+    std::vector<gc::space::ImageSpace*> spaces) {
+  std::vector<const OatFile*> oat_files;
+  for (gc::space::ImageSpace* space : spaces) {
+    oat_files.push_back(RegisterOatFile(space->ReleaseOatFile()));
+  }
+  return oat_files;
 }
 
 class DexFileAndClassPair : ValueObject {
@@ -213,7 +224,7 @@
   std::priority_queue<DexFileAndClassPair> queue;
 
   // Add dex files from already loaded oat files, but skip boot.
-  const OatFile* boot_oat = GetBootOatFile();
+  std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
   // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
   // need to check both against each other since they would have resolved the same way at compile
   // time.
@@ -221,8 +232,8 @@
   for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
     DCHECK_NE(loaded_oat_file.get(), oat_file);
     const std::string& location = loaded_oat_file->GetLocation();
-    if (loaded_oat_file.get() != boot_oat &&
-        location != oat_file->GetLocation() &&
+    if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) ==
+        boot_oat_files.end() && location != oat_file->GetLocation() &&
         unique_locations.find(location) == unique_locations.end()) {
       unique_locations.insert(location);
       AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
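
Note: what used to be a single pointer comparison against the boot oat file is now a linear std::find per loaded oat file. The boot list stays small, so this is fine; if it ever grew, hoisting it into a set once before the loop would keep the collision scan linear overall. A sketch:

    #include <unordered_set>

    // Sketch: constant-time membership test for boot oat files.
    std::unordered_set<const OatFile*> boot_set(boot_oat_files.begin(),
                                                boot_oat_files.end());
    bool is_boot_oat = boot_set.count(loaded_oat_file.get()) != 0;
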
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index af7efb4..4690e45 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -73,15 +73,15 @@
     return have_non_pic_oat_file_;
   }
 
-  // Returns the boot image oat file.
-  const OatFile* GetBootOatFile() const;
+  // Returns the boot image oat files.
+  std::vector<const OatFile*> GetBootOatFiles() const;
 
   // Returns the first non-image oat file in the class path.
   const OatFile* GetPrimaryOatFile() const REQUIRES(!Locks::oat_file_manager_lock_);
 
-  // Return the oat file for an image, registers the oat file. Takes ownership of the imagespace's
-  // underlying oat file.
-  const OatFile* RegisterImageOatFile(gc::space::ImageSpace* space)
+  // Returns the oat files for the images, registers the oat files.
+  // Takes ownership of the imagespace's underlying oat files.
+  std::vector<const OatFile*> RegisterImageOatFiles(std::vector<gc::space::ImageSpace*> spaces)
       REQUIRES(!Locks::oat_file_manager_lock_);
 
   // Finds or creates the oat file holding dex_location. Then loads and returns
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 03cad08..5643739 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -44,7 +44,8 @@
     uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
     uintptr_t header = code - OFFSETOF_MEMBER(OatQuickMethodHeader, code_);
     DCHECK(IsAlignedParam(code, GetInstructionSetAlignment(kRuntimeISA)) ||
-           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)));
+           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)))
+        << std::hex << code << " " << std::hex << header;
     return reinterpret_cast<OatQuickMethodHeader*>(header);
   }
 
diff --git a/runtime/openjdkjvm/NOTICE b/runtime/openjdkjvm/NOTICE
new file mode 100644
index 0000000..700a206
--- /dev/null
+++ b/runtime/openjdkjvm/NOTICE
@@ -0,0 +1,29 @@
+Copyright (C) 2014 The Android Open Source Project
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This file implements interfaces from the file jvm.h. This implementation
+is licensed under the same terms as the file jvm.h.  The
+copyright and license information for the file jvm.h follows.
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
new file mode 100644
index 0000000..ab0d934
--- /dev/null
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -0,0 +1,540 @@
+/* Copyright (C) 2014 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvm.h. This implementation
+ * is licensed under the same terms as the file jvm.h.  The
+ * copyright and license information for the file jvm.h follows.
+ *
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * Services that OpenJDK expects the VM to provide.
+ */
+#include <stdio.h>
+#include <dlfcn.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "runtime.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "mirror/class_loader.h"
+#include "verify_object-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
+#include "jni_internal.h"
+#include "mirror/string-inl.h"
+#include "native/scoped_fast_native_object_access.h"
+#include "ScopedLocalRef.h"
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#ifdef __ANDROID__
+// This function is provided by android linker.
+extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
+#endif  // __ANDROID__
+
+#undef LOG_TAG
+#define LOG_TAG "artopenjdk"
+
+using art::DEBUG;
+using art::WARNING;
+using art::VERBOSE;
+using art::INFO;
+using art::ERROR;
+using art::FATAL;
+
+/* posix open() with extensions; used by e.g. ZipFile */
+JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
+    LOG(DEBUG) << "JVM_Open fname='" << fname << "', flags=" << flags << ", mode=" << mode;
+
+    /*
+     * The call is expected to handle JVM_O_DELETE, which causes the file
+     * to be removed after it is opened.  Also, some code seems to
+     * want the special return value JVM_EEXIST if the file open fails
+     * due to O_EXCL.
+     */
+    int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
+    if (fd < 0) {
+        int err = errno;
+        LOG(DEBUG) << "open(" << fname << ") failed: " << strerror(errno);
+        if (err == EEXIST) {
+            return JVM_EEXIST;
+        } else {
+            return -1;
+        }
+    }
+
+    if (flags & JVM_O_DELETE) {
+        LOG(DEBUG) << "Deleting '" << fname << "' after open\n";
+        if (unlink(fname) != 0) {
+            LOG(WARNING) << "Post-open deletion of '" << fname << "' failed: " << strerror(errno);
+        }
+        /* ignore */
+    }
+
+    LOG(VERBOSE) << "open(" << fname << ") --> " << fd;
+    return fd;
+}
+
+/* posix close() */
+JNIEXPORT jint JVM_Close(jint fd) {
+    LOG(DEBUG) << "JVM_Close fd=" << fd;
+    // don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR
+    return close(fd);
+}
+
+/* posix read() */
+JNIEXPORT jint JVM_Read(jint fd, char* buf, jint nbytes) {
+    LOG(DEBUG) << "JVM_Read fd=" << fd << ", buf='" << buf << "', nbytes=" << nbytes;
+    return TEMP_FAILURE_RETRY(read(fd, buf, nbytes));
+}
+
+/* posix write(); is used to write messages to stderr */
+JNIEXPORT jint JVM_Write(jint fd, char* buf, jint nbytes) {
+    LOG(DEBUG) << "JVM_Write fd=" << fd << ", buf='" << buf << "', nbytes=" << nbytes;
+    return TEMP_FAILURE_RETRY(write(fd, buf, nbytes));
+}
+
+/* posix lseek() */
+JNIEXPORT jlong JVM_Lseek(jint fd, jlong offset, jint whence) {
+    LOG(DEBUG) << "JVM_Lseek fd=" << fd << ", offset=" << offset << ", whence=" << whence;
+    return TEMP_FAILURE_RETRY(lseek(fd, offset, whence));
+}
+
+/*
+ * "raw monitors" seem to be expected to behave like non-recursive pthread
+ * mutexes.  They're used by ZipFile.
+ */
+JNIEXPORT void* JVM_RawMonitorCreate(void) {
+    LOG(DEBUG) << "JVM_RawMonitorCreate";
+    pthread_mutex_t* newMutex =
+        reinterpret_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+    pthread_mutex_init(newMutex, NULL);
+    return newMutex;
+}
+
+JNIEXPORT void JVM_RawMonitorDestroy(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorDestroy mon=" << mon;
+    pthread_mutex_destroy(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT jint JVM_RawMonitorEnter(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorEnter mon=" << mon;
+    return pthread_mutex_lock(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT void JVM_RawMonitorExit(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorExit mon=" << mon;
+    pthread_mutex_unlock(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT char* JVM_NativePath(char* path) {
+    LOG(DEBUG) << "JVM_NativePath path='" << path << "'";
+    return path;
+}
+
+JNIEXPORT jint JVM_GetLastErrorString(char* buf, int len) {
+#if defined(__GLIBC__) || defined(__BIONIC__)
+  int err = errno;    // grab before JVM_TRACE can trash it
+  LOG(DEBUG) << "JVM_GetLastErrorString buf=" << buf << ", len=" << len;
+
+  if (len == 0) {
+    return 0;
+  }
+
+  char* result = strerror_r(err, buf, len);
+  if (result != buf) {
+    strncpy(buf, result, len);
+    buf[len - 1] = '\0';
+  }
+
+  return strlen(buf);
+#else
+  UNUSED(buf);
+  UNUSED(len);
+  return -1;
+#endif
+}
+
+JNIEXPORT int jio_fprintf(FILE* fp, const char* fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    int len = jio_vfprintf(fp, fmt, args);
+    va_end(args);
+
+    return len;
+}
+
+JNIEXPORT int jio_vfprintf(FILE* fp, const char* fmt, va_list args) {
+    assert(fp != NULL);
+    return vfprintf(fp, fmt, args);
+}
+
+/* posix fsync() */
+JNIEXPORT jint JVM_Sync(jint fd) {
+    LOG(DEBUG) << "JVM_Sync fd=" << fd;
+    return TEMP_FAILURE_RETRY(fsync(fd));
+}
+
+JNIEXPORT void* JVM_FindLibraryEntry(void* handle, const char* name) {
+    LOG(DEBUG) << "JVM_FindLibraryEntry handle=" << handle << " name=" << name;
+    return dlsym(handle, name);
+}
+
+JNIEXPORT jlong JVM_CurrentTimeMillis(JNIEnv* env, jclass clazz ATTRIBUTE_UNUSED) {
+    LOG(DEBUG) << "JVM_CurrentTimeMillis env=" << env;
+    struct timeval tv;
+
+    gettimeofday(&tv, (struct timezone *) NULL);
+    jlong when = tv.tv_sec * 1000LL + tv.tv_usec / 1000;
+    return when;
+}
+
+JNIEXPORT jint JVM_Socket(jint domain, jint type, jint protocol) {
+    LOG(DEBUG) << "JVM_Socket domain=" << domain << ", type=" << type << ", protocol=" << protocol;
+
+    return TEMP_FAILURE_RETRY(socket(domain, type, protocol));
+}
+
+JNIEXPORT jint JVM_InitializeSocketLibrary() {
+  return 0;
+}
+
+int jio_vsnprintf(char *str, size_t count, const char *fmt, va_list args) {
+  if ((intptr_t)count <= 0) return -1;
+  return vsnprintf(str, count, fmt, args);
+}
+
+int jio_snprintf(char *str, size_t count, const char *fmt, ...) {
+  va_list args;
+  int len;
+  va_start(args, fmt);
+  len = jio_vsnprintf(str, count, fmt, args);
+  va_end(args);
+  return len;
+}
+
+JNIEXPORT jint JVM_SetSockOpt(jint fd, int level, int optname,
+    const char* optval, int optlen) {
+  LOG(DEBUG) << "JVM_SetSockOpt fd=" << fd << ", level=" << level << ", optname=" << optname
+             << ", optval=" << optval << ", optlen=" << optlen;
+  return TEMP_FAILURE_RETRY(setsockopt(fd, level, optname, optval, optlen));
+}
+
+JNIEXPORT jint JVM_SocketShutdown(jint fd, jint howto) {
+  LOG(DEBUG) << "JVM_SocketShutdown fd=" << fd << ", howto=" << howto;
+  return TEMP_FAILURE_RETRY(shutdown(fd, howto));
+}
+
+JNIEXPORT jint JVM_GetSockOpt(jint fd, int level, int optname, char* optval,
+  int* optlen) {
+  LOG(DEBUG) << "JVM_GetSockOpt fd=" << fd << ", level=" << level << ", optname=" << optname
+             << ", optval=" << optval << ", optlen=" << optlen;
+
+  socklen_t len = *optlen;
+  int cc = TEMP_FAILURE_RETRY(getsockopt(fd, level, optname, optval, &len));
+  *optlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_GetSockName(jint fd, struct sockaddr* addr, int* addrlen) {
+  LOG(DEBUG) << "JVM_GetSockName fd=" << fd << ", addr=" << addr << ", addrlen=" << addrlen;
+
+  socklen_t len = *addrlen;
+  int cc = TEMP_FAILURE_RETRY(getsockname(fd, addr, &len));
+  *addrlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_SocketAvailable(jint fd, jint* result) {
+  LOG(DEBUG) << "JVM_SocketAvailable fd=" << fd << ", result=" << result;
+
+  if (TEMP_FAILURE_RETRY(ioctl(fd, FIONREAD, result)) < 0) {
+      LOG(DEBUG) << "ioctl(" << fd << ", FIONREAD) failed: " << strerror(errno);
+      return JNI_FALSE;
+  }
+
+  return JNI_TRUE;
+}
+
+JNIEXPORT jint JVM_Send(jint fd, char* buf, jint nBytes, jint flags) {
+  LOG(DEBUG) << "JVM_Send fd=" << fd << ", buf=" << buf << ", nBytes="
+             << nBytes << ", flags=" << flags;
+
+  return TEMP_FAILURE_RETRY(send(fd, buf, nBytes, flags));
+}
+
+JNIEXPORT jint JVM_SocketClose(jint fd) {
+  LOG(DEBUG) << "JVM_SocketClose fd=" << fd;
+
+  // don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR
+  return close(fd);
+}
+
+JNIEXPORT jint JVM_Listen(jint fd, jint count) {
+  LOG(DEBUG) << "JVM_Listen fd=" << fd << ", count=" << count;
+
+  return TEMP_FAILURE_RETRY(listen(fd, count));
+}
+
+JNIEXPORT jint JVM_Connect(jint fd, struct sockaddr* addr, jint addrlen) {
+  LOG(DEBUG) << "JVM_Connect fd=" << fd << ", addr=" << addr << ", addrlen=" << addrlen;
+
+  return TEMP_FAILURE_RETRY(connect(fd, addr, addrlen));
+}
+
+JNIEXPORT int JVM_GetHostName(char* name, int namelen) {
+  LOG(DEBUG) << "JVM_GetHostName name=" << name << ", namelen=" << namelen;
+
+  return TEMP_FAILURE_RETRY(gethostname(name, namelen));
+}
+
+JNIEXPORT jstring JVM_InternString(JNIEnv* env, jstring jstr) {
+  LOG(DEBUG) << "JVM_InternString env=" << env << ", jstr=" << jstr;
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::String* s = soa.Decode<art::mirror::String*>(jstr);
+  art::mirror::String* result = s->Intern();
+  return soa.AddLocalReference<jstring>(result);
+}
+
+JNIEXPORT jlong JVM_FreeMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetFreeMemory();
+}
+
+JNIEXPORT jlong JVM_TotalMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetTotalMemory();
+}
+
+JNIEXPORT jlong JVM_MaxMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetMaxMemory();
+}
+
+JNIEXPORT void JVM_GC(void) {
+  if (art::Runtime::Current()->IsExplicitGcDisabled()) {
+      LOG(INFO) << "Explicit GC skipped.";
+      return;
+  }
+  art::Runtime::Current()->GetHeap()->CollectGarbage(false);
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Exit(jint status) {
+  LOG(INFO) << "System.exit called, status: " << status;
+  art::Runtime::Current()->CallExitHook(status);
+  exit(status);
+}
+
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
+#ifdef __ANDROID__
+  if (javaLdLibraryPath != nullptr) {
+    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
+    if (ldLibraryPath.c_str() != nullptr) {
+      android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
+    }
+  }
+
+#else
+  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  UNUSED(javaLdLibraryPath, env);
+#endif
+}
+
+
+JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env, jstring javaFilename, jobject javaLoader,
+                                 jboolean isSharedNamespace, jstring javaLibrarySearchPath,
+                                 jstring javaLibraryPermittedPath) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == NULL) {
+    return NULL;
+  }
+
+  int32_t target_sdk_version = art::Runtime::Current()->GetTargetSdkVersion();
+
+  // Starting with N nativeLoad uses classloader local
+  // linker namespace instead of global LD_LIBRARY_PATH
+  // (23 is Marshmallow)
+  if (target_sdk_version <= 23) {
+    SetLdLibraryPath(env, javaLibrarySearchPath);
+  }
+
+  std::string error_msg;
+  {
+    art::ScopedObjectAccess soa(env);
+    art::StackHandleScope<1> hs(soa.Self());
+    art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM();
+    bool success = vm->LoadNativeLibrary(env,
+                                         filename.c_str(),
+                                         javaLoader,
+                                         isSharedNamespace == JNI_TRUE,
+                                         javaLibrarySearchPath,
+                                         javaLibraryPermittedPath,
+                                         &error_msg);
+    if (success) {
+      return nullptr;
+    }
+  }
+
+  // Don't let a pending exception from JNI_OnLoad cause a CheckJNI issue with NewStringUTF.
+  env->ExceptionClear();
+  return env->NewStringUTF(error_msg.c_str());
+}
+
+JNIEXPORT void JVM_StartThread(JNIEnv* env, jobject jthread, jlong stack_size, jboolean daemon) {
+  art::Thread::CreateNativeThread(env, jthread, stack_size, daemon == JNI_TRUE);
+}
+
+JNIEXPORT void JVM_SetThreadPriority(JNIEnv* env, jobject jthread, jint prio) {
+  art::ScopedObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != NULL) {
+    thread->SetNativePriority(prio);
+  }
+}
+
+JNIEXPORT void JVM_Yield(JNIEnv* env ATTRIBUTE_UNUSED, jclass threadClass ATTRIBUTE_UNUSED) {
+  sched_yield();
+}
+
+JNIEXPORT void JVM_Sleep(JNIEnv* env, jclass threadClass ATTRIBUTE_UNUSED,
+                         jobject java_lock, jlong millis) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::Object* lock = soa.Decode<art::mirror::Object*>(java_lock);
+  art::Monitor::Wait(art::Thread::Current(), lock, millis, 0, true, art::kSleeping);
+}
+
+JNIEXPORT jobject JVM_CurrentThread(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  return soa.AddLocalReference<jobject>(soa.Self()->GetPeer());
+}
+
+JNIEXPORT void JVM_Interrupt(JNIEnv* env, jobject jthread) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != nullptr) {
+    thread->Interrupt(soa.Self());
+  }
+}
+
+JNIEXPORT jboolean JVM_IsInterrupted(JNIEnv* env, jobject jthread, jboolean clearInterrupted) {
+  if (clearInterrupted) {
+    return static_cast<art::JNIEnvExt*>(env)->self->Interrupted() ? JNI_TRUE : JNI_FALSE;
+  } else {
+    art::ScopedFastNativeObjectAccess soa(env);
+    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+    art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+    return (thread != nullptr) ? thread->IsInterrupted() : JNI_FALSE;
+  }
+}
+
+JNIEXPORT jboolean JVM_HoldsLock(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED, jobject jobj) {
+  art::ScopedObjectAccess soa(env);
+  art::mirror::Object* object = soa.Decode<art::mirror::Object*>(jobj);
+  if (object == NULL) {
+    art::ThrowNullPointerException("object == null");
+    return JNI_FALSE;
+  }
+  return soa.Self()->HoldsLock(object);
+}
+
+JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring java_name) {
+  ScopedUtfChars name(env, java_name);
+  {
+    art::ScopedObjectAccess soa(env);
+    if (soa.Decode<art::mirror::Object*>(jthread) == soa.Self()->GetPeer()) {
+      soa.Self()->SetThreadName(name.c_str());
+      return;
+    }
+  }
+  // Suspend the thread so that it cannot kill itself while we set its name. We don't simply hold
+  // the thread list lock to prevent this, as setting the thread name causes the mutator lock to
+  // be acquired and released in the DDMS send code.
+  art::ThreadList* thread_list = art::Runtime::Current()->GetThreadList();
+  bool timed_out;
+  // Take suspend thread lock to avoid races with threads trying to suspend this one.
+  art::Thread* thread;
+  {
+    thread = thread_list->SuspendThreadByPeer(jthread, true, false, &timed_out);
+  }
+  if (thread != NULL) {
+    {
+      art::ScopedObjectAccess soa(env);
+      thread->SetThreadName(name.c_str());
+    }
+    thread_list->Resume(thread, false);
+  } else if (timed_out) {
+    LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
+        "failed to suspend within a generous timeout.";
+  }
+}
+
+JNIEXPORT jint JVM_IHashCode(JNIEnv* env ATTRIBUTE_UNUSED,
+                             jobject javaObject ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_IHashCode is not implemented";
+  return 0;
+}
+
+JNIEXPORT jlong JVM_NanoTime(JNIEnv* env ATTRIBUTE_UNUSED, jclass unused ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_NanoTime is not implemented";
+  return 0L;
+}
+
+JNIEXPORT void JVM_ArrayCopy(JNIEnv* /* env */, jclass /* unused */, jobject /* javaSrc */,
+                             jint /* srcPos */, jobject /* javaDst */, jint /* dstPos */,
+                             jint /* length */) {
+  UNIMPLEMENTED(FATAL) << "JVM_ArrayCopy is not implemented";
+}
+
+JNIEXPORT jint JVM_FindSignal(const char* name ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_FindSignal is not implemented";
+  return 0;
+}
+
+JNIEXPORT void* JVM_RegisterSignal(jint signum ATTRIBUTE_UNUSED, void* handler ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RegisterSignal is not implemented";
+  return nullptr;
+}
+
+JNIEXPORT jboolean JVM_RaiseSignal(jint signum ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RaiseSignal is not implemented";
+  return JNI_FALSE;
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Halt(jint code) {
+  exit(code);
+}
+
+JNIEXPORT jboolean JVM_IsNaN(jdouble d) {
+  return isnan(d);
+}
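
Note: the syscall wrappers above consistently use TEMP_FAILURE_RETRY so that EINTR never surfaces to Java, deliberately excepting close(), where retrying would be wrong because the descriptor may already be released. For reference, the macro's standard expansion is roughly the following (the platform headers provide the real definition):

    #include <cerrno>

    // Sketch of the usual glibc/bionic form; retries while the expression
    // fails with EINTR.
    #ifndef TEMP_FAILURE_RETRY
    #define TEMP_FAILURE_RETRY(exp)            \
      ({                                       \
        decltype(exp) _rc;                     \
        do {                                   \
          _rc = (exp);                         \
        } while (_rc == -1 && errno == EINTR); \
        _rc;                                   \
      })
    #endif
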
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index c32d76c..5b90c6a 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -36,18 +36,28 @@
   void* test_abort = reinterpret_cast<void*>(0xb);
   void* test_exit = reinterpret_cast<void*>(0xc);
 
-  std::string lib_core(CommonRuntimeTest::GetLibCoreDexFileName());
-
   std::string boot_class_path;
+  std::string class_path;
   boot_class_path += "-Xbootclasspath:";
-  boot_class_path += lib_core;
+
+  bool first_dex_file = true;
+  for (const std::string& dex_file_name :
+           CommonRuntimeTest::GetLibCoreDexFileNames()) {
+    if (!first_dex_file) {
+      class_path += ":";
+    } else {
+      first_dex_file = false;
+    }
+    class_path += dex_file_name;
+  }
+  boot_class_path += class_path;
 
   RuntimeOptions options;
   options.push_back(std::make_pair(boot_class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-classpath", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-cp", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-Ximage:boot_image", nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair("-Xms2048", nullptr));
@@ -71,8 +81,8 @@
 #define EXPECT_PARSED_EQ(expected, actual_key) EXPECT_EQ(expected, map.GetOrDefault(actual_key))
 #define EXPECT_PARSED_EXISTS(actual_key) EXPECT_TRUE(map.Exists(actual_key))
 
-  EXPECT_PARSED_EQ(lib_core, Opt::BootClassPath);
-  EXPECT_PARSED_EQ(lib_core, Opt::ClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::BootClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::ClassPath);
   EXPECT_PARSED_EQ(std::string("boot_image"), Opt::Image);
   EXPECT_PARSED_EXISTS(Opt::CheckJni);
   EXPECT_PARSED_EQ(2048U, Opt::MemoryInitialSize);
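
Note: the first_dex_file flag above implements the usual join-with-separator idiom by hand. Assuming libbase's android::base::Join helper is available to this test, the loop collapses to:

    #include "android-base/strings.h"  // Assumption: libbase is on the include path.

    std::string class_path =
        android::base::Join(CommonRuntimeTest::GetLibCoreDexFileNames(), ':');
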
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 324bd9f..28c27cd 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -21,7 +21,6 @@
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/abstract_method.h"
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index f3197c7..c4694ee 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -190,7 +190,6 @@
       abort_(nullptr),
       stats_enabled_(false),
       is_running_on_memory_tool_(RUNNING_ON_MEMORY_TOOL),
-      profiler_started_(false),
       instrumentation_(),
       main_thread_group_(nullptr),
       system_thread_group_(nullptr),
@@ -212,9 +211,11 @@
       safe_mode_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
+  interpreter::CheckInterpreterAsmConstants();
 }
 
 Runtime::~Runtime() {
+  ATRACE_BEGIN("Runtime shutdown");
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
@@ -229,48 +230,55 @@
   Thread* self = Thread::Current();
   const bool attach_shutdown_thread = self == nullptr;
   if (attach_shutdown_thread) {
+    ATRACE_BEGIN("Attach shutdown thread");
     CHECK(AttachCurrentThread("Shutdown thread", false, nullptr, false));
+    ATRACE_END();
     self = Thread::Current();
   } else {
     LOG(WARNING) << "Current thread not detached in Runtime shutdown";
   }
 
   {
+    ATRACE_BEGIN("Wait for shutdown cond");
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     shutting_down_started_ = true;
     while (threads_being_born_ > 0) {
       shutdown_cond_->Wait(self);
     }
     shutting_down_ = true;
+    ATRACE_END();
   }
   // Shutdown and wait for the daemons.
   CHECK(self != nullptr);
   if (IsFinishedStarting()) {
+    ATRACE_BEGIN("Waiting for Daemons");
     self->ClearException();
     self->GetJniEnv()->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                                             WellKnownClasses::java_lang_Daemons_stop);
+    ATRACE_END();
   }
 
   Trace::Shutdown();
 
   if (attach_shutdown_thread) {
+    ATRACE_BEGIN("Detach shutdown thread");
     DetachCurrentThread();
+    ATRACE_END();
     self = nullptr;
   }
 
-  // Shut down background profiler before the runtime exits.
-  if (profiler_started_) {
-    BackgroundMethodSamplingProfiler::Shutdown();
-  }
-
   // Make sure to let the GC complete if it is running.
   heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
+    ATRACE_BEGIN("Delete jit");
     VLOG(jit) << "Deleting jit thread pool";
     // Delete thread pool before the thread list since we don't want to wait forever on the
     // JIT compiler threads.
     jit_->DeleteThreadPool();
+    // Similarly, stop the profile saver thread before deleting the thread list.
+    jit_->StopProfileSaver();
+    ATRACE_END();
   }
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
@@ -278,11 +286,13 @@
   delete signal_catcher_;
 
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
+  ATRACE_BEGIN("Delete thread list");
   delete thread_list_;
+  ATRACE_END();
 
   // Delete the JIT after thread list to ensure that there is no remaining threads which could be
   // accessing the instrumentation when we delete it.
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
@@ -290,6 +300,7 @@
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
 
+  ATRACE_BEGIN("Delete state");
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
@@ -306,10 +317,12 @@
   low_4gb_arena_pool_.reset();
   arena_pool_.reset();
   MemMap::Shutdown();
+  ATRACE_END();
 
   // TODO: acquire a static mutex on Runtime to avoid racing.
   CHECK(instance_ == nullptr || instance_ == this);
   instance_ = nullptr;
+  ATRACE_END();
 }
 
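The destructor above wraps the whole teardown in an outer ATRACE_BEGIN("Runtime shutdown") and brackets each phase with its own begin/end pair, so the final ATRACE_END() matches the opening call. A minimal, runnable sketch of the same nesting discipline, using a hypothetical RAII guard and printf stand-ins for the real trace macros (neither is part of this change):

    #include <cstdio>

    // printf stand-ins for ATRACE_BEGIN/ATRACE_END, which normally emit
    // systrace begin/end events; logging keeps the sketch runnable anywhere.
    static void TraceBegin(const char* name) { std::printf("B|%s\n", name); }
    static void TraceEnd() { std::printf("E\n"); }

    // Hypothetical RAII guard (not in this change): the destructor emits the
    // matching end event, so pairs nest correctly even on early return.
    class ScopedTraceGuard {
     public:
      explicit ScopedTraceGuard(const char* name) { TraceBegin(name); }
      ~ScopedTraceGuard() { TraceEnd(); }
    };

    static void DeleteState() {
      ScopedTraceGuard phase("Delete state");
      // ... tear down monitor list, class linker, intern table, ...
    }

    int main() {
      ScopedTraceGuard shutdown("Runtime shutdown");  // outermost pair
      DeleteState();  // inner pair closes before the outer one
      return 0;
    }
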
 struct AbortState {
@@ -547,15 +560,12 @@
   // Use !IsAotCompiler so that we get test coverage, tests are never the zygote.
   if (!IsAotCompiler()) {
     ScopedObjectAccess soa(self);
-    gc::space::ImageSpace* image_space = heap_->GetBootImageSpace();
-    if (image_space != nullptr) {
-      ATRACE_BEGIN("AddImageStringsToTable");
-      GetInternTable()->AddImageStringsToTable(image_space);
-      ATRACE_END();
-      ATRACE_BEGIN("MoveImageClassesToClassTable");
-      GetClassLinker()->AddBootImageClassesToClassTable();
-      ATRACE_END();
-    }
+    ATRACE_BEGIN("AddImageStringsToTable");
+    GetInternTable()->AddImagesStringsToTable(heap_->GetBootImageSpaces());
+    ATRACE_END();
+    ATRACE_BEGIN("MoveImageClassesToClassTable");
+    GetClassLinker()->AddBootImageClassesToClassTable();
+    ATRACE_END();
   }
 
   // If we are the zygote then we need to wait until after forking to create the code cache
@@ -564,7 +574,7 @@
     CreateJit();
   }
 
-  if (!IsImageDex2OatEnabled() || !GetHeap()->HasImageSpace()) {
+  if (!IsImageDex2OatEnabled() || !GetHeap()->HasBootImageSpace()) {
     ScopedObjectAccess soa(self);
     StackHandleScope<1> hs(soa.Self());
     auto klass(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
@@ -593,6 +603,7 @@
       PreInitializeNativeBridge(".");
     }
     InitNonZygoteOrPostFork(self->GetJniEnv(),
+                            /* is_system_server */ false,
                             NativeBridgeAction::kInitialize,
                             GetInstructionSetString(kRuntimeISA));
   }
@@ -616,8 +627,7 @@
     if (fd >= 0) {
       close(fd);
     } else if (errno != EEXIST) {
-      LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
-      return true;
+      LOG(WARNING) << "Failed to access the profile file. Profiler disabled.";
     }
   }
 
@@ -682,7 +692,8 @@
 #endif
 }
 
-void Runtime::InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa) {
+void Runtime::InitNonZygoteOrPostFork(
+    JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
   if (is_native_bridge_loaded_) {
@@ -704,7 +715,7 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (!safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
+  if (!is_system_server && !safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
     // Note that when running ART standalone (not zygote, nor zygote fork),
     // the jit may have already been created.
     CreateJit();
@@ -752,61 +763,92 @@
   VLOG(startup) << "Runtime::StartDaemonThreads exiting";
 }
 
+// Attempts to open dex files from image(s). Given the image location, try to find the oat file
+// and open it to get the stored dex files. If the image is the first of a multi-image boot
+// classpath, go on and open the other images as well.
 static bool OpenDexFilesFromImage(const std::string& image_location,
                                   std::vector<std::unique_ptr<const DexFile>>* dex_files,
                                   size_t* failures) {
   DCHECK(dex_files != nullptr) << "OpenDexFilesFromImage: out-param is nullptr";
-  std::string system_filename;
-  bool has_system = false;
-  std::string cache_filename_unused;
-  bool dalvik_cache_exists_unused;
-  bool has_cache_unused;
-  bool is_global_cache_unused;
-  bool found_image = gc::space::ImageSpace::FindImageFilename(image_location.c_str(),
-                                                              kRuntimeISA,
-                                                              &system_filename,
-                                                              &has_system,
-                                                              &cache_filename_unused,
-                                                              &dalvik_cache_exists_unused,
-                                                              &has_cache_unused,
-                                                              &is_global_cache_unused);
-  *failures = 0;
-  if (!found_image || !has_system) {
-    return false;
-  }
-  std::string error_msg;
-  // We are falling back to non-executable use of the oat file because patching failed, presumably
-  // due to lack of space.
-  std::string oat_filename = ImageHeader::GetOatLocationFromImageLocation(system_filename.c_str());
-  std::string oat_location = ImageHeader::GetOatLocationFromImageLocation(image_location.c_str());
-  std::unique_ptr<File> file(OS::OpenFileForReading(oat_filename.c_str()));
-  if (file.get() == nullptr) {
-    return false;
-  }
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(), false, false, &error_msg));
-  if (elf_file.get() == nullptr) {
-    return false;
-  }
-  std::unique_ptr<const OatFile> oat_file(
-      OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
-  if (oat_file == nullptr) {
-    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
-    return false;
-  }
 
-  for (const OatFile::OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
-    if (oat_dex_file == nullptr) {
-      *failures += 1;
-      continue;
+  // Use a work-list approach, so that we can easily reuse the opening code.
+  std::vector<std::string> image_locations;
+  image_locations.push_back(image_location);
+
+  for (size_t index = 0; index < image_locations.size(); ++index) {
+    std::string system_filename;
+    bool has_system = false;
+    std::string cache_filename_unused;
+    bool dalvik_cache_exists_unused;
+    bool has_cache_unused;
+    bool is_global_cache_unused;
+    bool found_image = gc::space::ImageSpace::FindImageFilename(image_locations[index].c_str(),
+                                                                kRuntimeISA,
+                                                                &system_filename,
+                                                                &has_system,
+                                                                &cache_filename_unused,
+                                                                &dalvik_cache_exists_unused,
+                                                                &has_cache_unused,
+                                                                &is_global_cache_unused);
+
+    if (!found_image || !has_system) {
+      return false;
     }
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file.get() == nullptr) {
-      *failures += 1;
-    } else {
-      dex_files->push_back(std::move(dex_file));
+
+    // We are falling back to non-executable use of the oat file because patching failed, presumably
+    // due to lack of space.
+    std::string oat_filename =
+        ImageHeader::GetOatLocationFromImageLocation(system_filename.c_str());
+    std::string oat_location =
+        ImageHeader::GetOatLocationFromImageLocation(image_locations[index].c_str());
+    // Note: in the multi-image case, the image location may end in ".jar" rather than ".art".
+    //       Handle that here.
+    if (EndsWith(oat_location, ".jar")) {
+      oat_location.replace(oat_location.length() - 3, 3, "oat");
     }
+
+    std::unique_ptr<File> file(OS::OpenFileForReading(oat_filename.c_str()));
+    if (file.get() == nullptr) {
+      return false;
+    }
+    std::string error_msg;
+    std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(), false, false, &error_msg));
+    if (elf_file.get() == nullptr) {
+      return false;
+    }
+    std::unique_ptr<const OatFile> oat_file(
+        OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
+    if (oat_file == nullptr) {
+      LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
+      return false;
+    }
+
+    for (const OatFile::OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
+      if (oat_dex_file == nullptr) {
+        *failures += 1;
+        continue;
+      }
+      std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+      if (dex_file.get() == nullptr) {
+        *failures += 1;
+      } else {
+        dex_files->push_back(std::move(dex_file));
+      }
+    }
+
+    if (index == 0) {
+      // First file. See if this is a multi-image environment, and if so, enqueue the other images.
+      const OatHeader& boot_oat_header = oat_file->GetOatHeader();
+      const char* boot_cp = boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPath);
+      if (boot_cp != nullptr) {
+        gc::space::ImageSpace::CreateMultiImageLocations(image_locations[0],
+                                                         boot_cp,
+                                                         &image_locations);
+      }
+    }
+
+    Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   }
-  Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   return true;
 }
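OpenDexFilesFromImage is restructured around a work-list: the primary image location seeds the vector, and after the first entry is processed, any extra locations derived from the oat header's boot classpath key are appended and picked up by the same loop. A self-contained sketch of that traversal, with a hypothetical SiblingLocations() standing in for the oat-header lookup:

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for reading OatHeader::kBootClassPath from the
    // first image's oat file and expanding it into further image locations.
    static std::vector<std::string> SiblingLocations(const std::string& first) {
      return {first + " (core-libart)", first + " (core-oj)"};
    }

    int main() {
      // Seed the work-list with the primary boot image location.
      std::vector<std::string> image_locations{"/system/framework/boot.art"};

      // Indices stay valid because the loop only appends; entries enqueued
      // while handling the first image are processed by the same loop body.
      for (size_t index = 0; index < image_locations.size(); ++index) {
        std::cout << "open image: " << image_locations[index] << '\n';
        if (index == 0) {
          for (const std::string& extra : SiblingLocations(image_locations[0])) {
            image_locations.push_back(extra);
          }
        }
      }
      return 0;
    }
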
 
@@ -944,7 +986,7 @@
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
   ATRACE_END();
 
-  if (heap_->GetBootImageSpace() == nullptr && !allow_dex_file_fallback_) {
+  if (!heap_->HasBootImageSpace() && !allow_dex_file_fallback_) {
     LOG(ERROR) << "Dex file fallback disabled, cannot continue without image.";
     ATRACE_END();
     return false;
@@ -1052,7 +1094,7 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
-  if (GetHeap()->HasImageSpace()) {
+  if (GetHeap()->HasBootImageSpace()) {
     ATRACE_BEGIN("InitFromImage");
     std::string error_msg;
     bool result = class_linker_->InitFromImage(&error_msg);
@@ -1062,7 +1104,9 @@
       return false;
     }
     if (kIsDebugBuild) {
-      GetHeap()->GetBootImageSpace()->VerifyImageAllocations();
+      for (auto image_space : GetHeap()->GetBootImageSpaces()) {
+        image_space->VerifyImageAllocations();
+      }
     }
     if (boot_class_path_string_.empty()) {
       // The bootclasspath is not explicitly specified: construct it from the loaded dex files.
@@ -1209,14 +1253,17 @@
   // First set up JniConstants, which is used by both the runtime's built-in native
   // methods and libcore.
   JniConstants::init(env);
-  WellKnownClasses::Init(env);
 
   // Then set up the native methods provided by the runtime itself.
   RegisterRuntimeNativeMethods(env);
 
-  // Then set up libcore, which is just a regular JNI library with a regular JNI_OnLoad.
-  // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
-  // the library that implements System.loadLibrary!
+  // Initialize classes used in JNI. The initialization requires runtime native
+  // methods to be loaded first.
+  WellKnownClasses::Init(env);
+
+  // Then set up libjavacore / libopenjdk, which are just regular JNI libraries with
+  // a regular JNI_OnLoad. Most JNI libraries can just use System.loadLibrary, but
+  // libcore can't because it's the library that implements System.loadLibrary!
   {
     std::string error_msg;
     if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr,
@@ -1225,6 +1272,17 @@
       LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
     }
   }
+  {
+    constexpr const char* kOpenJdkLibrary = kIsDebugBuild
+                                                ? "libopenjdkd.so"
+                                                : "libopenjdk.so";
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, kOpenJdkLibrary, nullptr,
+                                     /* is_shared_namespace */ false,
+                                     nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"" << kOpenJdkLibrary << "\": " << error_msg;
+    }
+  }
 
   // Initialize well known classes that may invoke runtime native methods.
   WellKnownClasses::LateInit(env);
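The OpenJDK native library is picked at compile time: on debug builds kIsDebugBuild selects libopenjdkd.so (matching the phony packages added to Android.mk), otherwise libopenjdk.so. A tiny sketch of the same constexpr selection, with a local stand-in for kIsDebugBuild:

    #include <cstdio>

    // Local stand-in for ART's kIsDebugBuild (a build-time constant); flip it
    // to select the debug library instead.
    static constexpr bool kIsDebugBuild = false;

    int main() {
      // Same pattern as the diff: the library name is a constexpr const char*,
      // resolved entirely at compile time.
      constexpr const char* kOpenJdkLibrary = kIsDebugBuild
                                                  ? "libopenjdkd.so"
                                                  : "libopenjdk.so";
      std::printf("would load: %s\n", kOpenJdkLibrary);
      return 0;
    }
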
@@ -1642,11 +1700,29 @@
 
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
                               const std::string& profile_output_filename) {
-  DCHECK(!profile_output_filename.empty());
-  if (jit_.get() != nullptr) {
-    jit_->SetDexLocationsForProfiling(code_paths);
+  if (jit_.get() == nullptr) {
+    // We are not JITing. Nothing to do.
+    return;
   }
+
+  VLOG(profiler) << "Register app with " << profile_output_filename
+      << " " << Join(code_paths, ':');
+
+  if (profile_output_filename.empty()) {
+    LOG(WARNING) << "JIT profile information will not be recorded: profile filename is empty.";
+    return;
+  }
+  if (!FileExists(profile_output_filename)) {
+    LOG(WARNING) << "JIT profile information will not be recorded: profile file does not exits.";
+    return;
+  }
+  if (code_paths.empty()) {
+    LOG(WARNING) << "JIT profile information will not be recorded: code paths is empty.";
+    return;
+  }
+
   profile_output_filename_ = profile_output_filename;
+  jit_->StartProfileSaver(profile_output_filename, code_paths);
 }
 
 // Transaction support.
@@ -1792,18 +1868,6 @@
   argv->push_back(feature_string);
 }
 
-void Runtime::MaybeSaveJitProfilingInfo() {
-  if (jit_.get() != nullptr && !profile_output_filename_.empty()) {
-    jit_->SaveProfilingInfo(profile_output_filename_);
-  }
-}
-
-void Runtime::UpdateProfilerState(int state) {
-  if (state == kProfileBackground) {
-    MaybeSaveJitProfilingInfo();
-  }
-}
-
 void Runtime::CreateJit() {
   CHECK(!IsAotCompiler());
   if (GetInstrumentation()->IsForcedInterpretOnly()) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index b45408e..20acffb 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -457,7 +457,8 @@
 
   void PreZygoteFork();
   bool InitZygote();
-  void InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa);
+  void InitNonZygoteOrPostFork(
+      JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa);
 
   const instrumentation::Instrumentation* GetInstrumentation() const {
     return &instrumentation_;
@@ -469,7 +470,6 @@
 
   void RegisterAppInfo(const std::vector<std::string>& code_paths,
                        const std::string& profile_output_filename);
-  void UpdateProfilerState(int state);
 
   // Transaction support.
   bool IsActiveTransaction() const {
@@ -734,7 +734,6 @@
 
   std::string profile_output_filename_;
   ProfilerOptions profiler_options_;
-  bool profiler_started_;
 
   std::unique_ptr<TraceConfig> trace_config_;
 
diff --git a/runtime/stack.h b/runtime/stack.h
index a0c44cb..4fa1a4f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -184,11 +184,12 @@
   }
 
   uint32_t GetDexPC() const {
-    return dex_pc_;
+    return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
   }
 
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
+    dex_pc_ptr_ = nullptr;
   }
 
   ShadowFrame* GetLink() const {
@@ -206,6 +207,20 @@
     return *reinterpret_cast<const int32_t*>(vreg);
   }
 
+  uint32_t* GetVRegAddr(size_t i) {
+    return &vregs_[i];
+  }
+
+  uint32_t* GetShadowRefAddr(size_t i) {
+    DCHECK(HasReferenceArray());
+    DCHECK_LT(i, NumberOfVRegs());
+    return &vregs_[i + NumberOfVRegs()];
+  }
+
+  void SetCodeItem(const DexFile::CodeItem* code_item) {
+    code_item_ = code_item;
+  }
+
   float GetVRegFloat(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     // NOTE: Strict-aliasing?
@@ -346,6 +361,10 @@
     return lock_count_data_;
   }
 
+  static size_t LockCountDataOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, lock_count_data_);
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -366,6 +385,18 @@
     return OFFSETOF_MEMBER(ShadowFrame, vregs_);
   }
 
+  static size_t ResultRegisterOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, result_register_);
+  }
+
+  static size_t DexPCPtrOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, dex_pc_ptr_);
+  }
+
+  static size_t CodeItemOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, code_item_);
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -375,10 +406,19 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
+  uint16_t* GetDexPCPtr() {
+    return dex_pc_ptr_;
+  }
+
+  JValue* GetResultRegister() {
+    return result_register_;
+  }
+
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
-      : number_of_vregs_(num_vregs), link_(link), method_(method), dex_pc_(dex_pc) {
+      : link_(link), method_(method), result_register_(nullptr), dex_pc_ptr_(nullptr),
+        code_item_(nullptr), number_of_vregs_(num_vregs), dex_pc_(dex_pc) {
     // TODO(iam): Remove this parameter, it's an artifact of portable removal
     DCHECK(has_reference_array);
     if (has_reference_array) {
@@ -399,12 +439,15 @@
         const_cast<const ShadowFrame*>(this)->References());
   }
 
-  const uint32_t number_of_vregs_;
   // Link to previous shadow frame or null.
   ShadowFrame* link_;
   ArtMethod* method_;
-  uint32_t dex_pc_;
+  JValue* result_register_;
+  uint16_t* dex_pc_ptr_;
+  const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
+  const uint32_t number_of_vregs_;
+  uint32_t dex_pc_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
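With the new layout, GetDexPC() prefers the raw instruction pointer: when dex_pc_ptr_ is non-null, the dex pc is recovered as the code-unit index of that pointer within the code item's insns_ array. A minimal sketch of that pointer-to-index conversion, using a bare uint16_t buffer in place of the real DexFile::CodeItem:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
      // Stand-in for DexFile::CodeItem::insns_: dex bytecode is a stream of
      // 16-bit code units, and a dex pc is an index into that stream.
      uint16_t insns[8] = {};

      // Stand-in for ShadowFrame::dex_pc_ptr_, pointing at the current opcode.
      uint16_t* dex_pc_ptr = &insns[5];

      // Pointer subtraction on uint16_t* yields the code-unit index directly;
      // this is what GetDexPC() computes when dex_pc_ptr_ != nullptr.
      size_t dex_pc = static_cast<size_t>(dex_pc_ptr - insns);
      assert(dex_pc == 5u);
      return 0;
    }
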
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 13e3774..21241d2 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,6 +76,7 @@
 #include "verify_object-inl.h"
 #include "vmap_table.h"
 #include "well_known_classes.h"
+#include "interpreter/interpreter.h"
 
 #if ART_USE_FUTEXES
 #include "linux/futex.h"
@@ -686,6 +687,7 @@
   RemoveSuspendTrigger();
   InitCardTable();
   InitTid();
+  interpreter::InitInterpreterTls(this);
 
 #ifdef __ANDROID__
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
diff --git a/runtime/thread.h b/runtime/thread.h
index 6cb895c..b25bcb2 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -601,6 +601,24 @@
   }
 
   template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpCurrentIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_current_ibase));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpDefaultIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_default_ibase));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> MterpAltIBaseOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(
+        OFFSETOF_MEMBER(tls_ptr_sized_values, mterp_alt_ibase));
+  }
+
+  template<size_t pointer_size>
   static ThreadOffset<pointer_size> ExceptionOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, exception));
   }
@@ -1001,6 +1019,30 @@
   void ProtectStack();
   bool UnprotectStack();
 
+  void SetMterpDefaultIBase(void* ibase) {
+    tlsPtr_.mterp_default_ibase = ibase;
+  }
+
+  void SetMterpCurrentIBase(void* ibase) {
+    tlsPtr_.mterp_current_ibase = ibase;
+  }
+
+  void SetMterpAltIBase(void* ibase) {
+    tlsPtr_.mterp_alt_ibase = ibase;
+  }
+
+  const void* GetMterpDefaultIBase() const {
+    return tlsPtr_.mterp_default_ibase;
+  }
+
+  const void* GetMterpCurrentIBase() const {
+    return tlsPtr_.mterp_current_ibase;
+  }
+
+  const void* GetMterpAltIBase() const {
+    return tlsPtr_.mterp_alt_ibase;
+  }
+
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
@@ -1246,6 +1288,7 @@
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
       thread_local_mark_stack(nullptr) {
@@ -1364,6 +1407,11 @@
     uint8_t* thread_local_end;
     size_t thread_local_objects;
 
+    // Mterp jump table bases.
+    void* mterp_current_ibase;
+    void* mterp_default_ibase;
+    void* mterp_alt_ibase;
+
     // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
     void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
 
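The mterp jump table bases live in tls_ptr_sized_values precisely so that the assembly interpreter can address them by constant offset; the MterpCurrentIBaseOffset() family exposes those offsets via OFFSETOF_MEMBER, and CheckInterpreterAsmConstants() (called from the Runtime constructor above) verifies the assembly's copies. A simplified sketch of the same offset computation with standard offsetof, over a hypothetical cut-down struct:

    #include <cstddef>
    #include <cstdio>

    // Cut-down, hypothetical version of Thread::tls_ptr_sized_values with just
    // a few pointer-sized slots, including the three mterp jump table bases.
    struct TlsPtrSizedValues {
      void* exception;
      void* mterp_current_ibase;
      void* mterp_default_ibase;
      void* mterp_alt_ibase;
    };

    int main() {
      // The assembly interpreter indexes into the thread struct with constants
      // like these; a startup check can verify they match the C++ layout.
      std::printf("mterp_current_ibase at offset %zu\n",
                  offsetof(TlsPtrSizedValues, mterp_current_ibase));
      std::printf("mterp_alt_ibase at offset %zu\n",
                  offsetof(TlsPtrSizedValues, mterp_alt_ibase));
      return 0;
    }
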
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index a390908..ae18819 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -69,6 +69,7 @@
 }
 
 ThreadList::~ThreadList() {
+  ATRACE_BEGIN(__FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
   // We need to detach the current thread here in case there's another thread waiting to join with
   // us.
@@ -79,19 +80,27 @@
     contains = Contains(self);
   }
   if (contains) {
+    ATRACE_BEGIN("DetachCurrentThread");
     Runtime::Current()->DetachCurrentThread();
+    ATRACE_END();
   }
+  ATRACE_BEGIN("WaitForOtherNonDaemonThreadsToExit");
   WaitForOtherNonDaemonThreadsToExit();
+  ATRACE_END();
   // Disable GC and wait for GC to complete in case there are still daemon threads doing
   // allocations.
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   heap->DisableGCForShutdown();
   // In case a GC is in progress, wait for it to finish.
+  ATRACE_BEGIN("WaitForGcToComplete");
   heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
-
+  ATRACE_END();
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
-  SuspendAllDaemonThreads();
+  ATRACE_BEGIN("SuspendAllDaemonThreadsForShutdown");
+  SuspendAllDaemonThreadsForShutdown();
+  ATRACE_END();
+  ATRACE_END();
 }
 
 bool ThreadList::Contains(Thread* thread) {
@@ -1133,7 +1142,7 @@
   }
 }
 
-void ThreadList::SuspendAllDaemonThreads() {
+void ThreadList::SuspendAllDaemonThreadsForShutdown() {
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::thread_list_lock_);
   {  // Tell all the daemons it's time to suspend.
@@ -1145,12 +1154,16 @@
       if (thread != self) {
         thread->ModifySuspendCount(self, +1, nullptr, false);
       }
+      // We are shutting down the runtime; set the JNI functions of all the JNIEnvs to be
+      // the sleep-forever ones.
+      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
     }
   }
   // Give the threads a chance to suspend, complaining if they're slow.
   bool have_complained = false;
-  for (int i = 0; i < 10; ++i) {
-    usleep(200 * 1000);
+  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
+  static constexpr size_t kSleepMicroseconds = 1000;
+  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
     bool all_suspended = true;
     for (const auto& thread : list_) {
       if (thread != self && thread->GetState() == kRunnable) {
@@ -1164,8 +1177,9 @@
     if (all_suspended) {
       return;
     }
+    usleep(kSleepMicroseconds);
   }
-  LOG(ERROR) << "suspend all daemons failed";
+  LOG(WARNING) << "timed out suspending all daemon threads";
 }
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
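The suspension wait is reshaped from ten fixed 200 ms sleeps into a fine-grained poll: sleep 1 ms per iteration up to a 2 s budget and return as soon as every daemon has left kRunnable, which cuts the common-case latency without shrinking the timeout. A stand-alone sketch of the pattern, with a hypothetical AllDaemonsSuspended() in place of the scan over list_:

    #include <unistd.h>

    #include <cstdio>

    // Hypothetical predicate standing in for the scan over the thread list;
    // here it reports success after a few polls so the sketch terminates.
    static bool AllDaemonsSuspended() {
      static int polls = 0;
      return ++polls > 3;
    }

    int main() {
      static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
      static constexpr size_t kSleepMicroseconds = 1000;
      // Poll cheaply and often instead of sleeping in large fixed chunks:
      // worst-case latency drops from 200 ms to ~1 ms once threads suspend.
      for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
        if (AllDaemonsSuspended()) {
          std::puts("all daemon threads suspended");
          return 0;
        }
        usleep(kSleepMicroseconds);
      }
      std::puts("timed out suspending all daemon threads");
      return 0;
    }
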
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 07ea10d..2e73f6a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -164,7 +164,7 @@
   void DumpUnattachedThreads(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_);
 
-  void SuspendAllDaemonThreads()
+  void SuspendAllDaemonThreadsForShutdown()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
   void WaitForOtherNonDaemonThreadsToExit()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
diff --git a/runtime/utils.cc b/runtime/utils.cc
index eddc3a4..8e9f12b 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1446,6 +1446,11 @@
   return true;
 }
 
+bool FileExists(const std::string& filename) {
+  struct stat buffer;
+  return stat(filename.c_str(), &buffer) == 0;
+}
+
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
@@ -1860,4 +1865,16 @@
   *parsed_value = value;
 }
 
+int64_t GetFileSizeBytes(const std::string& filename) {
+  struct stat stat_buf;
+  int rc = stat(filename.c_str(), &stat_buf);
+  return rc == 0 ? stat_buf.st_size : -1;
+}
+
+void SleepForever() {
+  while (true) {
+    usleep(1000000);
+  }
+}
+
 }  // namespace art
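Both new helpers are thin wrappers over stat(2): a zero return means the path resolved, and st_size carries the size, so a missing file maps to false and -1 respectively. A small usage sketch of the same two functions outside ART (the path is only an example):

    #include <sys/stat.h>

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Same shape as the helpers added to utils.cc.
    static bool FileExists(const std::string& filename) {
      struct stat buffer;
      return stat(filename.c_str(), &buffer) == 0;
    }

    static int64_t GetFileSizeBytes(const std::string& filename) {
      struct stat stat_buf;
      int rc = stat(filename.c_str(), &stat_buf);
      return rc == 0 ? stat_buf.st_size : -1;  // -1 when the path cannot be stat'ed
    }

    int main() {
      const std::string path = "/etc/hosts";  // example path; any file works
      if (FileExists(path)) {
        std::printf("%s: %lld bytes\n", path.c_str(),
                    static_cast<long long>(GetFileSizeBytes(path)));
      } else {
        std::printf("%s does not exist\n", path.c_str());
      }
      return 0;
    }
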
diff --git a/runtime/utils.h b/runtime/utils.h
index 5b9e963..153749e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -99,6 +99,18 @@
   return (ch < ' ' || ch > '~');
 }
 
+template <typename T> T SafeAbs(T value) {
+  // std::abs has undefined behavior on min limits.
+  DCHECK_NE(value, std::numeric_limits<T>::min());
+  return std::abs(value);
+}
+
+template <typename T> T AbsOrMin(T value) {
+  return (value == std::numeric_limits<T>::min())
+      ? value
+      : std::abs(value);
+}
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -276,6 +288,9 @@
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);
 
+// Returns true if the file exists.
+bool FileExists(const std::string& filename);
+
 class VoidFunctor {
  public:
   template <typename A>
@@ -367,6 +382,12 @@
   return dist(rng);
 }
 
+// Returns the file size in bytes, or -1 if the file does not exist.
+int64_t GetFileSizeBytes(const std::string& filename);
+
+// Sleep forever and never come back.
+NO_RETURN void SleepForever();
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index a47a08e..8300921 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -140,6 +140,7 @@
 jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
+jfieldID WellKnownClasses::java_lang_ThreadGroup_ngroups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_name;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_parent;
@@ -268,7 +269,7 @@
   java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
   java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V");
   java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
-  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "removeThread", "(Ljava/lang/Thread;)V");
+  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "threadTerminated", "(Ljava/lang/Thread;)V");
   java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V");
   libcore_reflect_AnnotationFactory_createAnnotation = CacheMethod(env, libcore_reflect_AnnotationFactory, true, "createAnnotation", "(Ljava/lang/Class;[Llibcore/reflect/AnnotationMember;)Ljava/lang/annotation/Annotation;");
   libcore_reflect_AnnotationMember_init = CacheMethod(env, libcore_reflect_AnnotationMember, false, "<init>", "(Ljava/lang/String;Ljava/lang/Object;Ljava/lang/Class;Ljava/lang/reflect/Method;)V");
@@ -340,9 +341,10 @@
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
-  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
+  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtExceptionHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
-  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "Ljava/util/List;");
+  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "[Ljava/lang/ThreadGroup;");
+  java_lang_ThreadGroup_ngroups = CacheField(env, java_lang_ThreadGroup, false, "ngroups", "I");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
   java_lang_ThreadGroup_parent = CacheField(env, java_lang_ThreadGroup, false, "parent", "Ljava/lang/ThreadGroup;");
@@ -350,13 +352,13 @@
   java_lang_Throwable_cause = CacheField(env, java_lang_Throwable, false, "cause", "Ljava/lang/Throwable;");
   java_lang_Throwable_detailMessage = CacheField(env, java_lang_Throwable, false, "detailMessage", "Ljava/lang/String;");
   java_lang_Throwable_stackTrace = CacheField(env, java_lang_Throwable, false, "stackTrace", "[Ljava/lang/StackTraceElement;");
-  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "stackState", "Ljava/lang/Object;");
+  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "backtrace", "Ljava/lang/Object;");
   java_lang_Throwable_suppressedExceptions = CacheField(env, java_lang_Throwable, false, "suppressedExceptions", "Ljava/util/List;");
   java_lang_reflect_AbstractMethod_artMethod = CacheField(env, java_lang_reflect_AbstractMethod, false, "artMethod", "J");
   java_lang_reflect_Proxy_h = CacheField(env, java_lang_reflect_Proxy, false, "h", "Ljava/lang/reflect/InvocationHandler;");
   java_nio_DirectByteBuffer_capacity = CacheField(env, java_nio_DirectByteBuffer, false, "capacity", "I");
-  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "effectiveDirectAddress", "J");
-  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "array", "[Ljava/lang/Object;");
+  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "address", "J");
+  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "elementData", "[Ljava/lang/Object;");
   java_util_ArrayList_size = CacheField(env, java_util_ArrayList, false, "size", "I");
   java_util_Collections_EMPTY_LIST = CacheField(env, java_util_Collections, true, "EMPTY_LIST", "Ljava/util/List;");
   libcore_util_EmptyArray_STACK_TRACE_ELEMENT = CacheField(env, libcore_util_EmptyArray, true, "STACK_TRACE_ELEMENT", "[Ljava/lang/StackTraceElement;");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index c856291..55158a7 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -153,6 +153,7 @@
   static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
   static jfieldID java_lang_ThreadGroup_groups;
+  static jfieldID java_lang_ThreadGroup_ngroups;
   static jfieldID java_lang_ThreadGroup_mainThreadGroup;
   static jfieldID java_lang_ThreadGroup_name;
   static jfieldID java_lang_ThreadGroup_parent;
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index aded30c..4fc7ee2 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -32,7 +32,7 @@
 
 TEST_F(ZipArchiveTest, FindAndExtract) {
   std::string error_msg;
-  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileNames()[0].c_str(), &error_msg));
   ASSERT_TRUE(zip_archive.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
   std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
diff --git a/test/003-omnibus-opcodes/expected.txt b/test/003-omnibus-opcodes/expected.txt
index b591a7a..ee25ec1 100644
--- a/test/003-omnibus-opcodes/expected.txt
+++ b/test/003-omnibus-opcodes/expected.txt
@@ -31,15 +31,7 @@
 FloatMath.checkConvI
 FloatMath.checkConvL
 FloatMath.checkConvF
- 0: -2.0054409E9
- 1: -8.613303E18
- 2: -3.1415927
--2.0054409E9, -8.6133031E18, -3.1415927
 FloatMath.checkConvD
- 0: -2.005440939E9
- 1: -8.613303245920329E18
- 2: 123.45600128173828
--2.005440939E9, -8.6133032459203287E18, 123.4560012817382
 FloatMath.checkConsts
 FloatMath.jlmTests
 IntMath.testIntCompare
diff --git a/test/003-omnibus-opcodes/src/FloatMath.java b/test/003-omnibus-opcodes/src/FloatMath.java
index a0bc9f4..96befe9 100644
--- a/test/003-omnibus-opcodes/src/FloatMath.java
+++ b/test/003-omnibus-opcodes/src/FloatMath.java
@@ -245,10 +245,9 @@
     }
     static void checkConvF(float[] results) {
         System.out.println("FloatMath.checkConvF");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.0054409E9, -8.6133031E18, -3.1415927");
+        Main.assertTrue(results[0] == -2.0054409E9f);
+        Main.assertTrue(results[1] == -8.613303E18f);
+        Main.assertTrue(results[2] == -3.1415927f);
     }
 
     static double[] convD(int i, long l, float f) {
@@ -260,10 +259,9 @@
     }
     static void checkConvD(double[] results) {
         System.out.println("FloatMath.checkConvD");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.005440939E9, -8.6133032459203287E18, 123.4560012817382");
+        Main.assertTrue(results[0] == -2.005440939E9);
+        Main.assertTrue(results[1] == -8.6133032459203287E18);
+        Main.assertTrue(results[2] == 123.45600128173828);
     }
 
     static void checkConsts() {
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index 9461c0b..b9a46de 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -57,12 +57,14 @@
     }
 
     private final static class OOM extends Operation {
+        private final static int ALLOC_SIZE = 1024;
+
         @Override
         public boolean perform() {
             try {
                 List<byte[]> l = new ArrayList<byte[]>();
                 while (true) {
-                    l.add(new byte[1024]);
+                    l.add(new byte[ALLOC_SIZE]);
                 }
             } catch (OutOfMemoryError e) {
             }
@@ -115,12 +117,33 @@
     }
 
     private final static class Alloc extends Operation {
+        private final static int ALLOC_SIZE = 1024;  // Needs to be small enough to not be in LOS.
+        private final static int ALLOC_COUNT = 1024;
+
         @Override
         public boolean perform() {
             try {
                 List<byte[]> l = new ArrayList<byte[]>();
-                for (int i = 0; i < 1024; i++) {
-                    l.add(new byte[1024]);
+                for (int i = 0; i < ALLOC_COUNT; i++) {
+                    l.add(new byte[ALLOC_SIZE]);
+                }
+            } catch (OutOfMemoryError e) {
+            }
+            return true;
+        }
+    }
+
+    private final static class LargeAlloc extends Operation {
+        private final static int PAGE_SIZE = 4096;
+        private final static int PAGE_SIZE_MODIFIER = 10;  // Needs to be large enough for LOS.
+        private final static int ALLOC_COUNT = 100;
+
+        @Override
+        public boolean perform() {
+            try {
+                List<byte[]> l = new ArrayList<byte[]>();
+                for (int i = 0; i < ALLOC_COUNT; i++) {
+                    l.add(new byte[PAGE_SIZE_MODIFIER * PAGE_SIZE]);
                 }
             } catch (OutOfMemoryError e) {
             }
@@ -144,10 +167,12 @@
     }
 
     private final static class Sleep extends Operation {
+        private final static int SLEEP_TIME = 100;
+
         @Override
         public boolean perform() {
             try {
-                Thread.sleep(100);
+                Thread.sleep(SLEEP_TIME);
             } catch (InterruptedException ignored) {
             }
             return true;
@@ -155,6 +180,8 @@
     }
 
     private final static class TimedWait extends Operation {
+        private final static int SLEEP_TIME = 100;
+
         private final Object lock;
 
         public TimedWait(Object lock) {
@@ -165,7 +192,7 @@
         public boolean perform() {
             synchronized (lock) {
                 try {
-                    lock.wait(100, 0);
+                    lock.wait(SLEEP_TIME, 0);
                 } catch (InterruptedException ignored) {
                 }
             }
@@ -215,7 +242,8 @@
         Map<Operation, Double> frequencyMap = new HashMap<Operation, Double>();
         frequencyMap.put(new OOM(), 0.005);             //  1/200
         frequencyMap.put(new SigQuit(), 0.095);         // 19/200
-        frequencyMap.put(new Alloc(), 0.3);             // 60/200
+        frequencyMap.put(new Alloc(), 0.25);            // 50/200
+        frequencyMap.put(new LargeAlloc(), 0.05);       // 10/200
         frequencyMap.put(new StackTrace(), 0.1);        // 20/200
         frequencyMap.put(new Exit(), 0.25);             // 50/200
         frequencyMap.put(new Sleep(), 0.125);           // 25/200
@@ -261,6 +289,8 @@
             op = new SigQuit();
         } else if (split[0].equals("-alloc")) {
             op = new Alloc();
+        } else if (split[0].equals("-largealloc")) {
+            op = new LargeAlloc();
         } else if (split[0].equals("-stacktrace")) {
             op = new StackTrace();
         } else if (split[0].equals("-exit")) {
diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt
index de99872..72656ae 100644
--- a/test/031-class-attributes/expected.txt
+++ b/test/031-class-attributes/expected.txt
@@ -84,7 +84,7 @@
   enclosingCon: null
   enclosingMeth: null
   modifiers: 1
-  package: package otherpackage
+  package: package otherpackage, Unknown, version 0.0
   declaredClasses: [0]
   member classes: [0]
   isAnnotation: false
diff --git a/test/034-call-null/expected.txt b/test/034-call-null/expected.txt
index 343226f..4e0281e 100644
--- a/test/034-call-null/expected.txt
+++ b/test/034-call-null/expected.txt
@@ -1,2 +1,2 @@
-java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
 	at Main.main(Main.java:26)
diff --git a/test/038-inner-null/expected.txt b/test/038-inner-null/expected.txt
index ba411f0..2e92564 100644
--- a/test/038-inner-null/expected.txt
+++ b/test/038-inner-null/expected.txt
@@ -1,4 +1,4 @@
 new Special()
-java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
 	at Main$Special.callInner(Main.java:31)
 	at Main.main(Main.java:20)
diff --git a/test/042-new-instance/expected.txt b/test/042-new-instance/expected.txt
index 7d843d1..c5de313 100644
--- a/test/042-new-instance/expected.txt
+++ b/test/042-new-instance/expected.txt
@@ -9,3 +9,4 @@
 Cons got expected PackageAccess complaint
 Cons got expected InstantationException
 Cons got expected PackageAccess2 complaint
+Cons ConstructorAccess succeeded
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index b0a5fd4..8cd6b2e 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -156,6 +156,14 @@
             ex.printStackTrace();
         }
 
+        // should succeed
+        try {
+            otherpackage.ConstructorAccess.newConstructorInstance();
+            System.out.println("Cons ConstructorAccess succeeded");
+        } catch (Exception ex) {
+            System.err.println("Cons ConstructorAccess failed");
+            ex.printStackTrace();
+        }
     }
 
     class InnerClass {
@@ -173,7 +181,6 @@
     public LocalClass2() {}
 }
 
-
 class LocalClass3 {
     public static void main() {
         try {
diff --git a/test/042-new-instance/src/otherpackage/ConstructorAccess.java b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
new file mode 100644
index 0000000..a74e9a0
--- /dev/null
+++ b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package otherpackage;
+
+import java.lang.reflect.Constructor;
+
+public class ConstructorAccess {
+
+    static class Inner {
+      Inner() {}
+    }
+
+    // Test for regression in b/25817515. The inner class constructor should
+    // be accessible from this static method, but if we overshoot and check
+    // accessibility using the frame below (in the Main class), we will see an
+    // IllegalAccessException from #newInstance.
+    static public void newConstructorInstance() throws Exception {
+      Class c = Inner.class;
+      Constructor cons = c.getDeclaredConstructor((Class[]) null);
+      Object obj = cons.newInstance();
+    }
+}
diff --git a/test/046-reflect/expected.txt b/test/046-reflect/expected.txt
index d657d44..06932b9 100644
--- a/test/046-reflect/expected.txt
+++ b/test/046-reflect/expected.txt
@@ -96,8 +96,8 @@
 got expected exception for Constructor.newInstance
 ReflectTest done!
 public method
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=false
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=true
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=false
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=true
 checkType invoking null
 checkType got expected exception
 calling const-class FieldNoisyInitUser.class
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 0c90109..67a0d11 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -407,12 +407,13 @@
         System.out.println("ReflectTest done!");
     }
 
-    public static void checkType() {
+    public static void checkSwap() {
         Method m;
 
+        final Object[] objects = new Object[2];
         try {
-            m = Collections.class.getDeclaredMethod("checkType",
-                            Object.class, Class.class);
+            m = Collections.class.getDeclaredMethod("swap",
+                            Object[].class, int.class, int.class);
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
@@ -421,7 +422,7 @@
         m.setAccessible(true);
         System.out.println(m + " accessible=" + m.isAccessible());
         try {
-            m.invoke(null, new Object(), Object.class);
+            m.invoke(null, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -432,7 +433,7 @@
 
         try {
             String s = "Should be ignored";
-            m.invoke(s, new Object(), Object.class);
+            m.invoke(s, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -443,7 +444,8 @@
 
         try {
             System.out.println("checkType invoking null");
-            m.invoke(null, new Object(), int.class);
+            // Trigger an NPE at the target.
+            m.invoke(null, null, 0, 1);
             System.out.println("ERROR: should throw InvocationTargetException");
         } catch (InvocationTargetException ite) {
             System.out.println("checkType got expected exception");
@@ -710,27 +712,27 @@
     private static void checkGetDeclaredConstructor() {
         try {
             Method.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Field.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Class.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
     }
 
@@ -744,7 +746,7 @@
 
         checkGetDeclaredConstructor();
         checkAccess();
-        checkType();
+        checkSwap();
         checkClinitForFields();
         checkClinitForMethods();
         checkGeneric();
diff --git a/test/055-enum-performance/src/Main.java b/test/055-enum-performance/src/Main.java
index d5903af..d6bb21145 100644
--- a/test/055-enum-performance/src/Main.java
+++ b/test/055-enum-performance/src/Main.java
@@ -20,7 +20,7 @@
             throw new AssertionError();
         } catch (InvocationTargetException expected) {
             IllegalArgumentException iae = (IllegalArgumentException) expected.getCause();
-            if (!iae.getMessage().equals("class java.lang.String is not an enum type")) {
+            if (!iae.getMessage().equals("class java.lang.String is not an enum type.")) {
                 throw new AssertionError();
             }
         }
diff --git a/test/063-process-manager/expected.txt b/test/063-process-manager/expected.txt
index 8360239..8c01bf0 100644
--- a/test/063-process-manager/expected.txt
+++ b/test/063-process-manager/expected.txt
@@ -4,12 +4,12 @@
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 spawning child #2
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 done!
diff --git a/test/063-process-manager/src/Main.java b/test/063-process-manager/src/Main.java
index 68bf878..e9e522c 100644
--- a/test/063-process-manager/src/Main.java
+++ b/test/063-process-manager/src/Main.java
@@ -30,7 +30,7 @@
                  traces.entrySet()) {
             Thread t = entry.getKey();
             String name = t.getName();
-            if (name.equals("java.lang.ProcessManager")) {
+            if (name.indexOf("process reaper") >= 0) {
                 System.out.println("process manager: " + t.getState());
                 found = true;
             }
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 5913c40..af25d9b 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -803,7 +803,7 @@
     Assert.assertEquals(Math.round(-2.5d), -2l);
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
-    Assert.assertEquals(Math.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -1034,7 +1034,7 @@
     Assert.assertEquals(StrictMath.round(-2.5d), -2l);
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
-    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index 0b87a4f..e4988c9 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,8 +32,8 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char java.lang.String.REPLACEMENT_CHAR, private static final char[] java.lang.String.ASCII, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private char java.lang.String.foldCase(char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public native [C java.lang.String.toCharArray(), public native char java.lang.String.charAt(int), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int)]
+[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, private static int java.lang.String.HASHING_SEED, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[int java.lang.String.hash32(), native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.getHashingSeed(), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index b6b1c43..1337442 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -35,8 +35,9 @@
   }
 
   static bool isRelocationDeltaZero() {
-    gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-    return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0;
+    std::vector<gc::space::ImageSpace*> spaces =
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
+    return !spaces.empty() && spaces[0]->GetImageHeader().GetPatchDelta() == 0;
   }
 
   static bool hasExecutableOat(jclass cls) {
diff --git a/test/118-noimage-dex2oat/run b/test/118-noimage-dex2oat/run
index 4b1d0ce..07bdb08 100644
--- a/test/118-noimage-dex2oat/run
+++ b/test/118-noimage-dex2oat/run
@@ -41,7 +41,6 @@
 bpath="${framework}/core-libart${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-bpath="${bpath}:${framework}/core-junit${bpath_suffix}.jar"
 bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
 bpath_arg="--runtime-option -Xbootclasspath:${bpath}"
 
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 7762b2d..9bfe429 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -92,9 +92,10 @@
 // detecting this.
 #if __linux__
 static bool IsPicImage() {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
-  CHECK(image_space != nullptr);  // We should be running with an image.
-  const OatFile* oat_file = image_space->GetOatFile();
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  CHECK(!image_spaces.empty());  // We should be running with an image.
+  const OatFile* oat_file = image_spaces[0]->GetOatFile();
   CHECK(oat_file != nullptr);     // We should have an oat file to go with the image.
   return oat_file->IsPic();
 }
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index dc3ef7e..5474c9b 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -117,7 +117,7 @@
       // Could do reflection for the private pid field, but String parsing is easier.
       String s = p.toString();
       if (s.startsWith("Process[pid=")) {
-          return Integer.parseInt(s.substring("Process[pid=".length(), s.length() - 1));
+          return Integer.parseInt(s.substring("Process[pid=".length(), s.indexOf(",")));
       } else {
           return -1;
       }
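
Note on the 137-cfi change above: with the OpenJDK-based libcore, Process.toString() no longer ends immediately after the pid, so dropping only the trailing ']' hands Integer.parseInt() extra text. Cutting at the first ',' recovers just the digits. A minimal sketch of the difference, using a hypothetical toString() value of the new shape (the "hasExited" field name is an assumption, not taken from the patch):

    // Illustration only, not part of the patch.
    public class PidParseDemo {
      static int parsePid(String s) {
        if (s.startsWith("Process[pid=")) {
          // Mirrors the fixed code: the digits end at the first comma.
          return Integer.parseInt(s.substring("Process[pid=".length(), s.indexOf(",")));
        }
        return -1;
      }

      public static void main(String[] args) {
        String s = "Process[pid=1234, hasExited=false]";
        // The old code would have tried to parse "1234, hasExited=false"
        // and thrown NumberFormatException; this prints 1234.
        System.out.println(parsePid(s));
      }
    }

Note that the fixed parse assumes a comma is present: on a toString() of the old single-field shape, indexOf(",") returns -1 and substring() would throw instead.
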
diff --git a/test/143-string-value/check b/test/143-string-value/check
new file mode 100755
index 0000000..cdf7b78
--- /dev/null
+++ b/test/143-string-value/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Strip run-specific numbers (pid and line number)
+sed -e 's/^art E[ ]\+[0-9]\+[ ]\+[0-9]\+ art\/runtime\/native\/java_lang_Class.cc:[0-9]\+\] //' "$2" > "$2.tmp"
+
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
diff --git a/test/143-string-value/expected.txt b/test/143-string-value/expected.txt
new file mode 100644
index 0000000..06cdb89
--- /dev/null
+++ b/test/143-string-value/expected.txt
@@ -0,0 +1 @@
+The String#value field is not present on Android versions >= 6.0
diff --git a/test/143-string-value/info.txt b/test/143-string-value/info.txt
new file mode 100644
index 0000000..61ec816
--- /dev/null
+++ b/test/143-string-value/info.txt
@@ -0,0 +1,2 @@
+Test to ensure we emit an error message when being asked
+for String#value.
diff --git a/test/143-string-value/src/Main.java b/test/143-string-value/src/Main.java
new file mode 100644
index 0000000..e970692
--- /dev/null
+++ b/test/143-string-value/src/Main.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    try {
+      String.class.getDeclaredField("value");
+      throw new Error("Expected to fail");
+    } catch (ReflectiveOperationException e) {
+      // Ignore...
+    }
+  }
+}
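
The new test relies on ART not backing java.lang.String with a Java-visible value field; as the expected-output change earlier in this patch shows, the character data sits behind native accessors such as charAt() and getCharsNoCheck() instead. A sketch of what a reflective probe observes (illustration only; the printed strings are not the test's output):

    // Illustration only: probing for String#value via reflection.
    public class StringValueProbe {
      public static void main(String[] args) {
        try {
          java.lang.reflect.Field f = String.class.getDeclaredField("value");
          System.out.println("present: " + f);  // reached on VMs that store a char[] value
        } catch (NoSuchFieldException e) {
          System.out.println("absent");         // the expected path on ART
        }
      }
    }

Since NoSuchFieldException is a ReflectiveOperationException, the test's catch clause swallows it, while a successful lookup falls through to the thrown Error.
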
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 43bc9d0..0e07f47 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -120,9 +120,10 @@
   /// CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
   /// CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
   /// CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
+  /// CHECK-DAG:     <<Const11:i\d+>> IntConstant 11
   /// CHECK-DAG:     <<Add1:i\d+>>    Add [<<Const1>>,<<Const2>>]
-  /// CHECK-DAG:     <<Add2:i\d+>>    Add [<<Const5>>,<<Const6>>]
-  /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
+  /// CHECK-DAG:                      Add [<<Const5>>,<<Const6>>]
+  /// CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Const11>>]
   /// CHECK-DAG:                      Return [<<Add3>>]
 
   /// CHECK-START: int Main.IntAddition2() constant_folding (after)
@@ -522,7 +523,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<And:j\d+>>      And [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
   /// CHECK-START: long Main.AndLongInt() constant_folding (after)
@@ -567,7 +568,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Or:j\d+>>       Or [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
   /// CHECK-START: long Main.OrLongInt() constant_folding (after)
@@ -612,7 +613,7 @@
   /// CHECK-DAG:     <<Const10L:j\d+>> LongConstant 10
   /// CHECK-DAG:     <<Const3:i\d+>>   IntConstant 3
   /// CHECK-DAG:     <<TypeConv:j\d+>> TypeConversion [<<Const3>>]
-  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<Const10L>>,<<TypeConv>>]
+  /// CHECK-DAG:     <<Xor:j\d+>>      Xor [<<TypeConv>>,<<Const10L>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
   /// CHECK-START: long Main.XorLongInt() constant_folding (after)
@@ -749,7 +750,7 @@
   /// CHECK-START: long Main.Mul0(long) constant_folding (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
-  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Arg>>,<<Const0>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
   /// CHECK-START: long Main.Mul0(long) constant_folding (after)
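
The updated IntAddition2 expectations say that, by the time constant folding runs, the graph already contains IntConstant 11 feeding the final addition, with the original Add [Const5,Const6] left behind unused (hence unnamed in the checks) until dead code elimination collects it; presumably an earlier stage of the pipeline now folds that subexpression. A plausible source shape for these checks (the real test may phrase it with more temporaries):

    // Assumed shape, reconstructed from the constants in the checks.
    public static int IntAddition2() {
      int a = 1 + 2;  // Add [Const1,Const2]
      int b = 5 + 6;  // Add [Const5,Const6]; its value reaches the return as IntConstant 11
      return a + b;   // Add [Add1,Const11] before the pass; folds to the constant 14
    }
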
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index 32122e4..865355c 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -16,11 +16,11 @@
 
 public class Main {
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.keepTest(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main keepTest(Main m) {
@@ -31,7 +31,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.thisTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.thisTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main thisTest() {
@@ -45,7 +45,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main newInstanceRemoveTest() {
     Main m = new Main();
@@ -57,7 +57,7 @@
   /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (after)
   /// CHECK:         NewArray
   /// CHECK-NOT:     NullCheck
   /// CHECK:         ArrayGet
@@ -66,11 +66,11 @@
     return ms[0];
   }
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifRemoveTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK-NOT:     NullCheck
   public Main ifRemoveTest(boolean flag) {
@@ -83,11 +83,11 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (before)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.ifKeepTest(boolean) instruction_simplifier (after)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
   public Main ifKeepTest(boolean flag) {
@@ -98,10 +98,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main forRemoveTest(int count) {
     Main a = new Main();
@@ -114,10 +114,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.forKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main forKeepTest(int count) {
     Main a = new Main();
@@ -132,10 +132,10 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowRemoveTest(int) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main phiFlowRemoveTest(int count) {
     Main a = new Main();
@@ -154,10 +154,10 @@
     return n.g();
   }
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.phiFlowKeepTest(int) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main phiFlowKeepTest(int count) {
     Main a = new Main();
@@ -181,7 +181,7 @@
   /// CHECK-START: Main Main.scopeRemoveTest(int, Main) ssa_builder (after)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeRemoveTest(int count, Main a) {
     Main m = null;
@@ -196,10 +196,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeKeepTest(int, Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeKeepTest(int count, Main a) {
     Main m = new Main();
@@ -214,10 +214,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfNotNullRemove(Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeIfNotNullRemove(Main m) {
     if (m != null) {
@@ -226,10 +226,10 @@
     return m;
   }
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (before)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (before)
   /// CHECK:         NullCheck
 
-  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier_after_types (after)
+  /// CHECK-START: Main Main.scopeIfKeep(Main) instruction_simplifier (after)
   /// CHECK:         NullCheck
   public Main scopeIfKeep(Main m) {
     if (m == null) {
@@ -258,11 +258,11 @@
 class ListElement {
   private ListElement next;
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (before)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (before)
   /// CHECK:         NullCheck
   /// CHECK:         NullCheck
 
-  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier_after_types (after)
+  /// CHECK-START: boolean ListElement.isShorter(ListElement, ListElement) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   static boolean isShorter(ListElement x, ListElement y) {
     ListElement xTail = x;
diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java
index 6ee8a4d..061fe6e 100644
--- a/test/445-checker-licm/src/Main.java
+++ b/test/445-checker-licm/src/Main.java
@@ -52,13 +52,13 @@
     return result;
   }
 
-  /// CHECK-START: int Main.innerDiv2() licm (before)
+  /// CHECK-START: int Main.innerMul() licm (before)
   /// CHECK-DAG: Mul loop:B4
 
-  /// CHECK-START: int Main.innerDiv2() licm (after)
+  /// CHECK-START: int Main.innerMul() licm (after)
   /// CHECK-DAG: Mul loop:B2
 
-  public static int innerDiv2() {
+  public static int innerMul() {
     int result = 0;
     for (int i = 0; i < 10; ++i) {
       for (int j = 0; j < 10; ++j) {
@@ -71,13 +71,13 @@
     return result;
   }
 
-  /// CHECK-START: int Main.innerDiv3(int, int) licm (before)
+  /// CHECK-START: int Main.divByA(int, int) licm (before)
   /// CHECK-DAG: Div loop:{{B\d+}}
 
-  /// CHECK-START: int Main.innerDiv3(int, int) licm (after)
+  /// CHECK-START: int Main.divByA(int, int) licm (after)
   /// CHECK-DAG: Div loop:{{B\d+}}
 
-  public static int innerDiv3(int a, int b) {
+  public static int divByA(int a, int b) {
     int result = 0;
     while (b < 5) {
      // a might be zero, so we can't hoist the division out of the loop.
@@ -107,6 +107,63 @@
     return result;
   }
 
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (before)
+  /// CHECK-DAG: Div loop:{{B\d+}}
+
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (after)
+  /// CHECK-NOT: Div loop:{{B\d+}}
+
+  /// CHECK-START: int Main.divAndIntrinsic(int[]) licm (after)
+  /// CHECK-DAG: Div loop:none
+
+  public static int divAndIntrinsic(int[] array) {
+    int result = 0;
+    for (int i = 0; i < array.length; i++) {
+      // An intrinsic call, unlike a general method call, cannot modify the field value.
+      // As a result, the invariant division on the field can be moved out of the loop.
+      result += (staticField / 42) + Math.abs(array[i]);
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (before)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (after)
+  /// CHECK-NOT: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBoundIntrinsic(int) licm (after)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:none
+
+  public static int invariantBoundIntrinsic(int x) {
+    int result = 0;
+    // The intrinsic call to abs used as the loop bound is invariant.
+    // As a result, the call itself can be moved out of the loop header.
+    for (int i = 0; i < Math.abs(x); i++) {
+      result += i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (before)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after)
+  /// CHECK-NOT: InvokeStaticOrDirect loop:{{B\d+}}
+
+  /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after)
+  /// CHECK-DAG: InvokeStaticOrDirect loop:none
+
+  public static int invariantBodyIntrinsic(int x, int y) {
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      // The intrinsic call to max used inside the loop is invariant.
+      // As a result, the call itself can be moved out of the loop body.
+      result += Math.max(x, y);
+    }
+    return result;
+  }
+
   public static int staticField = 42;
 
   public static void assertEquals(int expected, int actual) {
@@ -118,6 +175,11 @@
   public static void main(String[] args) {
     assertEquals(10, div());
     assertEquals(100, innerDiv());
+    assertEquals(18900, innerMul());
+    assertEquals(105, divByA(2, 0));
     assertEquals(12, arrayLength(new int[] { 4, 8 }));
+    assertEquals(21, divAndIntrinsic(new int[] { 4, -2, 8, -3 }));
+    assertEquals(45, invariantBoundIntrinsic(-10));
+    assertEquals(30, invariantBodyIntrinsic(2, 3));
   }
 }
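
The three new expected values can be recomputed from the code above. With staticField == 42, divAndIntrinsic(new int[] {4, -2, 8, -3}) adds (42 / 42) + |a[i]| on each of four iterations, i.e. 4 * 1 + (4 + 2 + 8 + 3) = 21. invariantBoundIntrinsic(-10) sums 0 through Math.abs(-10) - 1, i.e. 45, and invariantBodyIntrinsic(2, 3) adds Math.max(2, 3) ten times, i.e. 30. A stand-alone replay of that arithmetic:

    // Illustration only: re-deriving the three new assertEquals values.
    public class LicmExpectedValues {
      public static void main(String[] args) {
        int[] a = {4, -2, 8, -3};
        int div = 0;
        for (int i = 0; i < a.length; i++) {
          div += (42 / 42) + Math.abs(a[i]);  // staticField is 42 in the test
        }
        System.out.println(div);    // 21

        int bound = 0;
        for (int i = 0; i < Math.abs(-10); i++) {
          bound += i;
        }
        System.out.println(bound);  // 45

        int body = 0;
        for (int i = 0; i < 10; i++) {
          body += Math.max(2, 3);
        }
        System.out.println(body);   // 30
      }
    }
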
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index c3d2759..3e6d1f4 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -127,7 +127,7 @@
   }
 
 
-  /// CHECK-START: void Main.constantIndexing2(int[]) BCE (before)
+  /// CHECK-START: void Main.$opt$noinline$constantIndexing2(int[]) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK: BoundsCheck
@@ -137,7 +137,7 @@
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
 
-  /// CHECK-START: void Main.constantIndexing2(int[]) BCE (after)
+  /// CHECK-START: void Main.$opt$noinline$constantIndexing2(int[]) BCE (after)
   /// CHECK: LessThanOrEqual
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
@@ -151,12 +151,15 @@
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
 
-  static void constantIndexing2(int[] array) {
+  static void $opt$noinline$constantIndexing2(int[] array) {
     array[1] = 1;
     array[2] = 1;
     array[3] = 1;
     array[4] = 1;
     array[-1] = 1;
+    if (array[1] == 1) {
+      throw new Error("");
+    }
   }
 
 
@@ -616,15 +619,49 @@
     }
   }
 
+  static int[][] mA;
+
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: InvokeStaticOrDirect
+  /// CHECK-DAG: ArraySet
+
+  /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: InvokeStaticOrDirect
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+
+  static void dynamicBCEAndIntrinsic(int n) {
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < n; j++) {
+        // Since an intrinsic call cannot modify fields or arrays,
+        // dynamic BCE and hoisting can be applied to the inner loop.
+        mA[i][j] = Math.abs(mA[i][j]);
+      }
+    }
+  }
 
   static int foo() {
     try {
       assertIsManaged();
       // This will cause AIOOBE.
-      constantIndexing2(new int[3]);
+      $opt$noinline$constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
       assertIsManaged();  // This is to ensure that single-frame deoptimization works.
-                          // Will need to be updated if constantIndexing2 is inlined.
+                          // Will need to be updated if $opt$noinline$constantIndexing2 is inlined.
       try {
         // This will cause AIOOBE.
         constantIndexingForward6(new int[3]);
@@ -1225,6 +1262,21 @@
       }
     }
 
+    mA = new int[4][4];
+    for (int i = 0; i < 4; i++) {
+      for (int j = 0; j < 4; j++) {
+        mA[i][j] = -1;
+      }
+    }
+    dynamicBCEAndIntrinsic(4);
+    for (int i = 0; i < 4; i++) {
+      for (int j = 0; j < 4; j++) {
+        if (mA[i][j] != 1) {
+          System.out.println("dynamic bce failed!");
+        }
+      }
+    }
+
     array = new int[7];
     pyramid1(array);
     if (!isPyramid(array)) {
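
For dynamicBCEAndIntrinsic above, the after-BCE checks demand that no NullCheck, ArrayLength or BoundsCheck remain in the loop nest, with a single Deoptimize guard standing in for them; this is only sound because Math.abs is an intrinsic, known not to store to mA or its rows. Roughly the shape the optimization gives the inner loop, written out by hand (a conceptual sketch, not literal compiler output, and source-level Java of course keeps its own implicit checks):

    // Conceptual equivalent of the hoisting described by the checks.
    static void dynamicBCEAndIntrinsicHoisted(int n) {
      int[][] a = mA;          // mA is read and null-checked once, outside the loops
      for (int i = 0; i < n; i++) {
        int[] row = a[i];      // the row reference and its length are established here
        // The compiled code proves "n <= row.length" once via a Deoptimize guard;
        // if the guard fails at runtime, execution falls back to full checks.
        for (int j = 0; j < n; j++) {
          row[j] = Math.abs(row[j]);  // no per-element checks left in the optimized body
        }
      }
    }
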
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index f1f80ca..92cf807 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -72,49 +72,49 @@
 
 public class Main {
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testSimpleRemove() {
     Super s = new SubclassA();
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testSimpleKeep(Super) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testSimpleKeep(Super s) {
     ((SubclassA)s).$noinline$f();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public String testClassRemove() {
     Object s = SubclassA.class;
     return ((Class)s).getName();
   }
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (before)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.testClassKeep() instruction_simplifier (after)
   /// CHECK:         CheckCast
   public String testClassKeep() {
     Object s = SubclassA.class;
     return ((SubclassA)s).$noinline$h();
   }
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testIfRemove(int x) {
     Super s;
@@ -126,10 +126,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testIfKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testIfKeep(int x) {
     Super s;
@@ -141,10 +141,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForRemove(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testForRemove(int x) {
     Super s = new SubclassA();
@@ -156,10 +156,10 @@
     ((SubclassA)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testForKeep(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testForKeep(int x) {
     Super s = new SubclassA();
@@ -171,10 +171,10 @@
     ((SubclassC)s).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testPhiFromCall(int) instruction_simplifier (after)
   /// CHECK:         CheckCast
   public void testPhiFromCall(int i) {
     Object x;
@@ -186,11 +186,12 @@
     ((SubclassC)x).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOf(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf(Object o) {
     if (o instanceof SubclassC) {
@@ -201,11 +202,101 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (before)
+  public static boolean $inline$InstanceofSubclassB(Object o) { return o instanceof SubclassB; }
+  public static boolean $inline$InstanceofSubclassC(Object o) { return o instanceof SubclassC; }
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
+  /// CHECK-DAG:                   NotEqual [<<IOf1>>,<<Cst1>>]
+  /// CHECK-DAG:     <<IOf2:z\d+>> InstanceOf
+  /// CHECK-DAG:                   Equal [<<IOf2>>,<<Cst0>>]
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (before)
+  /// CHECK:         CheckCast
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (after)
+  /// CHECK-NOT:     CheckCast
+  public void testInstanceOf_NotInlined(Object o) {
+    if ((o instanceof SubclassC) == true) {
+      ((SubclassC)o).$noinline$g();
+    }
+    if ((o instanceof SubclassB) != false) {
+      ((SubclassB)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
+  /// CHECK-DAG:                   Equal [<<IOf1>>,<<Cst1>>]
+  /// CHECK-DAG:     <<IOf2:z\d+>> InstanceOf
+  /// CHECK-DAG:                   NotEqual [<<IOf2>>,<<Cst0>>]
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (before)
+  /// CHECK:         CheckCast
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) instruction_simplifier (after)
+  /// CHECK-NOT:     CheckCast
+  public void testNotInstanceOf_NotInlined(Object o) {
+    if ((o instanceof SubclassC) != true) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassC)o).$noinline$g();
+    }
+    if ((o instanceof SubclassB) == false) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassB)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) inliner (after)
+  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+  /// CHECK-DAG:                   If [<<IOf>>]
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:     CheckCast
+  public void testInstanceOf_Inlined(Object o) {
+    if (!$inline$InstanceofSubclassC(o)) {
+      // Empty branch to flip the condition.
+    } else {
+      ((SubclassC)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) inliner (after)
+  /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
+  /// CHECK-DAG:     <<Not:z\d+>>  BooleanNot [<<IOf>>]
+  /// CHECK-DAG:                   If [<<Not>>]
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (before)
+  /// CHECK:         CheckCast
+  /// CHECK-NOT:     CheckCast
+
+  /// CHECK-START: void Main.testNotInstanceOf_Inlined(java.lang.Object) instruction_simplifier_after_bce (after)
+  /// CHECK-NOT:     CheckCast
+  public void testNotInstanceOf_Inlined(Object o) {
+    if ($inline$InstanceofSubclassC(o)) {
+      ((SubclassC)o).$noinline$g();
+    }
+  }
+
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfKeep(java.lang.Object) instruction_simplifier (after)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
   public void testInstanceOfKeep(Object o) {
@@ -217,11 +308,11 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (before)
   /// CHECK:         CheckCast
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfNested(java.lang.Object) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfNested(Object o) {
     if (o instanceof SubclassC) {
@@ -233,10 +324,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhi(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhi(int i) {
     Object o;
@@ -251,10 +342,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfInFor(int n) {
     Object o = new SubclassA();
@@ -268,10 +359,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclass() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclass() {
     Object o = new SubclassA();
@@ -280,10 +371,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiSubclass(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiSubclass(int i) {
     Object o;
@@ -298,10 +389,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfWithPhiTop(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfWithPhiTop(int i) {
     Object o;
@@ -316,10 +407,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfSubclassInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfSubclassInFor(int n) {
     Object o = new SubclassA();
@@ -333,10 +424,10 @@
     }
   }
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceOfTopInFor(int) instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOfTopInFor(int n) {
     Object o = new SubclassA();
@@ -361,10 +452,10 @@
   public SubclassA a = new SubclassA();
   public static SubclassA b = new SubclassA();
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInstanceFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceFieldGetSimpleRemove() {
     Main m = new Main();
@@ -372,10 +463,10 @@
     ((SubclassA)a).$noinline$g();
   }
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testStaticFieldGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testStaticFieldGetSimpleRemove() {
     Super b = Main.b;
@@ -384,36 +475,36 @@
 
   public SubclassA $noinline$getSubclass() { throw new RuntimeException(); }
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArraySimpleRemove() {
     Super[] b = new SubclassA[10];
     SubclassA[] c = (SubclassA[])b;
   }
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testInvokeSimpleRemove() {
     Super b = $noinline$getSubclass();
     ((SubclassA)b).$noinline$g();
   }
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (before)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (before)
   /// CHECK:         CheckCast
 
-  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (after)
+  /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier (after)
   /// CHECK-NOT:     CheckCast
   public void testArrayGetSimpleRemove() {
     Super[] a = new SubclassA[10];
     ((SubclassA)a[0]).$noinline$g();
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) ssa_builder (after)
   /// CHECK:         LoadException klass:java.lang.ArithmeticException can_be_null:false exact:false
   public int testLoadExceptionInCatchNonExact(int x, int y) {
     try {
@@ -423,7 +514,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) ssa_builder (after)
   /// CHECK:         LoadException klass:FinalException can_be_null:false exact:true
   public int testLoadExceptionInCatchExact(int x) {
     try {
@@ -437,7 +528,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) reference_type_propagation (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) ssa_builder (after)
   /// CHECK:         LoadException klass:java.lang.Throwable can_be_null:false exact:false
   public int testLoadExceptionInCatchAll(int x, int y) {
     try {
@@ -458,7 +549,7 @@
     return genericFinal.get();
   }
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() ssa_builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -470,7 +561,7 @@
     return c;
   }
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() ssa_builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -512,7 +603,7 @@
     return new SubclassA();
   }
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:Super
   /// CHECK:                   NullCheck [<<Phi>>] klass:Super
 
@@ -534,7 +625,7 @@
   /// CHECK:                        CheckCast [<<Param>>,<<Clazz>>]
   /// CHECK:                        BoundType [<<Param>>] can_be_null:true
 
-  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier (after)
   /// CHECK:      <<This:l\d+>>     ParameterValue
   /// CHECK:      <<Param:l\d+>>    ParameterValue
   /// CHECK:      <<Clazz:l\d+>>    LoadClass
@@ -546,7 +637,7 @@
   }
 
 
-  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) reference_type_propagation (after)
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) ssa_builder (after)
   /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
   /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
   /// CHECK:      ParameterValue
@@ -562,7 +653,7 @@
 
   private int mainField = 0;
 
-  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) reference_type_propagation (after)
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
@@ -618,7 +709,7 @@
     getSuper();
   }
 
-  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) ssa_builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
@@ -631,23 +722,7 @@
     }
   }
 
-  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
-  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
-  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
-  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
-  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
-    Main a = null;
-    Main b = null;
-    while (a == null) {
-      if (cond) {
-        a = b;
-      } else {
-        b = a;
-      }
-    }
-  }
-
-  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() ssa_builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
   /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
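
The new testInstanceOf_* methods pin down the exact graph shapes the simplifier must see through: a comparison of an InstanceOf result against the constants 0 or 1 (the _NotInlined variants), and the same condition arriving via the inliner, possibly wrapped in a BooleanNot (the _Inlined variants). The underlying rewrite is the same in every case: inside a branch guarded by o instanceof T, a cast of o to T cannot fail, so its CheckCast is removed. A minimal self-contained instance of the pattern, using a library type in place of the test's SubclassC:

    // Illustration of the guarded-cast pattern the simplifier eliminates.
    static int guardedCast(Object o) {
      if ((o instanceof java.util.List) == true) {  // folds to If [InstanceOf]
        return ((java.util.List<?>) o).size();      // CheckCast provably succeeds
      }
      return 0;
    }
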
diff --git a/test/455-checker-gvn/expected.txt b/test/455-checker-gvn/expected.txt
index 8351c19..c1679c7 100644
--- a/test/455-checker-gvn/expected.txt
+++ b/test/455-checker-gvn/expected.txt
@@ -1 +1,3 @@
 14
+0
+10
diff --git a/test/455-checker-gvn/src/Main.java b/test/455-checker-gvn/src/Main.java
index 9824f27..cea0959 100644
--- a/test/455-checker-gvn/src/Main.java
+++ b/test/455-checker-gvn/src/Main.java
@@ -15,8 +15,14 @@
  */
 
 public class Main {
+
+  private static int mX = 2;
+  private static int mY = -3;
+
   public static void main(String[] args) {
     System.out.println(foo(3, 4));
+    System.out.println(mulAndIntrinsic());
+    System.out.println(directIntrinsic(-5));
   }
 
   /// CHECK-START: int Main.foo(int, int) GVN (before)
@@ -35,7 +41,50 @@
     return sum1 + sum2;
   }
 
-  public static long bar(int i) {
-    return i;
+  /// CHECK-START: int Main.mulAndIntrinsic() GVN (before)
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: Add
+
+  /// CHECK-START: int Main.mulAndIntrinsic() GVN (after)
+  /// CHECK: StaticFieldGet
+  /// CHECK: StaticFieldGet
+  /// CHECK: Mul
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: Mul
+  /// CHECK: Add
+
+  public static int mulAndIntrinsic() {
+    // The intermediate call to abs() does not kill
+    // the common subexpression on the multiplication.
+    int mul1 = mX * mY;
+    int abs  = Math.abs(mul1);
+    int mul2 = mY * mX;
+    return abs + mul2;
   }
+
+  /// CHECK-START: int Main.directIntrinsic(int) GVN (before)
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK: Add
+
+  /// CHECK-START: int Main.directIntrinsic(int) GVN (after)
+  /// CHECK: InvokeStaticOrDirect
+  /// CHECK-NOT: InvokeStaticOrDirect
+  /// CHECK: Add
+
+  public static int directIntrinsic(int x) {
+    // Here, the two calls to abs() themselves can be replaced with just one.
+    int abs1 = Math.abs(x);
+    int abs2 = Math.abs(x);
+    return abs1 + abs2;
+  }
+
 }
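
The two lines added to expected.txt follow from the new fields: mulAndIntrinsic() computes mul1 = 2 * -3 = -6 and abs = 6, and since the abs intrinsic cannot write to mX or mY, GVN reuses the -6 for mul2 rather than re-loading the fields and re-multiplying, giving 6 + (-6) = 0. directIntrinsic(-5) is simpler: the second identical Math.abs call is itself removed, giving 5 + 5 = 10. Replaying the arithmetic by hand:

    // Illustration only: deriving the new expected output lines "0" and "10".
    public class GvnExpectedValues {
      public static void main(String[] args) {
        int mX = 2, mY = -3;
        int mul1 = mX * mY;        // -6
        int abs = Math.abs(mul1);  //  6; the intrinsic leaves mX and mY untouched
        int mul2 = mY * mX;        // the optimizer reuses -6 here
        System.out.println(abs + mul2);                   // 0
        System.out.println(Math.abs(-5) + Math.abs(-5));  // 10
      }
    }
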
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 6151fc1..0fd7801 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -288,7 +288,7 @@
   /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
-  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Arg>>,<<Const1>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
   /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
@@ -323,7 +323,7 @@
   /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
-  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Arg>>,<<Const128>>]
+  /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
   /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
@@ -705,7 +705,7 @@
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
@@ -841,13 +841,13 @@
   /// CHECK-DAG:     <<ConstF1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Xor1:i\d+>>     Xor [<<Arg>>,<<ConstF1>>]
   /// CHECK-DAG:     <<Xor2:i\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor1>>,<<Xor2>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor2>>,<<Xor1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not>>,<<Arg>>]
+  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
   /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
@@ -1005,7 +1005,7 @@
   /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       If [<<Cond>>]
 
   /// CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (after)
@@ -1064,7 +1064,7 @@
   /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
-  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const0>>,<<Arg>>]
+  /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       If [<<Cond>>]
 
   /// CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (after)
@@ -1234,7 +1234,7 @@
   /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
-  /// CHECK:                            Mul [<<Arg>>,<<Const31>>]
+  /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
 
   /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 5080f142..2222e0f 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -16,6 +16,14 @@
 
 public final class Main {
 
+  public static final class Helper {
+    private int foo = 3;
+
+    public int getFoo() {
+      return foo;
+    }
+  }
+
   public void invokeVirtual() {
   }
 
@@ -31,25 +39,25 @@
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual {{.*\.getFoo.*}}
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
-  /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      InvokeVirtual
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect {{.*\.getFoo.*}}
+  /// CHECK-NOT:                      InvokeVirtual {{.*\.getFoo.*}}
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
   /// CHECK-DAG:                      Return [<<Field>>]
 
-  public static int inlineSharpenStringInvoke() {
-    return "Foo".length();
+  public static int inlineSharpenHelperInvoke() {
+    return new Helper().getFoo();
   }
 
   public static void main(String[] args) {
     inlineSharpenInvokeVirtual(new Main());
-    if (inlineSharpenStringInvoke() != 3) {
+    if (inlineSharpenHelperInvoke() != 3) {
       throw new Error("Expected 3");
     }
   }
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 375a3fc..4f89e91 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -40,15 +40,17 @@
       uint32_t value = 0;
       CHECK(GetVReg(m, 0, kIntVReg, &value));
       CHECK_EQ(value, 42u);
-    } else if (m_name.compare("testIntervalHole") == 0) {
+    } else if (m_name.compare("$opt$noinline$testIntervalHole") == 0) {
+      uint32_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
+      uint32_t dex_register_of_first_parameter = number_of_dex_registers - 2;
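+      // Dex parameters occupy the highest registers of the frame; with two
+      // parameters (int arg, boolean test) the first one is at size - 2.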
       found_method_ = true;
       uint32_t value = 0;
       if (GetCurrentQuickFrame() != nullptr &&
           GetCurrentOatQuickMethodHeader()->IsOptimized() &&
           !Runtime::Current()->IsDebuggable()) {
-        CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false);
+        CHECK_EQ(GetVReg(m, dex_register_of_first_parameter, kIntVReg, &value), false);
       } else {
-        CHECK(GetVReg(m, 0, kIntVReg, &value));
+        CHECK(GetVReg(m, dex_register_of_first_parameter, kIntVReg, &value));
         CHECK_EQ(value, 1u);
       }
     }
diff --git a/test/466-get-live-vreg/src/Main.java b/test/466-get-live-vreg/src/Main.java
index d036a24..19032601 100644
--- a/test/466-get-live-vreg/src/Main.java
+++ b/test/466-get-live-vreg/src/Main.java
@@ -31,7 +31,7 @@
     }
   }
 
-  static void testIntervalHole(int arg, boolean test) {
+  static void $opt$noinline$testIntervalHole(int arg, boolean test) {
    // Move the argument to a callee-save register to ensure it is in
    // a readable register.
     moveArgToCalleeSave();
@@ -44,6 +44,9 @@
       // The environment use of `arg` should not make it live.
       doStaticNativeCallLiveVreg();
     }
+    if (staticField1 == 2) {
+      throw new Error("");
+    }
   }
 
   static native void doStaticNativeCallLiveVreg();
@@ -67,7 +70,7 @@
   static void testWrapperIntervalHole(int arg, boolean test) {
     try {
       Thread.sleep(0);
-      testIntervalHole(arg, test);
+      $opt$noinline$testIntervalHole(arg, test);
     } catch (Exception e) {
       throw new Error(e);
     }
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 41bec05..c2a2a10 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -25,13 +25,14 @@
 class ClassWithFinals {
   public final int x;
   public ClassWithFinals obj;
+  public static boolean doThrow = false;
 
   /// CHECK-START: void ClassWithFinals.<init>(boolean) register (after)
   /// CHECK:      MemoryBarrier kind:StoreStore
   /// CHECK-NEXT: ReturnVoid
   public ClassWithFinals(boolean cond) {
     x = 0;
-    if (cond) {
+    if (doThrow) {
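+      // doThrow is a static field initialized to false, so the throw is not
+      // expected at run time, yet the compiler cannot prove the branch dead.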
       // avoid inlining
       throw new RuntimeException();
     }
diff --git a/test/477-checker-bound-type/src/Main.java b/test/477-checker-bound-type/src/Main.java
index c873702..0f65e44 100644
--- a/test/477-checker-bound-type/src/Main.java
+++ b/test/477-checker-bound-type/src/Main.java
@@ -17,7 +17,7 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) ssa_builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForIf(Object a) {
     if (a != null) {
@@ -27,7 +27,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
@@ -37,7 +37,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) ssa_builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForIf(Object a) {
     if (a == null) {
@@ -47,7 +47,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index 6b4da9d..d0b33b9 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -163,8 +163,8 @@
   /// CHECK:         <<Arg:z\d+>>  StaticFieldGet  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
   /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
   /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
-  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK:                       Exit
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
   /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
   /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
   /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
diff --git a/test/490-checker-inline/src/Main.java b/test/490-checker-inline/src/Main.java
index 21a0189..2e2deea 100644
--- a/test/490-checker-inline/src/Main.java
+++ b/test/490-checker-inline/src/Main.java
@@ -39,7 +39,7 @@
   /// CHECK-DAG:     InvokeInterface
 
   /// CHECK-START: void Main.testMethod() inliner (after)
-  /// CHECK-NOT:     Invoke{{.*}}
+  /// CHECK-NOT:     Invoke{{.*Object\.<init>.*}}
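+  // The pattern is narrowed on purpose: instead of forbidding every Invoke
+  // after inlining, the test now only requires that no constructor call to
+  // java.lang.Object remains.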
 
   public static void testMethod() {
     createMain().invokeVirtual();
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index a8b6307..3106ce4 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -32,14 +32,14 @@
   }
 
   /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeStaticOrDirect {{.*Main\.<init>.*}}
   /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
-  /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeStaticOrDirect {{.*Main\.<init>.*}}
   /// CHECK-NOT:       InvokeVirtual
   /// CHECK-NOT:       InvokeInterface
 
diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java
index 44b727f..171405c 100644
--- a/test/493-checker-inline-invoke-interface/src/Main.java
+++ b/test/493-checker-inline-invoke-interface/src/Main.java
@@ -36,7 +36,7 @@
   /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
-  /// CHECK-NOT:       Invoke{{.*}}
+  /// CHECK-NOT:       Invoke{{.*Object\.<init>.*}}
   public static void main(String[] args) {
     Itf itf = bar();
     itf.foo();
diff --git a/test/529-checker-unresolved/expected.txt b/test/529-checker-unresolved/expected.txt
index 1e7dbfe..1590a2a 100644
--- a/test/529-checker-unresolved/expected.txt
+++ b/test/529-checker-unresolved/expected.txt
@@ -5,3 +5,6 @@
 UnresolvedClass.superMethod()
 instanceof ok
 checkcast ok
+UnresolvedClass.directCall()
+UnresolvedClass.directCall()
+UnresolvedClass.directCall()
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 5219c04..872fa6d 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -138,6 +138,27 @@
     callUnresolvedInstanceFieldAccess(c);
     testInstanceOf(m);
     testCheckCast(m);
+    testLicm(2);
+  }
+
+  /// CHECK-START: void Main.testLicm(int) licm (before)
+  /// CHECK:      <<Class:l\d+>>        LoadClass                                     loop:B2
+  /// CHECK-NEXT: <<Clinit:l\d+>>       ClinitCheck [<<Class>>]                       loop:B2
+  /// CHECK-NEXT: <<New:l\d+>>          NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>]  loop:B2
+  /// CHECK-NEXT:                       InvokeUnresolved [<<New>>]                    loop:B2
+
+  /// CHECK-START: void Main.testLicm(int) licm (after)
+  /// CHECK:      <<Class:l\d+>>        LoadClass                                     loop:none
+  /// CHECK-NEXT: <<Clinit:l\d+>>       ClinitCheck [<<Class>>]                       loop:none
+  /// CHECK:      <<New:l\d+>>          NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>]  loop:B2
+  /// CHECK-NEXT:                       InvokeUnresolved [<<New>>]                    loop:B2
+  public static void testLicm(int count) {
+    // Test to make sure we keep the initialization check after loading an unresolved class.
+    UnresolvedClass c;
+    int i = 0;
+    do {
+      c = new UnresolvedClass();
+    } while (i++ != count);
   }
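+
+  // Note: after LICM the LoadClass/ClinitCheck pair is hoisted out of the
+  // loop (loop:none above) while the NewInstance stays inside, so the class
+  // is still initialized before its first allocation.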
 
   public static void expectEquals(byte expected, byte result) {
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index e827b1e..f1d9a37 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -26,7 +26,7 @@
   //
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
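+  // Note: a plain CHECK matches lines in program order, whereas CHECK-DAG
+  // allows matches in any order; with a single expected BoundsCheck the
+  // stricter ordered form suffices.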
@@ -40,7 +40,7 @@
   }
 
   /// CHECK-START: int Main.linearDown(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -54,7 +54,7 @@
   }
 
   /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -69,7 +69,7 @@
   }
 
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -84,7 +84,7 @@
   }
 
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -101,7 +101,7 @@
   }
 
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -116,7 +116,7 @@
   }
 
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -134,7 +134,7 @@
   }
 
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -156,7 +156,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -173,7 +173,7 @@
   }
 
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -194,7 +194,7 @@
   }
 
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -208,7 +208,7 @@
   }
 
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -223,8 +223,8 @@
   }
 
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -241,7 +241,7 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -255,10 +255,10 @@
   }
 
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int linearByTwoSkip2(int x[]) {
     int result = 0;
@@ -270,7 +270,7 @@
   }
 
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -287,7 +287,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -305,10 +305,10 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -323,7 +323,7 @@
   }
 
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -341,10 +341,10 @@
   }
 
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
@@ -359,7 +359,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -374,7 +374,7 @@
   }
 
   /// CHECK-START: int Main.linearForNEDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -389,7 +389,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -405,7 +405,7 @@
   }
 
   /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -421,10 +421,10 @@
   }
 
   /// CHECK-START: int Main.linearShort() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.linearShort() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -437,7 +437,7 @@
   }
 
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -458,20 +458,20 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnTwoArrayLengths(int n) {
     int[] a = new int[n];
@@ -488,20 +488,20 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnOneArrayLength(int n) {
     int[] a = new int[n];
@@ -518,20 +518,20 @@
   }
 
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnParameter(int n) {
     int[] a = new int[n];
@@ -548,32 +548,32 @@
   }
 
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: void Main.linearTriangularVariations(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void linearTriangularVariations(int n) {
     int[] a = new int[n];
@@ -616,22 +616,22 @@
   }
 
   /// CHECK-START: void Main.bubble(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: If
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: If
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: If
-  /// CHECK-DAG: ArraySet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: If
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static void bubble(int[] a) {
     for (int i = a.length; --i >= 0;) {
@@ -646,7 +646,7 @@
   }
 
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -664,7 +664,7 @@
   }
 
   /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -685,10 +685,10 @@
   }
 
   /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -713,7 +713,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp1() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -728,7 +728,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp2() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -743,7 +743,7 @@
   }
 
   /// CHECK-START: int Main.justRightUp3() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -758,10 +758,10 @@
   }
 
   /// CHECK-START: int Main.justOOBUp() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBUp() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -774,7 +774,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown1() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -789,7 +789,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown2() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -804,7 +804,7 @@
   }
 
   /// CHECK-START: int Main.justRightDown3() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
@@ -819,10 +819,10 @@
   }
 
   /// CHECK-START: int Main.justOOBDown() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.justOOBDown() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -835,10 +835,10 @@
   }
 
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
@@ -847,10 +847,10 @@
   }
 
   /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
@@ -859,10 +859,10 @@
   }
 
   /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -873,10 +873,10 @@
   }
 
   /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@@ -887,14 +887,14 @@
   }
 
   /// CHECK-START: int[] Main.multiply1() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: int[] Main.multiply1() BCE (after)
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   /// CHECK-NOT: Deoptimize
   private static int[] multiply1() {
     int[] a = new int[10];
@@ -912,14 +912,14 @@
   }
 
   /// CHECK-START: int[] Main.multiply2() BCE (before)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   //
   /// CHECK-START: int[] Main.multiply2() BCE (after)
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
   static int[] multiply2() {
     int[] a = new int[10];
     try {
@@ -936,24 +936,24 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
-  /// CHECK-DAG: StaticFieldGet
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: StaticFieldSet
+  /// CHECK: StaticFieldGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: StaticFieldSet
   //
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
-  /// CHECK-DAG: StaticFieldGet
+  /// CHECK: StaticFieldGet
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: StaticFieldSet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: ArrayGet
+  /// CHECK: StaticFieldSet
+  /// CHECK: Exit
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   private static int linearDynamicBCE1(int[] x, int lo, int hi) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -963,24 +963,24 @@
   }
 
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
-  /// CHECK-DAG: StaticFieldGet
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: StaticFieldSet
+  /// CHECK: StaticFieldGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: StaticFieldSet
   //
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
-  /// CHECK-DAG: StaticFieldGet
+  /// CHECK: StaticFieldGet
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: StaticFieldSet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: ArrayGet
+  /// CHECK: StaticFieldSet
+  /// CHECK: Exit
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
     int result = 0;
     for (int i = lo; i < hi; i++) {
@@ -990,19 +990,19 @@
   }
 
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: ArrayGet
   private static int wrapAroundDynamicBCE(int[] x) {
     int w = 9;
     int result = 0;
@@ -1014,19 +1014,19 @@
   }
 
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: ArrayGet
   private static int periodicDynamicBCE(int[] x) {
     int k = 0;
     int result = 0;
@@ -1038,20 +1038,20 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: ArrayGet
+  /// CHECK: Exit
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
    // This loop could be infinite for hi = max int. Since i is also used
    // as a subscript, however, dynamic BCE can proceed.
@@ -1063,16 +1063,16 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
     // As above, but now the index is not used as subscript,
@@ -1085,16 +1085,16 @@
   }
 
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   /// CHECK-NOT: Deoptimize
   static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
     int result = 0;
@@ -1107,41 +1107,41 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: NotEqual
-  /// CHECK-DAG: If
-  /// CHECK-DAG: If
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: If
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: NotEqual
+  /// CHECK: If
+  /// CHECK: If
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: If
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
   //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
-  /// CHECK-DAG: NullCheck
-  /// CHECK-DAG: ArrayLength
-  /// CHECK-DAG: NotEqual
-  /// CHECK-DAG: If
-  /// CHECK-DAG: If
+  /// CHECK: NullCheck
+  /// CHECK: ArrayLength
+  /// CHECK: NotEqual
+  /// CHECK: If
+  /// CHECK: If
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: If
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK-NOT: BoundsCheck
-  /// CHECK-DAG: Exit
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
-  /// CHECK-DAG: Deoptimize
+  /// CHECK: Exit
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: ArrayGet
   static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
     // Deliberately test array length on a before the loop so that only bounds checks
@@ -1167,27 +1167,27 @@
   }
 
   /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
-  /// CHECK-DAG: If
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
-  /// CHECK-DAG: BoundsCheck
-  /// CHECK-DAG: ArrayGet
+  /// CHECK: If
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArrayGet
   //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
   /// CHECK-DAG: If
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index cadf706..baee7b3 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -458,16 +458,14 @@
   }
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (before)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
 
   /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (after)
-  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
-  /// CHECK: ArraySet
-  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+  /// CHECK:     {{f\d+}} ArrayGet
+  /// CHECK-NOT: {{f\d+}} ArrayGet
 
-  // I/F, J/D aliasing should keep the load/store.
+  // I/F, J/D aliasing no longer happens, so LSE can eliminate the second load.
   static float test19(float[] fa1, float[] fa2) {
     fa1[0] = fa2[0];
     return fa1[0];
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 3f65d5a..be666e9 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeVirtual
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}}
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index e9f16c0..9a9f0b6 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -21,14 +21,14 @@
 }
 
 public class Main {
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
   /// CHECK:    <<Ret:l\d+>>     BoundType [<<Phi>>] klass:Final
   /// CHECK:                     Return [<<Ret>>]
 
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier_after_types (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier (after)
   /// CHECK:    <<Phi:l\d+>>     Phi
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
@@ -43,7 +43,7 @@
     return (Final) x;
   }
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
@@ -65,7 +65,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) ssa_builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
   /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
index dc27f10..917073b 100644
--- a/test/549-checker-types-merge/src/Main.java
+++ b/test/549-checker-types-merge/src/Main.java
@@ -38,14 +38,14 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeNullContant(boolean cond) {
     return cond ? null : new Main();
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
@@ -53,7 +53,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
@@ -61,7 +61,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
@@ -69,7 +69,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
@@ -77,7 +77,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
@@ -85,7 +85,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
@@ -93,7 +93,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
@@ -101,7 +101,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
@@ -109,7 +109,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
@@ -117,7 +117,7 @@
     return cond ? a : b;
   }
 
-    /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) reference_type_propagation (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) ssa_builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
diff --git a/test/552-checker-primitive-typeprop/expected.txt b/test/552-checker-primitive-typeprop/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/expected.txt
diff --git a/test/552-checker-primitive-typeprop/info.txt b/test/552-checker-primitive-typeprop/info.txt
new file mode 100644
index 0000000..9d69056
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/info.txt
@@ -0,0 +1,2 @@
+Test that phis with environment uses are kept in --debuggable mode when they
+can be properly typed.
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/ArrayGet.smali b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
new file mode 100644
index 0000000..042fa0c
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
@@ -0,0 +1,245 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LArrayGet;
+.super Ljava/lang/Object;
+
+
+# Test phi with fixed-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFixedType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
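+
+# Note: without --debuggable the phi has no real use and is removed; with
+# --debuggable the environment use created by the nanoTime() call keeps it
+# alive, which is why it must be typed float.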
+
+
+# Test phi with fixed-type ArrayGet as an input and a conflicting second input.
+# The phi should be eliminated due to the conflict.
+
+## CHECK-START: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static conflictingFixedType([FI)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-float v2, v0, v1  # float use fixes type
+
+  if-eqz p1, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
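+
+# Note: an int/float conflict cannot be resolved even in debuggable mode, so
+# the phi is eliminated rather than kept for its environment use.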
+
+
+# Same test as the one above, only this time it checks that the type of the
+# ArrayGet itself is not changed.
+
+## CHECK-START: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK:     {{i\d+}} ArrayGet
+.method public static conflictingFixedType2([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value
+  add-int v2, v0, v1    # int use fixes type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a matching second input.
+# The phi should be typed accordingly.
+
+## CHECK-START: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
+## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
+## CHECK-DAG:                ArraySet [{{l\d+}},{{i\d+}},<<Aget>>]
+## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
+.method public static matchingFreeType([FF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be float but has no typed use
+  aput v0, p0, v1       # aput does not disambiguate the type
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => float
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test phi with free-type ArrayGet as an input and a conflicting second input.
+# The phi should be eliminated due to the conflict, even in debuggable mode.
+
+## CHECK-START: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-NOT: Phi
+
+.method public static conflictingFreeType([IF)V
+  .registers 8
+
+  const v0, 0x0
+  const v1, 0x1
+
+  aget v0, p0, v0       # read value, should be int but has no typed use
+  aput v0, p0, v1
+
+  float-to-int v2, p1
+  if-eqz v2, :after
+  move v0, p1
+  :after
+  # v0 = Phi [ArrayGet, Arg1] => conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+
+# Test that a real use of an ArrayGet is propagated through phis. The following
+# test case uses the ArrayGet indirectly through two phis. It also creates an
+# unused conflicting phi, which should not be preserved.
+
+## CHECK-START: void ArrayGet.conflictingPhiUses(int[], float, boolean, boolean, boolean) ssa_builder (after)
+## CHECK:         InvokeStaticOrDirect env:[[{{i\d+}},{{i\d+}},_,{{i\d+}},{{.*}}
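+#
+# Note: the "_" in the environment above stands for the slot of the unused
+# conflicting phi (v2) after it has been removed.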
+
+.method public static conflictingPhiUses([IFZZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # Create v1 = Phi [0x0, int ArrayGet]
+  move v1, v0
+  if-eqz p2, :else1
+  aget v1, p0, v0
+  :else1
+
+  # Create v2 = Phi [v1, float]
+  move v2, v1
+  if-eqz p3, :else2
+  move v2, p1
+  :else2
+
+  # Create v3 = Phi [v1, int]
+  move v3, v1
+  if-eqz p4, :else3
+  move v3, v0
+  :else3
+
+  # Use v3 as int.
+  add-int/lit8 v4, v3, 0x2a
+
+  # Create env uses.
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+
+  return-void
+.end method
+
+# Test that the right ArrayGet equivalent is always selected. The following test
+# case uses ArrayGet as float through one phi and as an indeterminate type through
+# another. The situation needs to be resolved so that only one instruction
+# remains.
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK:         {{f\d+}} ArrayGet
+
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK-NOT:     {{i\d+}} ArrayGet
+
+.method public static typedVsUntypedPhiUse([FFZZ)V
+  .registers 10
+
+  const v0, 0x0
+
+  # v1 = float ArrayGet
+  aget v1, p0, v0
+
+  # Create v2 = Phi [v1, 0.0f]
+  move v2, v1
+  if-eqz p2, :else1
+  move v2, v0
+  :else1
+
+  # Use v2 as float
+  cmpl-float v2, v2, p1
+
+  # Create v3 = Phi [v1, 0.0f]
+  move v3, v1
+  if-eqz p3, :else2
+  move v3, v0
+  :else2
+
+  # Use v3 without a determinate type.
+  aput v3, p0, v0
+
+  return-void
+.end method
diff --git a/test/552-checker-primitive-typeprop/smali/ArraySet.smali b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
new file mode 100644
index 0000000..57d8606
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
@@ -0,0 +1,51 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LArraySet;
+.super Ljava/lang/Object;
+
+# Test ArraySet on int[] and float[] arrays. The input should be typed accordingly.
+# Note that the input is a Phi to make sure primitive type propagation is re-run
+# on the replaced inputs.
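+#
+# Rough sketch of the scenario below (illustrative only): the same register
+# bits 0x40000000 flow into both arrays, so int and float phi equivalents are
+# needed:
+#   v0 = cond ? 0x40000000 : 0;   // int 1073741824, or float 2.0f
+#   intArray[1] = v0;             // this ArraySet types its input as int
+#   floatArray[1] = v0;           // this ArraySet types its input as float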
+
+## CHECK-START: void ArraySet.ambiguousSet(int[], float[], boolean) ssa_builder (after)
+## CHECK-DAG:     <<IntArray:l\d+>>    ParameterValue klass:int[]
+## CHECK-DAG:     <<IntA:i\d+>>        IntConstant 0
+## CHECK-DAG:     <<IntB:i\d+>>        IntConstant 1073741824
+## CHECK-DAG:     <<IntPhi:i\d+>>      Phi [<<IntA>>,<<IntB>>] reg:0
+## CHECK-DAG:     <<IntNC:l\d+>>       NullCheck [<<IntArray>>]
+## CHECK-DAG:                          ArraySet [<<IntNC>>,{{i\d+}},<<IntPhi>>]
+
+## CHECK-DAG:     <<FloatArray:l\d+>>  ParameterValue klass:float[]
+## CHECK-DAG:     <<FloatA:f\d+>>      FloatConstant 0
+## CHECK-DAG:     <<FloatB:f\d+>>      FloatConstant 2
+## CHECK-DAG:     <<FloatPhi:f\d+>>    Phi [<<FloatA>>,<<FloatB>>] reg:0
+## CHECK-DAG:     <<FloatNC:l\d+>>     NullCheck [<<FloatArray>>]
+## CHECK-DAG:                          ArraySet [<<FloatNC>>,{{i\d+}},<<FloatPhi>>]
+
+.method public static ambiguousSet([I[FZ)V
+  .registers 8
+
+  const v0, 0x0
+  if-eqz p2, :else
+  const v0, 0x40000000
+  :else
+  # v0 = Phi [0.0f, 2.0f]
+
+  const v1, 0x1
+  aput v0, p0, v1
+  aput v0, p1, v1
+
+  return-void
+.end method
diff --git a/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
new file mode 100644
index 0000000..395feaa
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
@@ -0,0 +1,52 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSsaBuilder;
+.super Ljava/lang/Object;
+
+# Check that a dead phi with a live equivalent is replaced in an environment. The
+# following test case throws an exception and uses v0 afterwards. However, v0
+# contains a phi that is interpreted as int for the environment, and as float for
+# instruction use. SsaBuilder must substitute the int variant before removing it,
+# otherwise running the code with an array short enough to throw will crash at
+# runtime because v0 is undefined.
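+#
+# Rough sketch of the control flow below (illustrative, not valid Java):
+#   v0 = p0 ? 2.0f : 0.0f;   // phi: float for the code, int in environments
+#   try {
+#     p1[3] = 3;             // throws for arrays shorter than four elements
+#     v0 = 1;                // creates a catch phi for v0
+#     p1[4] = 4;
+#   } catch (anything) { }
+#   return (int) v0;         // float-to-int use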
+
+## CHECK-START: int SsaBuilder.environmentPhi(boolean, int[]) ssa_builder (after)
+## CHECK-DAG:     <<Cst0:f\d+>>  FloatConstant 0
+## CHECK-DAG:     <<Cst2:f\d+>>  FloatConstant 2
+## CHECK-DAG:     <<Phi:f\d+>>   Phi [<<Cst0>>,<<Cst2>>]
+## CHECK-DAG:                    BoundsCheck env:[[<<Phi>>,{{i\d+}},{{z\d+}},{{l\d+}}]]
+
+.method public static environmentPhi(Z[I)I
+  .registers 4
+
+  const v0, 0x0
+  if-eqz p0, :else
+  const v0, 0x40000000
+  :else
+  # v0 = phi that can be both int and float
+
+  :try_start
+  const v1, 0x3
+  aput v1, p1, v1
+  const v0, 0x1     # generate catch phi for v0
+  const v1, 0x4
+  aput v1, p1, v1
+  :try_end
+  .catchall {:try_start .. :try_end} :use_as_float
+
+  :use_as_float
+  float-to-int v0, v0
+  return v0
+.end method
\ No newline at end of file
diff --git a/test/552-checker-primitive-typeprop/smali/TypePropagation.smali b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
new file mode 100644
index 0000000..58682a1
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
@@ -0,0 +1,136 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTypePropagation;
+.super Ljava/lang/Object;
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDeadPhi(boolean, boolean, int, float, float) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDeadPhi(ZZIFF)V
+  .registers 8
+
+  if-eqz p0, :after1
+  move p2, p3
+  :after1
+  # p2 = merge(int,float) = conflict
+
+  if-eqz p1, :after2
+  move p2, p4
+  :after2
+  # p2 = merge(conflict,float) = conflict
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeSameType(boolean, int, int) ssa_builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeSameType(ZII)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,int) = int
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeVoidInput(boolean, boolean, int, int) ssa_builder (after)
+## CHECK:     {{i\d+}} Phi
+## CHECK:     {{i\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeVoidInput(ZZII)V
+  .registers 8
+  :loop
+  # p2 = void (loop phi) => p2 = merge(int,int) = int
+  if-eqz p0, :after
+  move p2, p3
+  :after
+  # p2 = merge(void,int) = int
+  if-eqz p1, :loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDifferentSize(boolean, int, long) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeDifferentSize(ZIJ)V
+  .registers 8
+  if-eqz p0, :after
+  move-wide p1, p2
+  :after
+  # p1 = merge(int,long) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeRefFloat(boolean, float, java.lang.Object) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeRefFloat(ZFLjava/lang/Object;)V
+  .registers 8
+  if-eqz p0, :after
+  move-object p1, p2
+  :after
+  # p1 = merge(float,reference) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Success(boolean, float) ssa_builder (after)
+## CHECK:     {{f\d+}} Phi
+## CHECK-NOT:          Phi
+.method public static mergeIntFloat_Success(ZF)V
+  .registers 8
+  if-eqz p0, :after
+  const/4 p1, 0x0
+  :after
+  # p1 = merge(float,0x0) = float
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Fail(boolean, int, float) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static mergeIntFloat_Fail(ZIF)V
+  .registers 8
+  if-eqz p0, :after
+  move p1, p2
+  :after
+  # p1 = merge(int,float) = conflict
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
+
+## CHECK-START-DEBUGGABLE: void TypePropagation.updateAllUsersOnConflict(boolean, boolean, int, float, int) ssa_builder (after)
+## CHECK-NOT: Phi
+.method public static updateAllUsersOnConflict(ZZIFI)V
+  .registers 8
+
+  :loop1
+  # loop phis for all args
+  # p2 = merge(int,float) = float? => conflict
+  move p2, p3
+  if-eqz p0, :loop1
+
+  :loop2
+  # loop phis for all args
+  # requests float equivalent of p4 phi in loop1 => conflict
+  # propagates conflict to loop2's phis
+  move p2, p4
+  if-eqz p1, :loop2
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+  return-void
+.end method
diff --git a/test/552-checker-primitive-typeprop/src/Main.java b/test/552-checker-primitive-typeprop/src/Main.java
new file mode 100644
index 0000000..fe2343e
--- /dev/null
+++ b/test/552-checker-primitive-typeprop/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result, expected=" + expected + ", actual=" + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("SsaBuilder");
+    Method m = c.getMethod("environmentPhi", new Class[] { boolean.class, int[].class });
+
+    int[] array = new int[3];
+    int result;
+
+    result = (Integer) m.invoke(null, new Object[] { true, array } );
+    assertEquals(2, result);
+
+    result = (Integer) m.invoke(null, new Object[] { false, array } );
+    assertEquals(0, result);
+  }
+}
diff --git a/test/554-checker-rtp-checkcast/expected.txt b/test/554-checker-rtp-checkcast/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/554-checker-rtp-checkcast/expected.txt
diff --git a/test/554-checker-rtp-checkcast/info.txt b/test/554-checker-rtp-checkcast/info.txt
new file mode 100644
index 0000000..2a60971
--- /dev/null
+++ b/test/554-checker-rtp-checkcast/info.txt
@@ -0,0 +1 @@
+Tests that phis with check-casted reference type inputs are typed.
diff --git a/test/554-checker-rtp-checkcast/src/Main.java b/test/554-checker-rtp-checkcast/src/Main.java
new file mode 100644
index 0000000..607f71a
--- /dev/null
+++ b/test/554-checker-rtp-checkcast/src/Main.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+
+  public static Object returnIntArray() { return new int[10]; }
+
+  /// CHECK-START: void Main.boundTypeForMergingPhi() ssa_builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
+
+  public static void boundTypeForMergingPhi() {
+    int[] array = new int[20];
+    if (array.hashCode() > 5) {
+      array = (int[]) returnIntArray();
+    }
+    array[0] = 14;
+  }
+
+  /// CHECK-START: void Main.boundTypeForLoopPhi() ssa_builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
+
+  public static void boundTypeForLoopPhi() {
+    int[] array = new int[20];
+    int i = 0;
+    while (i < 4) {
+      ++i;
+      array[i] = i;
+      if (i > 2) {
+        array = (int[]) returnIntArray();
+      }
+    }
+    array[0] = 14;
+  }
+
+  /// CHECK-START: void Main.boundTypeForCatchPhi() ssa_builder (after)
+  /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
+  /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
+  /// CHECK-DAG:     <<Phi>>  Phi is_catch_phi:true klass:int[]
+
+  public static void boundTypeForCatchPhi() {
+    int[] array1 = new int[20];
+    int[] array2 = (int[]) returnIntArray();
+
+    int[] catch_phi = array1;
+    try {
+      System.nanoTime();
+      catch_phi = array2;
+      System.nanoTime();
+    } catch (Throwable ex) {
+      catch_phi[0] = 14;
+    }
+  }
+
+  public static void main(String[] args) {  }
+}
diff --git a/test/557-checker-ref-equivalent/expected.txt b/test/557-checker-ref-equivalent/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/557-checker-ref-equivalent/expected.txt
diff --git a/test/557-checker-ref-equivalent/info.txt b/test/557-checker-ref-equivalent/info.txt
new file mode 100644
index 0000000..30e763b
--- /dev/null
+++ b/test/557-checker-ref-equivalent/info.txt
@@ -0,0 +1 @@
+Checker tests to ensure we do not get reference and integer phi equivalents.
diff --git a/test/557-checker-ref-equivalent/smali/TestCase.smali b/test/557-checker-ref-equivalent/smali/TestCase.smali
new file mode 100644
index 0000000..2472957
--- /dev/null
+++ b/test/557-checker-ref-equivalent/smali/TestCase.smali
@@ -0,0 +1,51 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: void TestCase.testIntRefEquivalent() ssa_builder (after)
+## CHECK-NOT: Phi
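+#
+# In the method below the constant zero in v0 is passed to foo() both as an
+# int and as a null reference, so the builder must not create reference and
+# integer phi equivalents for it (see the comment in the catch block).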
+.method public static testIntRefEquivalent()V
+    .registers 4
+
+    const v0, 0
+
+    :try_start
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+    if-eqz v0, :end_if
+    const v0, 0
+    :end_if
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+    goto :no_catch
+    :try_end
+
+    .catch Ljava/lang/Exception; {:try_start .. :try_end} :exception
+    :exception
+    # We used to have reference and integer phi equivalents here, which broke
+    # the invariant of not sharing the same spill slot between those two
+    # types.
+    invoke-static {v0,v0}, LTestCase;->foo(ILjava/lang/Object;)V
+
+    :no_catch
+    goto :try_start
+    return-void
+
+.end method
+
+.method public static foo(ILjava/lang/Object;)V
+    .registers 4
+    return-void
+.end method
diff --git a/test/557-checker-ref-equivalent/src/Main.java b/test/557-checker-ref-equivalent/src/Main.java
new file mode 100644
index 0000000..a970af5
--- /dev/null
+++ b/test/557-checker-ref-equivalent/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: void Main.testRedundantPhiCycle(boolean) ssa_builder (after)
+  /// CHECK-NOT:  Phi
+  private void testRedundantPhiCycle(boolean cond) {
+    Object o = null;
+    while (true) {
+      if (cond) {
+        o = null;
+      }
+      System.out.println(o);
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) ssa_builder (after)
+  /// CHECK-NOT:  Phi
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+}
diff --git a/test/559-bce-ssa/expected.txt b/test/559-bce-ssa/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/559-bce-ssa/expected.txt
diff --git a/test/559-bce-ssa/info.txt b/test/559-bce-ssa/info.txt
new file mode 100644
index 0000000..e104515
--- /dev/null
+++ b/test/559-bce-ssa/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to hit a bogus DCHECK
+on this test case.
diff --git a/test/559-bce-ssa/src/Main.java b/test/559-bce-ssa/src/Main.java
new file mode 100644
index 0000000..88f06b4
--- /dev/null
+++ b/test/559-bce-ssa/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+
+  public static void foo(int[] array, int[] array2, int start, int end) {
+    for (int i = start; i < end; ++i) {
+      array[i] = array2[array.length] + 1;
+    }
+  }
+
+  public static void main(String[] args) {
+    int[] a = new int[1];
+    foo(a, new int[2], 0, 1);
+    if (a[0] != 1) {
+      throw new Error("test failed");
+    }
+  }
+}
diff --git a/test/559-checker-rtp-ifnotnull/expected.txt b/test/559-checker-rtp-ifnotnull/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/559-checker-rtp-ifnotnull/expected.txt
diff --git a/test/559-checker-rtp-ifnotnull/info.txt b/test/559-checker-rtp-ifnotnull/info.txt
new file mode 100644
index 0000000..c08aa0c
--- /dev/null
+++ b/test/559-checker-rtp-ifnotnull/info.txt
@@ -0,0 +1,2 @@
+Tests that the BoundType created for an if-not-null check does not force
+untyped loop phis to Object.
\ No newline at end of file
diff --git a/test/559-checker-rtp-ifnotnull/src/Main.java b/test/559-checker-rtp-ifnotnull/src/Main.java
new file mode 100644
index 0000000..8f40129
--- /dev/null
+++ b/test/559-checker-rtp-ifnotnull/src/Main.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+
+  /// CHECK-START: void Main.boundTypeForIfNotNull() ssa_builder (after)
+  /// CHECK-DAG:     <<Method:(i|j)\d+>>  CurrentMethod
+  /// CHECK-DAG:     <<Null:l\d+>>        NullConstant
+  /// CHECK-DAG:     <<Cst5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:     <<Cst10:i\d+>>       IntConstant 10
+
+  /// CHECK-DAG:                          InvokeVirtual [<<NullCheck:l\d+>>]
+  /// CHECK-DAG:     <<NullCheck>>        NullCheck [<<LoopPhi:l\d+>>] klass:int[]
+  /// CHECK-DAG:     <<LoopPhi>>          Phi [<<Null>>,<<MergePhi:l\d+>>] klass:int[]
+
+  /// CHECK-DAG:     <<BoundType:l\d+>>   BoundType [<<LoopPhi>>] klass:int[] can_be_null:false
+  /// CHECK-DAG:     <<NewArray10:l\d+>>  NewArray [<<Cst10>>,<<Method>>] klass:int[]
+  /// CHECK-DAG:     <<NotNullPhi:l\d+>>  Phi [<<BoundType>>,<<NewArray10>>] klass:int[]
+
+  /// CHECK-DAG:     <<NewArray5:l\d+>>   NewArray [<<Cst5>>,<<Method>>] klass:int[]
+  /// CHECK-DAG:     <<MergePhi>>         Phi [<<NewArray5>>,<<NotNullPhi>>] klass:int[]
+
+  public static void boundTypeForIfNotNull() {
+    int[] array = null;
+    for (int i = -1; i < 10; ++i) {
+      if (array == null) {
+        array = new int[5];
+      } else {
+        if (i == 5) {
+          array = new int[10];
+        }
+        array[i] = i;
+      }
+    }
+    array.hashCode();
+  }
+
+  public static void main(String[] args) {  }
+}
diff --git a/test/560-packed-switch/expected.txt b/test/560-packed-switch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/560-packed-switch/expected.txt
diff --git a/test/560-packed-switch/info.txt b/test/560-packed-switch/info.txt
new file mode 100644
index 0000000..41d4562
--- /dev/null
+++ b/test/560-packed-switch/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to emit wrong code
+for HPackedSwitch.
diff --git a/test/560-packed-switch/src/Main.java b/test/560-packed-switch/src/Main.java
new file mode 100644
index 0000000..3b0b425
--- /dev/null
+++ b/test/560-packed-switch/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    switch (staticField) {
+      case -1:
+        return;
+      case -4:
+        // We add this case to make it an odd number of case/default.
+        // The code generation for it used to be bogus.
+        throw new Error("Cannot happen");
+      default:
+        throw new Error("Cannot happen");
+    }
+  }
+  static int staticField = -1;
+}
diff --git a/test/561-divrem/expected.txt b/test/561-divrem/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/561-divrem/expected.txt
diff --git a/test/561-divrem/info.txt b/test/561-divrem/info.txt
new file mode 100644
index 0000000..71c9601
--- /dev/null
+++ b/test/561-divrem/info.txt
@@ -0,0 +1,2 @@
+Regression test for div/rem taking Integer.MIN_VALUE and
+Long.MIN_VALUE.
diff --git a/test/561-divrem/src/Main.java b/test/561-divrem/src/Main.java
new file mode 100644
index 0000000..082783d
--- /dev/null
+++ b/test/561-divrem/src/Main.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
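+// The edge cases exercised below: MIN_VALUE / MIN_VALUE == 1 and
+// MIN_VALUE % MIN_VALUE == 0, while for any other value v, v / MIN_VALUE == 0
+// and v % MIN_VALUE == v, since Java integer division truncates toward zero.
+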
+public class Main {
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void assertEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEquals(0, $noinline$divInt(1));
+    assertEquals(1, $noinline$remInt(1));
+
+    assertEquals(0, $noinline$divInt(-1));
+    assertEquals(-1, $noinline$remInt(-1));
+
+    assertEquals(0, $noinline$divInt(0));
+    assertEquals(0, $noinline$remInt(0));
+
+    assertEquals(1, $noinline$divInt(Integer.MIN_VALUE));
+    assertEquals(0, $noinline$remInt(Integer.MIN_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE));
+    assertEquals(Integer.MAX_VALUE, $noinline$remInt(Integer.MAX_VALUE));
+
+    assertEquals(0, $noinline$divInt(Integer.MAX_VALUE - 1));
+    assertEquals(Integer.MAX_VALUE - 1, $noinline$remInt(Integer.MAX_VALUE - 1));
+
+    assertEquals(0, $noinline$divInt(Integer.MIN_VALUE + 1));
+    assertEquals(Integer.MIN_VALUE + 1, $noinline$remInt(Integer.MIN_VALUE + 1));
+
+    assertEquals(0L, $noinline$divLong(1L));
+    assertEquals(1L, $noinline$remLong(1L));
+
+    assertEquals(0L, $noinline$divLong(-1L));
+    assertEquals(-1L, $noinline$remLong(-1L));
+
+    assertEquals(0L, $noinline$divLong(0L));
+    assertEquals(0L, $noinline$remLong(0L));
+
+    assertEquals(1L, $noinline$divLong(Long.MIN_VALUE));
+    assertEquals(0L, $noinline$remLong(Long.MIN_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE));
+    assertEquals(Long.MAX_VALUE, $noinline$remLong(Long.MAX_VALUE));
+
+    assertEquals(0L, $noinline$divLong(Long.MAX_VALUE - 1));
+    assertEquals(Long.MAX_VALUE - 1, $noinline$remLong(Long.MAX_VALUE - 1));
+
+    assertEquals(0L, $noinline$divLong(Long.MIN_VALUE + 1));
+    assertEquals(Long.MIN_VALUE + 1, $noinline$remLong(Long.MIN_VALUE + 1));
+  }
+
+  public static int $noinline$divInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Integer.MIN_VALUE;
+  }
+
+  public static int $noinline$remInt(int value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Integer.MIN_VALUE;
+  }
+
+  public static long $noinline$divLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value / Long.MIN_VALUE;
+  }
+
+  public static long $noinline$remLong(long value) {
+    if (doThrow) {
+      throw new Error("");
+    }
+    return value % Long.MIN_VALUE;
+  }
+
+  static boolean doThrow = false;
+}
diff --git a/test/701-easy-div-rem/genMain.py b/test/701-easy-div-rem/genMain.py
index 75eee17..b6c769f 100644
--- a/test/701-easy-div-rem/genMain.py
+++ b/test/701-easy-div-rem/genMain.py
@@ -13,25 +13,27 @@
 # limitations under the License.
 
 upper_bound_int_pow2 = 31
+upper_bound_int_pow2_neg = 32
 upper_bound_long_pow2 = 63
+upper_bound_long_pow2_neg = 64
 upper_bound_constant = 100
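+# Note: the *_neg bounds go one step further than the positive ones because
+# -2**31 and -2**63 are representable in int/long (they equal the MIN_VALUEs)
+# while 2**31 and 2**63 are not.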
 all_tests = [
     ({'@INT@': 'int', '@SUFFIX@':''},
      [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckDiv', 'idiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'idiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
-      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2_neg)]),
       ('CheckRem', 'irem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'irem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])]),
     ({'@INT@': 'long', '@SUFFIX@': 'l'},
      [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckDiv', 'ldiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckDiv', 'ldiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
-      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2_neg)]),
       ('CheckRem', 'lrem_by_constant_', [i for i in range(1, upper_bound_constant)]),
       ('CheckRem', 'lrem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])])
 ]
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 7589f8f..53d0f10 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -38,11 +38,13 @@
   $(HOST_OUT_EXECUTABLES)/jasmin \
   $(HOST_OUT_EXECUTABLES)/smali \
   $(HOST_OUT_EXECUTABLES)/dexmerger
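+# Order-only prerequisites (listed after "|" in the rules below) are built
+# before the tests run, but do not retrigger the tests when they change.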
+TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES :=
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
     $(JACK) \
     $(JILL_JAR)
+  TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
 endif
 
 ifeq ($(ART_TEST_DEBUG_GC),true)
@@ -63,7 +65,7 @@
     run_test_options += --quiet
   endif
 $$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
+$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
@@ -153,8 +155,14 @@
 ifeq ($(ART_TEST_RUN_TEST_NO_IMAGE),true)
   IMAGE_TYPES += no-image
 endif
+ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true)
+  IMAGE_TYPES := multiimage
+endif
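+# Note: ":=" above replaces the image list outright, so a multi-image run
+# exercises only the multi-image configurations.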
 ifeq ($(ART_TEST_PIC_IMAGE),true)
   IMAGE_TYPES += picimage
+  ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true)
+    IMAGE_TYPES := multipicimage
+  endif
 endif
 PICTEST_TYPES := npictest
 ifeq ($(ART_TEST_PIC_TEST),true)
@@ -214,6 +222,7 @@
 
 # Tests that are timing sensitive and flaky on heavily loaded systems.
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \
+  002-sleep \
   053-wait-some \
   055-enum-performance \
   133-static-invoke-super
@@ -258,6 +267,26 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# b/26483935
+TEST_ART_BROKEN_HOST_RUN_TESTS := \
+  132-daemon-locks-shutdown \
+
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,host,$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_HOST_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+
+TEST_ART_BROKEN_HOST_RUN_TESTS :=
+
+# 143-string-value tests for a LOG(E) tag, which is only supported on host.
+TEST_ART_BROKEN_TARGET_RUN_TESTS := \
+  143-string-value \
+
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TARGET_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+
+TEST_ART_BROKEN_TARGET_RUN_TESTS :=
+
 # 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
   117-nopatchoat \
@@ -579,6 +608,18 @@
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 
+# Tests broken by multi-image.
+TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS := \
+  476-checker-ctor-memory-barrier \
+  530-checker-lse
+
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), \
+    $(TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS),  $(ALL_ADDRESS_SIZES))
+
+TEST_ART_BROKEN_MULTI_IMAGE_RUN_TESTS :=
+
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \
@@ -640,14 +681,18 @@
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
 endif
 
 # Create a rule to build and run a tests following the form:
@@ -835,7 +880,27 @@
           prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_$(13))
         endif
       else
-        $$(error found $(9) expected $(IMAGE_TYPES))
+        ifeq ($(9),multiimage)
+          test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES
+          run_test_options += --multi-image
+          ifeq ($(1),host)
+            prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+          else
+            prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+          endif
+        else
+          ifeq ($(9),multipicimage)
+            test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
+            run_test_options += --pic-image --multi-image
+            ifeq ($(1),host)
+              prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+            else
+              prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+            endif
+          else
+            $$(error found $(9) expected $(IMAGE_TYPES))
+          endif
+        endif
       endif
     endif
   endif
@@ -896,7 +961,7 @@
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
 .PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule)
+$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  DX=$(abspath $(DX)) \
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 082c9b3..fd41fd2 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -56,7 +56,7 @@
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv* env ATTRIBUTE_UNUSED,
                                                          jclass cls ATTRIBUTE_UNUSED) {
-  return Runtime::Current()->GetHeap()->HasImageSpace();
+  return Runtime::Current()->GetHeap()->HasBootImageSpace();
 }
 
 // public static native boolean isImageDex2OatEnabled();
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 3efa6ff..e004b6c 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -361,13 +361,15 @@
 dex2oat_cmdline="true"
 mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
 
+app_image="--app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g")"
+
 if [ "$PREBUILD" = "y" ]; then
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
                       --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
-                      --app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g") \
+                      ${app_image} \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
     dex2oat_cmdline="${dex2oat_cmdline} --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
@@ -390,6 +392,12 @@
   DALVIKVM_ISA_FEATURES_ARGS="-Xcompiler-option --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
 fi
 
+# java.io.tmpdir can only be set at launch time.
+TMP_DIR_OPTION=""
+if [ "$HOST" = "n" ]; then
+  TMP_DIR_OPTION="-Djava.io.tmpdir=/data/local/tmp"
+fi
+
 dalvikvm_cmdline="$INVOKE_WITH $GDB $ANDROID_ROOT/bin/$DALVIKVM \
                   $GDB_ARGS \
                   $FLAGS \
@@ -403,6 +411,7 @@
                   $INT_OPTS \
                   $DEBUGGER_OPTS \
                   $DALVIKVM_BOOT_OPT \
+                  $TMP_DIR_OPTION \
                   -cp $DEX_LOCATION/$TEST_NAME.jar$SECONDARY_DEX $MAIN $ARGS"
 
 # Remove whitespace.
diff --git a/test/run-test b/test/run-test
index 60e008c..4f111d2 100755
--- a/test/run-test
+++ b/test/run-test
@@ -85,7 +85,7 @@
 
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
 if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack"
+  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
 # If JILL_JAR is not set, assume it is located in the prebuilts directory.
@@ -135,6 +135,7 @@
 have_image="yes"
 image_suffix=""
 pic_image_suffix=""
+multi_image_suffix=""
 android_root="/system"
 
 while true; do
@@ -184,6 +185,9 @@
     elif [ "x$1" = "x--pic-image" ]; then
         pic_image_suffix="-pic"
         shift
+    elif [ "x$1" = "x--multi-image" ]; then
+        multi_image_suffix="-multi"
+        shift
     elif [ "x$1" = "x--pic-test" ]; then
         run_args="${run_args} --pic-test"
         shift
@@ -458,7 +462,7 @@
 if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
         framework="${ANDROID_PRODUCT_OUT}/system/framework"
-        bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
+        bpath="${framework}/core-libart.jar:${framework}/core-oj.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
         run_args="${run_args} --boot -Xbootclasspath:${bpath}"
     else
         true # defaults to using target BOOTCLASSPATH
@@ -470,12 +474,12 @@
             export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
         fi
         guess_host_arch_name
-        run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art"
+        run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
         guess_target_arch_name
         run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
-        run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art"
+        run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
     fi
     if [ "$relocate" = "yes" ]; then
       run_args="${run_args} --relocate"
@@ -502,9 +506,9 @@
     # TODO If the target was compiled WITH_DEXPREOPT=true then these tests will
     # fail since these jar files will be stripped.
     bpath="${framework}/core-libart${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/core-junit${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
     # Pass down the bootclasspath
     run_args="${run_args} --runtime-option -Xbootclasspath:${bpath}"
@@ -611,6 +615,8 @@
         echo "                          Set instruction-set-features for compilation."
         echo "    --pic-image           Use an image compiled with position independent code for the"
         echo "                          boot class path."
+        echo "    --multi-image         Use a set of images compiled with dex2oat multi-image for"
+        echo "                          the boot class path."
         echo "    --pic-test            Compile the test code position independent."
         echo "    --quiet               Don't print anything except failure messages"
     ) 1>&2  # Direct to stderr so usage is not printed if --quiet is set.
@@ -677,11 +683,6 @@
 # Tests named '<number>-checker-*' will also have their CFGs verified with
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
-  # Build Checker DEX files without dx's optimizations so the input to dex2oat
-  # better resembles the Java source. We always build the DEX the same way, even
-  # if Checker is not invoked and the test only runs the program.
-  build_args="${build_args} --dx-option --no-optimize"
-
   # Jack does not necessarily generate the same DEX output as dx. Because these tests depend
   # on a particular DEX output, keep building them with dx for now (b/19467889).
   USE_JACK="false"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index adc4d03..a3ecf86 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -23,8 +23,6 @@
    - Make sortable by clicking on headers.
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
- * Should help be part of an AhatHandler, that automatically gets the menu and
-   stylesheet link rather than duplicating that?
  * Show version number with --version.
  * Show somewhere where to send bugs.
  * Include a link to /objects in the overview and menu?
@@ -79,6 +77,12 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
+ 0.3 Dec 15, 2015
+   Fix page loading performance by showing a limited number of entries by default.
+   Fix mismatch between overview and "roots" totals.
+   Annotate root objects and show their types.
+   Annotate references with their referents.
+
  0.2 Oct 20, 2015
    Take into account 'count' and 'offset' when displaying strings.
 
diff --git a/tools/ahat/src/AhatHttpHandler.java b/tools/ahat/src/AhatHttpHandler.java
index 178747c..1d05a66 100644
--- a/tools/ahat/src/AhatHttpHandler.java
+++ b/tools/ahat/src/AhatHttpHandler.java
@@ -41,15 +41,7 @@
     PrintStream ps = new PrintStream(exchange.getResponseBody());
     try {
       HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
-      DocString menu = new DocString();
-      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("rooted"), DocString.text("rooted"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("help"), DocString.text("help"));
-      doc.menu(menu);
+      doc.menu(Menu.getMenu());
       mAhatHandler.handle(doc, new Query(exchange.getRequestURI()));
       doc.close();
     } catch (RuntimeException e) {
diff --git a/tools/ahat/src/HelpHandler.java b/tools/ahat/src/HelpHandler.java
new file mode 100644
index 0000000..8de3c85
--- /dev/null
+++ b/tools/ahat/src/HelpHandler.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.google.common.io.ByteStreams;
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+
+/**
+ * HttpHandler that serves the static help page, wrapped in the standard
+ * menu and stylesheet.
+ */
+class HelpHandler implements HttpHandler {
+
+  @Override
+  public void handle(HttpExchange exchange) throws IOException {
+    ClassLoader loader = HelpHandler.class.getClassLoader();
+    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
+    exchange.sendResponseHeaders(200, 0);
+    PrintStream ps = new PrintStream(exchange.getResponseBody());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
+    doc.menu(Menu.getMenu());
+
+    InputStream is = loader.getResourceAsStream("help.html");
+    if (is == null) {
+      ps.println("No help available.");
+    } else {
+      ByteStreams.copy(is, ps);
+    }
+
+    doc.close();
+    ps.close();
+  }
+}
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index ebd49d7..091820f 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -79,7 +79,7 @@
     server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
     server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
-    server.createContext("/help", new StaticHandler("help.html", "text/html"));
+    server.createContext("/help", new HelpHandler());
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
     server.setExecutor(Executors.newFixedThreadPool(1));
     System.out.println("Server started on localhost:" + port);
diff --git a/tools/ahat/src/Menu.java b/tools/ahat/src/Menu.java
new file mode 100644
index 0000000..018e019
--- /dev/null
+++ b/tools/ahat/src/Menu.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+/**
+ * A menu shown in the UI that can be used to jump to common pages.
+ */
+class Menu {
+  private static DocString mMenu =
+    DocString.link(DocString.uri("/"), DocString.text("overview"))
+      .append(" - ")
+      .appendLink(DocString.uri("rooted"), DocString.text("rooted"))
+      .append(" - ")
+      .appendLink(DocString.uri("sites"), DocString.text("allocations"))
+      .append(" - ")
+      .appendLink(DocString.uri("help"), DocString.text("help"));
+
+  /**
+   * Returns the menu as a DocString.
+   */
+  public static DocString getMenu() {
+    return mMenu;
+  }
+}
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index 0fe4fba..720fcb4 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -48,14 +48,7 @@
 
     doc.section("Heap Sizes");
     printHeapSizes(doc, query);
-
-    DocString menu = new DocString();
-    menu.appendLink(DocString.uri("rooted"), DocString.text("Rooted"));
-    menu.append(" - ");
-    menu.appendLink(DocString.uri("site"), DocString.text("Allocations"));
-    menu.append(" - ");
-    menu.appendLink(DocString.uri("help"), DocString.text("Help"));
-    doc.big(menu);
+    doc.big(Menu.getMenu());
   }
 
   private void printHeapSizes(Doc doc, Query query) {
diff --git a/tools/ahat/src/help.html b/tools/ahat/src/help.html
index 92ec37d..ff04ad2 100644
--- a/tools/ahat/src/help.html
+++ b/tools/ahat/src/help.html
@@ -14,17 +14,6 @@
 limitations under the License.
 -->
 
-<head>
-<link rel="stylesheet" type="text/css" href="style.css">
-</head>
-
-<div class="menu">
-  <a href="/">overview</a> -
-  <a href="rooted">rooted</a> -
-  <a href="sites">allocations</a> -
-  <a href="help">help</a>
-</div>
-
 <h1>Help</h1>
 <h2>Information shown by ahat:</h2>
 <ul>
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 421de17..368b744 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.3
+Implementation-Version: 0.4
 Main-Class: com.android.ahat.Main
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 02787fb..9e02ce2 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -48,7 +48,7 @@
 if [[ $mode == "host" ]]; then
   make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb libstdc++"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index b323456..839e1e6 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -66,8 +66,7 @@
           "libcore.java.text.SimpleDateFormatTest#testDstZoneNameWithNonDstTimestamp",
           "libcore.java.text.SimpleDateFormatTest#testDstZoneWithNonDstTimestampForNonHourDstZone",
           "libcore.java.text.SimpleDateFormatTest#testNonDstZoneNameWithDstTimestamp",
-          "libcore.java.text.SimpleDateFormatTest#testNonDstZoneWithDstTimestampForNonHourDstZone",
-          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition"]
+          "libcore.java.text.SimpleDateFormatTest#testNonDstZoneWithDstTimestampForNonHourDstZone"]
 },
 {
   description: "Failing due to missing localhost on hammerhead and volantis.",
@@ -172,9 +171,76 @@
   bug: 25437292
 },
 {
-  description: "Assertion failing on the concurrent collector configuration.",
+  description: "Failing tests after enso move.",
   result: EXEC_FAILED,
-  names: ["jsr166.LinkedTransferQueueTest#testTransfer2"],
-  bug: 25883050
+  bug: 26326992,
+  names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST",
+          "libcore.java.lang.OldSystemTest#test_load",
+          "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits",
+          "libcore.java.text.NumberFormatTest#test_setCurrency",
+          "libcore.java.text.OldNumberFormatTest#test_getIntegerInstanceLjava_util_Locale",
+          "libcore.java.util.CalendarTest#testAddOneDayAndOneDayOver30MinuteDstForwardAdds48Hours",
+          "libcore.java.util.CalendarTest#testNewCalendarKoreaIsSelfConsistent",
+          "libcore.java.util.CalendarTest#testSetTimeInZoneWhereDstIsNoLongerUsed",
+          "libcore.java.util.CalendarTest#test_nullLocale",
+          "libcore.java.util.FormatterTest#test_numberLocalization",
+          "libcore.java.util.FormatterTest#test_uppercaseConversions",
+          "libcore.javax.crypto.CipherTest#testCipher_getInstance_WrongType_Failure",
+          "libcore.javax.crypto.CipherTest#testDecryptBufferZeroSize_mustDecodeToEmptyString",
+          "libcore.javax.security.auth.x500.X500PrincipalTest#testExceptionsForWrongDNs",
+          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getDate",
+          "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getExpiration",
+          "org.apache.harmony.regex.tests.java.util.regex.PatternSyntaxExceptionTest#testPatternSyntaxException",
+          "org.apache.harmony.tests.java.lang.FloatTest#test_parseFloat_LString_Harmony6261",
+          "org.apache.harmony.tests.java.lang.ThreadTest#test_isDaemon",
+          "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String",
+          "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible",
+          "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition",
+          "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0",
+          "libcore.java.util.CalendarTest#test_clear_45877",
+          "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat",
+          "org.apache.harmony.tests.java.util.TimerTaskTest#test_scheduledExecutionTime"]
+},
+{
+  description: "'cat -' does not work anymore",
+  result: EXEC_FAILED,
+  bug: 26395656,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_getOutputStream"]
+},
+{
+  description: "Missing resource in classpath",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["libcore.java.util.prefs.OldAbstractPreferencesTest#testClear",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testExportNode",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testExportSubtree",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGet",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetBoolean",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetByteArray",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetDouble",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetFloat",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetInt",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testGetLong",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testKeys",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testNodeExists",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPut",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutBoolean",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutByteArray",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutDouble",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutFloat",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutInt",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testPutLong",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testRemove",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testRemoveNode",
+          "libcore.java.util.prefs.OldAbstractPreferencesTest#testSync",
+          "libcore.java.util.prefs.PreferencesTest#testHtmlEncoding",
+          "libcore.java.util.prefs.PreferencesTest#testPreferencesClobbersExistingFiles",
+          "org.apache.harmony.tests.java.util.PropertiesTest#test_storeToXMLLjava_io_OutputStreamLjava_lang_StringLjava_lang_String",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testExportNode",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testExportSubtree",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testFlush",
+          "org.apache.harmony.tests.java.util.prefs.AbstractPreferencesTest#testSync",
+          "org.apache.harmony.tests.java.util.prefs.FilePreferencesImplTest#testPutGet"]
 }
 ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
new file mode 100644
index 0000000..75d1eff
--- /dev/null
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -0,0 +1,26 @@
+/*
+ * This file contains expectations for ART's buildbot's concurrent collector
+ * configurations. Its purpose is to temporarily and quickly list failing
+ * tests so they do not break the bots on the CC configurations, until the
+ * tests are fixed or the libcore expectation files get properly updated.
+ * The script that uses this file is art/tools/run-libcore-tests.sh.
+ *
+ * It is also used to enable AOSP experiments without messing up CTS's
+ * expectations.
+ */
+
+[
+{
+  description: "Assertion failing on the concurrent collector configuration.",
+  result: EXEC_FAILED,
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
+          "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
+  bug: 25883050
+},
+{
+  description: "libcore.java.lang.OldSystemTest#test_gc failure on armv8-concurrent-collector.",
+  result: EXEC_FAILED,
+  names: ["libcore.java.lang.OldSystemTest#test_gc"],
+  bug: 26155567
+}
+]
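For context, a minimal sketch of how this new file is meant to be consumed (the environment variable and script are the ones shown in the run-libcore-tests.sh hunk below; the --mode=host flag is an assumed pass-through argument to vogar, not something this change introduces):

    # Run the libcore tests on a concurrent collector (read barrier)
    # configuration. run-libcore-tests.sh then passes this file to vogar
    # as a second --expectations file, on top of libcore_failures.txt.
    export ART_USE_READ_BARRIER=true
    art/tools/run-libcore-tests.sh --mode=host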
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index c79f4b9..f29e51f 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -65,7 +65,7 @@
     art="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
     art_debugee="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
     # We force generation of a new image to avoid build-time and run-time classpath differences.
-    image="-Ximage:/system/non/existent"
+    image="-Ximage:/system/non/existent/vogar.art"
     # We do not need a device directory on host.
     device_dir=""
     # Vogar knows which VM to use on host.
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 4b5a5ca..f346239 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -32,6 +32,12 @@
   exit 1
 fi
 
+expectations="--expectations art/tools/libcore_failures.txt"
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # Tolerate some more failures on the concurrent collector configurations.
+  expectations="$expectations --expectations art/tools/libcore_failures_concurrent_collector.txt"
+fi
+
 emulator="no"
 if [ "$ANDROID_SERIAL" = "emulator-5554" ]; then
   emulator="yes"
@@ -83,7 +89,7 @@
     # will create a boot image with the default compiler. Note that
     # giving an existing image on host does not work because of
     # classpath/resources differences when compiling the boot image.
-    vogar_args="$vogar_args --vm-arg -Ximage:/non/existent"
+    vogar_args="$vogar_args --vm-arg -Ximage:/non/existent/vogar.art"
     shift
   elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
@@ -105,4 +111,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
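Taken together, on a read-barrier configuration the final line above expands to roughly the following (a sketch only; $vogar_args, the jar paths, and the package list depend on the actual invocation, and on host $vogar_args may additionally carry the --vm-arg -Ximage:/non/existent/vogar.art argument added earlier in the script):

    vogar $vogar_args --vm-arg -Xusejit:true \
      --expectations art/tools/libcore_failures.txt \
      --expectations art/tools/libcore_failures_concurrent_collector.txt \
      --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}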