Merge "Fix access to long/double stack values from debugger"
diff --git a/Android.mk b/Android.mk
index 7a95dfe..c5e90f2 100644
--- a/Android.mk
+++ b/Android.mk
@@ -127,7 +127,6 @@
 
 include $(art_path)/build/Android.common_test.mk
 include $(art_path)/build/Android.gtest.mk
-include $(art_path)/test/Android.oat.mk
 include $(art_path)/test/Android.run-test.mk
 
 # Sync test files to the target, depends upon all things that must be pushed to the target.
@@ -169,65 +168,59 @@
 
 # "mm test-art-host" to build and run all host tests.
 .PHONY: test-art-host
-test-art-host: test-art-host-gtest test-art-host-oat test-art-host-run-test test-art-host-vixl
+test-art-host: test-art-host-gtest test-art-host-run-test test-art-host-vixl
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All host tests that run solely with the default compiler.
 .PHONY: test-art-host-default
-test-art-host-default: test-art-host-oat-default test-art-host-run-test-default
+test-art-host-default: test-art-host-run-test-default
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All host tests that run solely with the optimizing compiler.
 .PHONY: test-art-host-optimizing
-test-art-host-optimizing: test-art-host-oat-optimizing test-art-host-run-test-optimizing
+test-art-host-optimizing: test-art-host-run-test-optimizing
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All host tests that run solely on the interpreter.
 .PHONY: test-art-host-interpreter
-test-art-host-interpreter: test-art-host-oat-interpreter test-art-host-run-test-interpreter
+test-art-host-interpreter: test-art-host-run-test-interpreter
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # Primary host architecture variants:
 .PHONY: test-art-host$(ART_PHONY_TEST_HOST_SUFFIX)
 test-art-host$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-gtest$(ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-oat$(ART_PHONY_TEST_HOST_SUFFIX) test-art-host-run-test$(ART_PHONY_TEST_HOST_SUFFIX)
+    test-art-host-run-test$(ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-default$(ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-default$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-default$(ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-default$(ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-default$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-default$(ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-optimizing$(ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-optimizing$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-optimizing$(ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-optimizing$(ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-optimizing$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-optimizing$(ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-interpreter$(ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-interpreter$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-interpreter$(ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-interpreter$(ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-interpreter$(ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-interpreter$(ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # Secondary host architecture variants:
 ifneq ($(HOST_PREFER_32_BIT),true)
 .PHONY: test-art-host$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 test-art-host$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-gtest$(2ND_ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-oat$(2ND_ART_PHONY_TEST_HOST_SUFFIX) test-art-host-run-test$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+    test-art-host-run-test$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-default$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-host-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
-test-art-host-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-oat-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX) \
-    test-art-host-run-test-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+test-art-host-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX): test-art-host-run-test-interpreter$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 endif
 
@@ -236,65 +229,59 @@
 
 # "mm test-art-target" to build and run all target tests.
 .PHONY: test-art-target
-test-art-target: test-art-target-gtest test-art-target-oat test-art-target-run-test
+test-art-target: test-art-target-gtest test-art-target-run-test
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All target tests that run solely with the default compiler.
 .PHONY: test-art-target-default
-test-art-target-default: test-art-target-oat-default test-art-target-run-test-default
+test-art-target-default: test-art-target-run-test-default
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All target tests that run solely with the optimizing compiler.
 .PHONY: test-art-target-optimizing
-test-art-target-optimizing: test-art-target-oat-optimizing test-art-target-run-test-optimizing
+test-art-target-optimizing: test-art-target-run-test-optimizing
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # All target tests that run solely on the interpreter.
 .PHONY: test-art-target-interpreter
-test-art-target-interpreter: test-art-target-oat-interpreter test-art-target-run-test-interpreter
+test-art-target-interpreter: test-art-target-run-test-interpreter
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # Primary target architecture variants:
 .PHONY: test-art-target$(ART_PHONY_TEST_TARGET_SUFFIX)
 test-art-target$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-gtest$(ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-oat$(ART_PHONY_TEST_TARGET_SUFFIX) test-art-target-run-test$(ART_PHONY_TEST_TARGET_SUFFIX)
+    test-art-target-run-test$(ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-default$(ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-default$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-default$(ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-default$(ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-default$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-default$(ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-interpreter$(ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 # Secondary target architecture variants:
 ifdef TARGET_2ND_ARCH
 .PHONY: test-art-target$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 test-art-target$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-gtest$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-oat$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) test-art-target-run-test$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+    test-art-target-run-test$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-default$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 
 .PHONY: test-art-target-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-oat-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) \
-    test-art-target-run-test-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+test-art-target-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-run-test-interpreter$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
 	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
 endif
 
@@ -322,14 +309,14 @@
 .PHONY: oat-target-$(1)
 oat-target-$(1): $$(OUT_OAT_FILE)
 
-$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD_DEPENDENCY)
+$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD)
 	@mkdir -p $$(dir $$@)
 	$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
 		--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
 		--dex-location=/$(1) --oat-file=$$@ \
 		--instruction-set=$(DEX2OAT_TARGET_ARCH) \
 		--instruction-set-features=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
-		--android-root=$(PRODUCT_OUT)/system
+		--android-root=$(PRODUCT_OUT)/system --include-patch-information
 
 endif
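
Note: --include-patch-information makes dex2oat record patch locations in the
generated oat file so that it can later be relocated (e.g. by patchoat) without
recompiling; it complements the ART_BASE_ADDRESS_MIN_DELTA/MAX_DELTA defines
added to build/Android.common_build.mk below.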
 
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index c39bc5d..207dc55 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -183,8 +183,16 @@
 art_non_debug_cflags := \
   -O3
 
+art_host_non_debug_cflags := \
+  $(art_non_debug_cflags)
+
+art_target_non_debug_cflags := \
+  $(art_non_debug_cflags)
+
 ifeq ($(HOST_OS),linux)
-  art_non_debug_cflags += -Wframe-larger-than=1728
+  # Allow a larger frame size for host clang builds for now.
+  art_host_non_debug_cflags += -Wframe-larger-than=2600
+  art_target_non_debug_cflags += -Wframe-larger-than=1728
 endif
 
 # FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
@@ -207,6 +215,25 @@
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
 ART_TARGET_CFLAGS := $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+
+ifndef LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA
+  LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA=-0x1000000
+endif
+ifndef LIBART_IMG_HOST_MAX_BASE_ADDRESS_DELTA
+  LIBART_IMG_HOST_MAX_BASE_ADDRESS_DELTA=0x1000000
+endif
+ART_HOST_CFLAGS += -DART_BASE_ADDRESS_MIN_DELTA=$(LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA)
+ART_HOST_CFLAGS += -DART_BASE_ADDRESS_MAX_DELTA=$(LIBART_IMG_HOST_MAX_BASE_ADDRESS_DELTA)
+
+ifndef LIBART_IMG_TARGET_MIN_BASE_ADDRESS_DELTA
+  LIBART_IMG_TARGET_MIN_BASE_ADDRESS_DELTA=-0x1000000
+endif
+ifndef LIBART_IMG_TARGET_MAX_BASE_ADDRESS_DELTA
+  LIBART_IMG_TARGET_MAX_BASE_ADDRESS_DELTA=0x1000000
+endif
+ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MIN_DELTA=$(LIBART_IMG_TARGET_MIN_BASE_ADDRESS_DELTA)
+ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MAX_DELTA=$(LIBART_IMG_TARGET_MAX_BASE_ADDRESS_DELTA)
+
 ART_TARGET_LDFLAGS :=
 ifeq ($(TARGET_CPU_SMP),true)
   ART_TARGET_CFLAGS += -DANDROID_SMP=1
@@ -266,8 +293,8 @@
   endif
 endif
 
-ART_HOST_NON_DEBUG_CFLAGS := $(art_non_debug_cflags)
-ART_TARGET_NON_DEBUG_CFLAGS := $(art_non_debug_cflags)
+ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags)
+ART_TARGET_NON_DEBUG_CFLAGS := $(art_target_non_debug_cflags)
 
 # TODO: move -fkeep-inline-functions to art_debug_cflags when target gcc > 4.4 (and -lsupc++)
 ART_HOST_DEBUG_CFLAGS := $(art_debug_cflags) -fkeep-inline-functions
@@ -329,5 +356,8 @@
 ART_DEFAULT_GC_TYPE :=
 ART_DEFAULT_GC_TYPE_CFLAGS :=
 art_cflags :=
+art_target_non_debug_cflags :=
+art_host_non_debug_cflags :=
+art_non_debug_cflags :=
 
 endif # ANDROID_COMMON_BUILD_MK
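
Note: 0x1000000 bytes is 16 MiB, so these defaults let the boot image base be
shifted anywhere within +/-16 MiB of the configured base address; a build can
narrow or widen the range by defining the LIBART_IMG_HOST_*/LIBART_IMG_TARGET_*
delta variables before this file is parsed.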
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index 8c0d9f2..bd1e8aa 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -31,7 +31,11 @@
 ART_TARGET_TEST_OUT := $(TARGET_OUT_DATA)/art-test
 
 # Directory used for temporary test files on the host.
+ifneq ($(TMPDIR),)
+ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
+else
 ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+endif
 
 # Core.oat location on the device.
 TARGET_CORE_OAT := $(ART_TARGET_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat
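
Note: with the TMPDIR branch above, a host test run can redirect its scratch
directory, e.g. TMPDIR=$HOME/art-tmp mm test-art-host (hypothetical path, for
illustration); only when TMPDIR is unset does it fall back to
/tmp/test-art-$PPID.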
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index ed07129..c50b4ae 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -22,8 +22,26 @@
 # List of known broken tests that we won't attempt to execute. The test name must be the full
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
 ART_TEST_KNOWN_BROKEN := \
-  test-art-host-oat-optimizing-SignalTest64 \
-  test-art-host-oat-optimizing-SignalTest32
+  test-art-host-run-test-gcstress-optimizing-no-prebuild-004-SignalTest32 \
+  test-art-host-run-test-gcstress-optimizing-prebuild-004-SignalTest32 \
+  test-art-host-run-test-gcstress-optimizing-norelocate-004-SignalTest32 \
+  test-art-host-run-test-gcstress-optimizing-relocate-004-SignalTest32 \
+  test-art-host-run-test-gcverify-optimizing-no-prebuild-004-SignalTest32 \
+  test-art-host-run-test-gcverify-optimizing-prebuild-004-SignalTest32 \
+  test-art-host-run-test-gcverify-optimizing-norelocate-004-SignalTest32 \
+  test-art-host-run-test-gcverify-optimizing-relocate-004-SignalTest32 \
+  test-art-host-run-test-optimizing-no-prebuild-004-SignalTest32 \
+  test-art-host-run-test-optimizing-prebuild-004-SignalTest32 \
+  test-art-host-run-test-optimizing-norelocate-004-SignalTest32 \
+  test-art-host-run-test-optimizing-relocate-004-SignalTest32 \
+  test-art-target-run-test-gcstress-optimizing-prebuild-004-SignalTest32 \
+  test-art-target-run-test-gcstress-optimizing-norelocate-004-SignalTest32 \
+  test-art-target-run-test-gcstress-default-prebuild-004-SignalTest32 \
+  test-art-target-run-test-gcstress-default-norelocate-004-SignalTest32 \
+  test-art-target-run-test-gcstress-optimizing-relocate-004-SignalTest32 \
+  test-art-target-run-test-gcstress-default-relocate-004-SignalTest32 \
+  test-art-target-run-test-gcstress-optimizing-no-prebuild-004-SignalTest32 \
+  test-art-target-run-test-gcstress-default-no-prebuild-004-SignalTest32
 
 # List of known failing tests that, when executed, won't cause the test run to abort.
 # The test name must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
@@ -32,13 +50,43 @@
 # Keep going after encountering a test failure?
 ART_TEST_KEEP_GOING ?= false
 
+# Do you want all tests, even those that are time-consuming?
+ART_TEST_FULL ?= true
+
+# Do you want optimizing compiler tests run?
+ART_TEST_OPTIMIZING ?= $(ART_TEST_FULL)
+
+# Do you want tracing tests run?
+ART_TEST_TRACE ?= $(ART_TEST_FULL)
+
+# Do you want tests with GC verification enabled run?
+ART_TEST_GC_VERIFY ?= $(ART_TEST_FULL)
+
+# Do you want tests with the GC stress mode enabled run?
+ART_TEST_GC_STRESS ?= $(ART_TEST_FULL)
+
+# Do you want oat tests with relocation enabled?
+ART_TEST_OAT_RELOCATE ?= true
+
+# Do you want oat tests with relocation disabled?
+ART_TEST_OAT_NO_RELOCATE ?= $(ART_TEST_FULL)
+
+# Do you want run-tests with relocation enabled?
+ART_TEST_RUN_TEST_RELOCATE ?= $(ART_TEST_FULL)
+
+# Do you want run-tests with relocation disabled?
+ART_TEST_RUN_TEST_NO_RELOCATE ?= $(ART_TEST_FULL)
+
+# Do you want run-tests with prebuild disabled?
+ART_TEST_RUN_TEST_NO_PREBUILD ?= $(ART_TEST_FULL)
+
 # Define the command run on test failure. $(1) is the name of the test. Executed by the shell.
 define ART_TEST_FAILED
   ( [ -f $(ART_HOST_TEST_DIR)/skipped/$(1) ] || \
     (mkdir -p $(ART_HOST_TEST_DIR)/failed/ && touch $(ART_HOST_TEST_DIR)/failed/$(1) && \
       echo $(ART_TEST_KNOWN_FAILING) | grep -q $(1) \
         && (echo -e "$(1) \e[91mKNOWN FAILURE\e[0m") \
-        || (echo -e "$(1) \e[91mFAILED\e[0m")))
+        || (echo -e "$(1) \e[91mFAILED\e[0m" >&2 )))
 endef
 
 # Define the command run on test success. $(1) is the name of the test. Executed by the shell.
@@ -64,7 +112,7 @@
         && (echo -e "\e[93mSKIPPED TESTS\e[0m" && ls -1 $(ART_HOST_TEST_DIR)/skipped/) \
         || (echo -e "\e[92mNO TESTS SKIPPED\e[0m")) && \
       ([ -d $(ART_HOST_TEST_DIR)/failed/ ] \
-        && (echo -e "\e[91mFAILING TESTS\e[0m" && ls -1 $(ART_HOST_TEST_DIR)/failed/) \
+        && (echo -e "\e[91mFAILING TESTS\e[0m" >&2 && ls -1 $(ART_HOST_TEST_DIR)/failed/ >&2) \
         || (echo -e "\e[92mNO TESTS FAILED\e[0m")) \
       && ([ ! -d $(ART_HOST_TEST_DIR)/failed/ ] && rm -r $(ART_HOST_TEST_DIR) \
           || (rm -r $(ART_HOST_TEST_DIR) && false)))))
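
Note: each knob above that defaults to $(ART_TEST_FULL) can be switched off in
one go, e.g. ART_TEST_FULL=false mm test-art-host skips the optimizing, trace,
GC-verify, GC-stress and no-relocate/no-prebuild variants, while a single knob
such as ART_TEST_GC_STRESS=true still re-enables one variant selectively.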
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index ee51fcd..2cba0ec 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -127,6 +127,7 @@
   runtime/reflection_test.cc \
   compiler/dex/global_value_numbering_test.cc \
   compiler/dex/local_value_numbering_test.cc \
+  compiler/dex/mir_graph_test.cc \
   compiler/dex/mir_optimization_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
@@ -171,6 +172,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
+  compiler/utils/assembler_thumb_test.cc \
   compiler/utils/x86/assembler_x86_test.cc \
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 10936a4..cd6b13a 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 # Use dex2oat debug version for better error reporting
 # $(1): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 define create-core-oat-host-rules
-$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
+$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD)
 	@echo "host dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
@@ -49,7 +49,7 @@
 endif
 
 define create-core-oat-target-rules
-$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
+$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD)
 	@echo "target dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 02dad2a..61ff109 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -95,8 +95,8 @@
 	optimizing/register_allocator.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
-	optimizing/ssa_type_propagation.cc \
 	optimizing/ssa_phi_elimination.cc \
+	optimizing/ssa_type_propagation.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
@@ -118,6 +118,7 @@
 	compilers.cc \
 	compiler.cc \
 	elf_fixup.cc \
+	elf_patcher.cc \
 	elf_stripper.cc \
 	elf_writer.cc \
 	elf_writer_quick.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index d1d5ad9..7395324 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -172,7 +172,7 @@
 class ClassInitCheckElimination : public PassME {
  public:
   ClassInitCheckElimination()
-    : PassME("ClInitCheckElimination", kRepeatingTopologicalSortTraversal) {
+    : PassME("ClInitCheckElimination", kLoopRepeatingTopologicalSortTraversal) {
   }
 
   bool Gate(const PassDataHolder* data) const {
@@ -207,17 +207,17 @@
 class GlobalValueNumberingPass : public PassME {
  public:
   GlobalValueNumberingPass()
-    : PassME("GVN", kRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
+    : PassME("GVN", kLoopRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
   }
 
-  bool Gate(const PassDataHolder* data) const {
+  bool Gate(const PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->ApplyGlobalValueNumberingGate();
   }
 
-  bool Worker(const PassDataHolder* data) const {
+  bool Worker(const PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
     CompilationUnit* cUnit = pass_me_data_holder->c_unit;
@@ -227,7 +227,7 @@
     return cUnit->mir_graph->ApplyGlobalValueNumbering(bb);
   }
 
-  void End(PassDataHolder* data) const {
+  void End(PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index f8b9c1a..d1abf7f 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -121,6 +121,56 @@
   return res;
 }
 
+inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) {
+  if (idx_ != 0) {
+    // Mark last processed block visited.
+    BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx_ - 1));
+    bb->visited = true;
+    if (had_change) {
+      // If we had a change we need to revisit the children.
+      ChildBlockIterator iter(bb, mir_graph_);
+      for (BasicBlock* child_bb = iter.Next(); child_bb != nullptr; child_bb = iter.Next()) {
+        child_bb->visited = false;
+      }
+    }
+  }
+
+  while (true) {
+    // Pop loops we have left and check if we need to recalculate one of them.
+    // NOTE: We need to do this even if idx_ == end_idx_.
+    while (loop_head_stack_->Size() != 0u &&
+        loop_ends_->Get(loop_head_stack_->Peek().first) == idx_) {
+      auto top = loop_head_stack_->Peek();
+      uint16_t loop_head_idx = top.first;
+      bool recalculated = top.second;
+      loop_head_stack_->Pop();
+      BasicBlock* loop_head = mir_graph_->GetBasicBlock(block_id_list_->Get(loop_head_idx));
+      DCHECK(loop_head != nullptr);
+      if (!recalculated || !loop_head->visited) {
+        loop_head_stack_->Insert(std::make_pair(loop_head_idx, true));  // Recalculating this loop.
+        idx_ = loop_head_idx + 1;
+        return loop_head;
+      }
+    }
+
+    if (idx_ == end_idx_) {
+      return nullptr;
+    }
+
+    // Get next block and return it if unvisited.
+    BasicBlockId idx = idx_;
+    idx_ += 1;
+    BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx));
+    DCHECK(bb != nullptr);
+    if (!bb->visited) {
+      if (loop_ends_->Get(idx) != 0u) {
+        loop_head_stack_->Insert(std::make_pair(idx, false));  // Not recalculating.
+      }
+      return bb;
+    }
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 66c524f..06d6832 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -388,6 +388,52 @@
      }
   };
 
+  /**
+   * @class LoopRepeatingTopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph, repeating loops as needed.
+   * @details The iterator uses the visited flags to keep track of the blocks that need
+   * recalculation and keeps a stack of loop heads in the MIRGraph. At the end of a loop,
+   * it goes back to the loop head if the loop needs to be recalculated. Due to the use of
+   * the visited flags and the loop head stack in the MIRGraph, it's not possible to use
+   * two iterators at the same time or modify this data during iteration (though inspection
+   * of this data is allowed and sometimes even expected).
+   *
+   * NOTE: This iterator is not suitable for passes that need to propagate changes to
+   * predecessors, such as type inference.
+   */
+  class LoopRepeatingTopologicalSortIterator : public DataflowIterator {
+    public:
+     /**
+      * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+      * @param mir_graph The MIRGraph considered.
+      */
+     explicit LoopRepeatingTopologicalSortIterator(MIRGraph* mir_graph)
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()),
+           loop_ends_(mir_graph->GetTopologicalSortOrderLoopEnds()),
+           loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) {
+       // Extra setup for LoopRepeatingTopologicalSortIterator.
+       idx_ = start_idx_;
+       block_id_list_ = mir_graph->GetTopologicalSortOrder();
+       // Clear visited flags and check that the loop head stack is empty.
+       mir_graph->ClearAllVisitedFlags();
+       DCHECK_EQ(loop_head_stack_->Size(), 0u);
+     }
+
+     ~LoopRepeatingTopologicalSortIterator() {
+       DCHECK_EQ(loop_head_stack_->Size(), 0u);
+     }
+
+     /**
+      * @brief Get the next BasicBlock depending on iteration order.
+      * @param had_change whether the user of the iteration changed the previous BasicBlock.
+      * @return the next BasicBlock following the iteration order, or nullptr if finished.
+      */
+     virtual BasicBlock* Next(bool had_change = false) OVERRIDE;
+
+    private:
+     const GrowableArray<BasicBlockId>* const loop_ends_;
+     GrowableArray<std::pair<uint16_t, bool>>* const loop_head_stack_;
+  };
 
 }  // namespace art
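
To make the contract concrete, here is a minimal sketch of the driver loop a
repeating pass would run on this iterator (ApplyToBasicBlock is a hypothetical
stand-in for the pass's per-block worker, not ART API):

  // Visit blocks in topological order; when a block inside a loop reports a
  // change, the iterator re-queues the loop head so the loop body is redone.
  LoopRepeatingTopologicalSortIterator iter(mir_graph);
  bool change = false;
  for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) {
    change = ApplyToBasicBlock(bb);  // Hypothetical worker; true if data changed.
  }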
 
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index d097500..51446f6 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <cstdint>
+
 #include "compiler.h"
 #include "compiler_internals.h"
 #include "driver/compiler_driver.h"
@@ -470,6 +472,10 @@
 COMPILE_ASSERT(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
                kUnsupportedOpcodesSize_unexp);
 
+// The maximum number of Dalvik registers in a method that we will attempt to compile. This
+// tries to avoid an abort when we need to manage more SSA registers than we can.
+static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2;
+
 CompilationUnit::CompilationUnit(ArenaPool* pool)
   : compiler_driver(nullptr),
     class_linker(nullptr),
@@ -548,6 +554,12 @@
 // Skip the method that we do not support currently.
 static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                              CompilationUnit& cu) {
+  // This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
+  if (cu.num_dalvik_registers > kMaxAllowedDalvikRegisters) {
+    VLOG(compiler) << "Too many Dalvik registers: " << cu.num_dalvik_registers;
+    return false;
+  }
+
   // Check whether we do have limitations at all.
   if (kSupportedTypes[cu.instruction_set] == nullptr &&
       kUnsupportedOpcodesSize[cu.instruction_set] == 0U) {
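
Note: INT16_MAX / 2 evaluates to 16383, so a method declaring more than 16383
Dalvik registers is now rejected up front rather than hitting the limitation in
MirGraph::SetNumSSARegs later.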
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index 614e826..d86be4e 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -22,8 +22,10 @@
 
 GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator)
     : cu_(cu),
+      mir_graph_(cu->mir_graph.get()),
       allocator_(allocator),
-      repeat_count_(0u),
+      bbs_processed_(0u),
+      max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()),
       last_value_(0u),
       modifications_allowed_(false),
       global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
@@ -32,10 +34,9 @@
       array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
       array_location_reverse_map_(allocator->Adapter()),
       ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
-      lvns_(cu_->mir_graph->GetNumBlocks(), nullptr, allocator->Adapter()),
+      lvns_(mir_graph_->GetNumBlocks(), nullptr, allocator->Adapter()),
       work_lvn_(nullptr),
       merge_lvns_(allocator->Adapter()) {
-  cu_->mir_graph->ClearAllVisitedFlags();
 }
 
 GlobalValueNumbering::~GlobalValueNumbering() {
@@ -46,21 +47,15 @@
   if (UNLIKELY(!Good())) {
     return nullptr;
   }
-  if (bb->data_flow_info == nullptr) {
+  if (UNLIKELY(bb->data_flow_info == nullptr)) {
     return nullptr;
   }
-  if (bb->block_type == kEntryBlock) {
-    repeat_count_ += 1u;
-    if (repeat_count_ > kMaxRepeatCount) {
-      last_value_ = kNoValue;  // Make bad.
-      return nullptr;
-    }
-  }
-  if (bb->block_type == kExitBlock) {
+  if (UNLIKELY(bb->block_type == kExitBlock)) {
     DCHECK(bb->first_mir_insn == nullptr);
     return nullptr;
   }
-  if (bb->visited) {
+  if (UNLIKELY(bbs_processed_ == max_bbs_to_process_)) {
+    last_value_ = kNoValue;  // Make bad.
     return nullptr;
   }
   DCHECK(work_lvn_.get() == nullptr);
@@ -72,13 +67,34 @@
       work_lvn_->SetSRegNullChecked(this_reg);
     }
   } else {
-    // Merge all incoming arcs.
     // To avoid repeated allocation on the ArenaStack, reuse a single vector kept as a member.
     DCHECK(merge_lvns_.empty());
+    // If we're running the full GVN, the RepeatingTopologicalSortIterator keeps the loop
+    // head stack in the MIRGraph up to date and for a loop head we need to check whether
+    // we're making the initial computation and need to merge only preceding blocks in the
+    // topological order, or we're recalculating a loop head and need to merge all incoming
+    // LVNs. When we're not at a loop head (including having an empty loop head stack) all
+    // predecessors should be preceding blocks and we shall merge all of them anyway.
+    //
+    // If we're running the modification phase of the full GVN, the loop head stack will be
+    // empty and we need to merge all incoming LVNs. If we're running just a simple LVN,
+    // the loop head stack will also be empty and there will be nothing to merge anyway.
+    bool use_all_predecessors = true;
+    uint16_t loop_head_idx = 0u;  // Used only if !use_all_predecessors.
+    if (mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Size() != 0) {
+      // Full GVN inside a loop, see if we're at the loop head for the first time.
+      auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Peek();
+      loop_head_idx = top.first;
+      bool recalculating = top.second;
+      use_all_predecessors = recalculating ||
+          loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()->Get(bb->id);
+    }
     GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
-    for (BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next());
-         pred_bb != nullptr; pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next())) {
-      if (lvns_[pred_bb->id] != nullptr) {
+    for (BasicBlock* pred_bb = mir_graph_->GetBasicBlock(iter.Next());
+         pred_bb != nullptr; pred_bb = mir_graph_->GetBasicBlock(iter.Next())) {
+      if (lvns_[pred_bb->id] != nullptr &&
+          (use_all_predecessors ||
+              mir_graph_->GetTopologicalSortOrderIndexes()->Get(pred_bb->id) < loop_head_idx)) {
         merge_lvns_.push_back(lvns_[pred_bb->id]);
       }
     }
@@ -87,19 +103,22 @@
     if (bb->catch_entry) {
       merge_type = LocalValueNumbering::kCatchMerge;
     } else if (bb->last_mir_insn != nullptr &&
-        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
+        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_VOID ||
+         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) &&
         (bb->first_mir_insn == bb->last_mir_insn ||
-         (bb->first_mir_insn->next == bb->last_mir_insn &&
-          static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi))) {
+         (static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi &&
+          (bb->first_mir_insn->next == bb->last_mir_insn ||
+           (static_cast<int>(bb->first_mir_insn->next->dalvikInsn.opcode) == kMirOpPhi &&
+            bb->first_mir_insn->next->next == bb->last_mir_insn))))) {
       merge_type = LocalValueNumbering::kReturnMerge;
     }
     // At least one predecessor must have been processed before this bb.
     CHECK(!merge_lvns_.empty());
     if (merge_lvns_.size() == 1u) {
       work_lvn_->MergeOne(*merge_lvns_[0], merge_type);
-      BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(merge_lvns_[0]->Id());
+      BasicBlock* pred_bb = mir_graph_->GetBasicBlock(merge_lvns_[0]->Id());
       if (HasNullCheckLastInsn(pred_bb, bb->id)) {
         work_lvn_->SetSRegNullChecked(pred_bb->last_mir_insn->ssa_rep->uses[0]);
       }
@@ -112,32 +131,13 @@
 
 bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) {
   DCHECK(work_lvn_ != nullptr);
-  DCHECK(bb->id == work_lvn_->Id());
+  DCHECK_EQ(bb->id, work_lvn_->Id());
+  ++bbs_processed_;
   merge_lvns_.clear();
 
-  bool change = false;
-  // Look for a branch to self or an already processed child.
-  // (No need to repeat the LVN if all children are processed later.)
-  ChildBlockIterator iter(bb, cu_->mir_graph.get());
-  for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-    if (child == bb || lvns_[child->id] != nullptr) {
-      // If we found an already processed child, check if the LVN actually differs.
-      change = (lvns_[bb->id] == nullptr || !lvns_[bb->id]->Equals(*work_lvn_));
-      break;
-    }
-  }
-
   std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
   lvns_[bb->id] = work_lvn_.release();
-
-  bb->visited = true;
-  if (change) {
-    ChildBlockIterator iter(bb, cu_->mir_graph.get());
-    for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-      child->visited = false;
-    }
-  }
-  return change;
+  return (old_lvn == nullptr) || !old_lvn->Equals(*lvns_[bb->id]);
 }
 
 uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) {
@@ -188,7 +188,7 @@
     uint16_t value_name = merge_names[i];
     if (!pred_lvn->IsValueNullChecked(value_name)) {
       // Check if the predecessor has an IF_EQZ/IF_NEZ as the last insn.
-      const BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(pred_lvn->Id());
+      const BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_lvn->Id());
       if (!HasNullCheckLastInsn(pred_bb, work_lvn_->Id())) {
         return false;
       }
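
For orientation, a sketch of how a full-GVN driver combines the loop-repeating
iterator with PrepareBasicBlock()/FinishBasicBlock(), mirroring the pattern in
the tests below (gvn and mir_graph are assumed to be constructed already):

  LoopRepeatingTopologicalSortIterator iter(mir_graph);
  bool change = false;
  for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) {
    change = false;
    LocalValueNumbering* lvn = gvn->PrepareBasicBlock(bb);
    if (lvn != nullptr) {
      // Assign value names to every MIR, then compare against the previous LVN.
      for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
        lvn->GetValueNumber(mir);
      }
      change = gvn->FinishBasicBlock(bb);  // True if this block's LVN changed.
    }
  }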
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index 7ab77b7..a12a779 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -31,7 +31,10 @@
   GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator);
   ~GlobalValueNumbering();
 
+  // Prepare LVN for the basic block.
   LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb);
+
+  // Finish processing the basic block.
   bool FinishBasicBlock(BasicBlock* bb);
 
   // Checks that the value names didn't overflow.
@@ -42,7 +45,6 @@
   // Allow modifications.
   void AllowModifications() {
     DCHECK(Good());
-    cu_->mir_graph->ClearAllVisitedFlags();
     modifications_allowed_ = true;
   }
 
@@ -182,7 +184,7 @@
   }
 
   const BasicBlock* GetBasicBlock(uint16_t bb_id) const {
-    return cu_->mir_graph->GetBasicBlock(bb_id);
+    return mir_graph_->GetBasicBlock(bb_id);
   }
 
   static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id);
@@ -194,7 +196,7 @@
   }
 
   MIRGraph* GetMirGraph() const {
-    return cu_->mir_graph.get();
+    return mir_graph_;
   }
 
   ScopedArenaAllocator* Allocator() const {
@@ -202,12 +204,16 @@
   }
 
   CompilationUnit* const cu_;
+  MIRGraph* mir_graph_;
   ScopedArenaAllocator* const allocator_;
 
-  static constexpr uint32_t kMaxRepeatCount = 10u;
+  // The number of BBs that we need to process grows exponentially with the number
+  // of nested loops. Don't allow excessive processing for too many nested loops or
+  // otherwise expensive methods.
+  static constexpr uint32_t kMaxBbsToProcessMultiplyFactor = 20u;
 
-  // Track the repeat count to make sure the GVN converges quickly and abort the GVN otherwise.
-  uint32_t repeat_count_;
+  uint32_t bbs_processed_;
+  uint32_t max_bbs_to_process_;
 
   // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good().
   // We usually don't check Good() until the end of LVN unless we're about to modify code.
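
Note: the processing budget is now linear in method size rather than a fixed
repeat count: with kMaxBbsToProcessMultiplyFactor = 20, a method with e.g. 50
reachable blocks may perform at most 20 * 50 = 1000 basic-block visits before
last_value_ is set to kNoValue and Good() starts returning false.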
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index 40bd983..c82d231 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -273,23 +273,20 @@
   }
 
   void PerformGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    DoPerformGVN<RepeatingPreOrderDfsIterator>();
+    DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
   }
 
   void PerformPreOrderDfsGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->SSATransformationEnd();
     DoPerformGVN<RepeatingPreOrderDfsIterator>();
   }
 
   template <typename IteratorType>
   void DoPerformGVN() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
     ASSERT_TRUE(gvn_ == nullptr);
     gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get()));
     ASSERT_FALSE(gvn_->CanModify());
@@ -313,7 +310,7 @@
     ASSERT_TRUE(gvn_->Good());
     ASSERT_FALSE(gvn_->CanModify());
     gvn_->AllowModifications();
-    PreOrderDfsIterator iterator(cu_.mir_graph.get());
+    TopologicalSortIterator iterator(cu_.mir_graph.get());
     for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
       LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
       if (lvn != nullptr) {
@@ -340,7 +337,6 @@
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // gvn_->AllowModifications();
   }
 
   ArenaPool pool_;
@@ -1917,7 +1913,7 @@
   PerformPreOrderDfsGVN();
 }
 
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_IFieldAndPhi) {
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -1954,7 +1950,7 @@
   EXPECT_EQ(value_names_[5], value_names_[12]);
 }
 
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) {
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -2024,14 +2020,10 @@
   EXPECT_NE(value_names_[2], value_names_[6]);
   EXPECT_NE(value_names_[3], value_names_[7]);
   EXPECT_NE(value_names_[4], value_names_[8]);
-  EXPECT_NE(value_names_[0], value_names_[12]);
-  EXPECT_NE(value_names_[1], value_names_[13]);
-  EXPECT_NE(value_names_[2], value_names_[14]);
-  EXPECT_NE(value_names_[3], value_names_[15]);
   EXPECT_EQ(value_names_[4], value_names_[12]);
-  EXPECT_NE(value_names_[5], value_names_[13]);
-  EXPECT_NE(value_names_[6], value_names_[14]);
-  EXPECT_NE(value_names_[7], value_names_[15]);
+  EXPECT_EQ(value_names_[5], value_names_[13]);
+  EXPECT_EQ(value_names_[6], value_names_[14]);
+  EXPECT_EQ(value_names_[7], value_names_[15]);
   EXPECT_EQ(value_names_[12], value_names_[20]);
   EXPECT_EQ(value_names_[13], value_names_[21]);
   EXPECT_EQ(value_names_[14], value_names_[22]);
@@ -2049,7 +2041,7 @@
   }
 }
 
-TEST_F(GlobalValueNumberingTestTwoNestedLoops, DISABLED_IFieldAndPhi) {
+TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -2090,4 +2082,37 @@
   EXPECT_EQ(value_names_[3], value_names_[14]);
 }
 
+TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) {
+  // When there's an empty catch block, all the exception paths lead to the next block in
+  // the normal path and we can also have normal "taken" or "fall-through" branches to that
+  // path. Check that LocalValueNumbering::PruneNonAliasingRefsForCatch() can handle it.
+  static const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
+  };
+  static const MIRDef mirs[] = {
+      DEF_INVOKE1(4, Instruction::INVOKE_STATIC, 100u),
+  };
+  PrepareBasicBlocks(bbs);
+  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
+  catch_handler->catch_entry = true;
+  // Add successor block info to the check block.
+  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
+  check_bb->successor_block_list_type = kCatch;
+  check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+      &cu_.arena, 2, kGrowableArraySuccessorBlocks);
+  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+  successor_block_info->block = catch_handler->id;
+  check_bb->successor_blocks->Insert(successor_block_info);
+  BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u);
+  std::swap(merge_block->taken, merge_block->fall_through);
+  PrepareMIRs(mirs);
+  PerformGVN();
+}
+
 }  // namespace art
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index d5fd6fe..0e072ec 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -445,6 +445,11 @@
 void LocalValueNumbering::PruneNonAliasingRefsForCatch() {
   for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
     const BasicBlock* bb = gvn_->GetBasicBlock(lvn->Id());
+    if (UNLIKELY(bb->taken == id_) || UNLIKELY(bb->fall_through == id_)) {
+      // Non-exceptional path to a catch handler means that the catch block was actually
+      // empty and all exceptional paths lead to the shared path after that empty block.
+      continue;
+    }
     DCHECK_EQ(bb->taken, kNullBlock);
     DCHECK_NE(bb->fall_through, kNullBlock);
     const BasicBlock* fall_through_bb = gvn_->GetBasicBlock(bb->fall_through);
@@ -529,6 +534,10 @@
          (!cmp(entry, *work_it) && !(work_it->second == entry.second)))) {
       work_it = work_map->erase(work_it);
     }
+    if (work_it == work_end) {
+      return;
+    }
+    ++work_it;
   }
 }
 
@@ -850,13 +859,18 @@
   MergeMemoryVersions(merge_type == kCatchMerge);
 
   // Merge non-aliasing maps/sets.
-  MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
-            &LocalValueNumbering::MergeNonAliasingIFieldValues>();
-  MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-            &LocalValueNumbering::MergeAliasingValues<
-                NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-                NonAliasingArrayVersions>>();
   IntersectSets<ValueNameSet, &LocalValueNumbering::non_aliasing_refs_>();
+  if (!non_aliasing_refs_.empty() && merge_type == kCatchMerge) {
+    PruneNonAliasingRefsForCatch();
+  }
+  if (!non_aliasing_refs_.empty()) {
+    MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
+              &LocalValueNumbering::MergeNonAliasingIFieldValues>();
+    MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+              &LocalValueNumbering::MergeAliasingValues<
+                  NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+                  NonAliasingArrayVersions>>();
+  }
 
   // We won't do anything complicated for range checks, just calculate the intersection.
   IntersectSets<RangeCheckSet, &LocalValueNumbering::range_checked_>();
@@ -867,7 +881,6 @@
 
   if (merge_type == kCatchMerge) {
     // Memory is clobbered. New memory version already created, don't merge aliasing locations.
-    PruneNonAliasingRefsForCatch();
     return;
   }
 
@@ -1356,8 +1369,8 @@
     case Instruction::MONITOR_EXIT:
       HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
       // If we're running GVN and CanModify(), uneliminated null check indicates bytecode error.
-      if ((gvn_->cu_->disable_opt & (1 << kGlobalValueNumbering)) == 0 && gvn_->CanModify() &&
-          (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
+      if ((gvn_->GetCompilationUnit()->disable_opt & (1u << kGlobalValueNumbering)) == 0u &&
+          gvn_->CanModify() && (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
         LOG(WARNING) << "Bytecode error: MONITOR_EXIT is still null checked at 0x" << std::hex
             << mir->offset << " in " << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file);
       }
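
For reference, the map-pruning fix above restores the usual erase-and-advance
idiom for removing entries from an ordered container while iterating; in
generic form (a sketch, not the exact ART loop):

  // erase() returns the iterator past the removed element, so advance
  // manually only on the keep path and re-check against end() each time.
  for (auto it = work_map.begin(); it != work_map.end(); ) {
    if (ShouldErase(*it)) {  // Hypothetical predicate.
      it = work_map.erase(it);
    } else {
      ++it;
    }
  }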
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 1c8a9b5..331af21 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -84,6 +84,9 @@
       dfs_post_order_(NULL),
       dom_post_order_traversal_(NULL),
       topological_order_(nullptr),
+      topological_order_loop_ends_(nullptr),
+      topological_order_indexes_(nullptr),
+      topological_order_loop_head_stack_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
       temp_scoped_alloc_(),
@@ -1526,117 +1529,248 @@
   temp_scoped_alloc_.reset();
 }
 
-void MIRGraph::ComputeTopologicalSortOrder() {
-  // Clear the nodes.
-  ClearAllVisitedFlags();
-
-  // Create the topological order if need be.
-  if (topological_order_ == nullptr) {
-    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks());
-  }
-  topological_order_->Reset();
-
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
-  ScopedArenaVector<size_t> visited_cnt_values(GetNumBlocks(), 0u, allocator.Adapter());
-
-  // Set up visitedCntValues map for all BB. The default value for this counters in the map is zero.
-  // also fill initial queue.
-  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
-
-  size_t num_blocks = 0u;
-  while (true) {
-    BasicBlock* bb = iterator.Next();
-
-    if (bb == nullptr) {
-      break;
+static BasicBlock* SelectTopologicalSortOrderFallBack(
+    MIRGraph* mir_graph, const ArenaBitVector* current_loop,
+    const ScopedArenaVector<size_t>* visited_cnt_values, ScopedArenaAllocator* allocator,
+    ScopedArenaVector<BasicBlockId>* tmp_stack) {
+  // No true loop head has been found but there may be true loop heads after the mess we need
+  // to resolve. To avoid taking one of those, pick the candidate with the highest number of
+  // reachable unvisited nodes. That candidate will surely be a part of a loop.
+  BasicBlock* fall_back = nullptr;
+  size_t fall_back_num_reachable = 0u;
+  // Reuse the same bit vector for each candidate to mark reachable unvisited blocks.
+  ArenaBitVector candidate_reachable(allocator, mir_graph->GetNumBlocks(), false, kBitMapMisc);
+  AllNodesIterator iter(mir_graph);
+  for (BasicBlock* candidate = iter.Next(); candidate != nullptr; candidate = iter.Next()) {
+    if (candidate->hidden ||                            // Hidden, or
+        candidate->visited ||                           // already processed, or
+        (*visited_cnt_values)[candidate->id] == 0u ||   // no processed predecessors, or
+        (current_loop != nullptr &&                     // outside current loop.
+         !current_loop->IsBitSet(candidate->id))) {
+      continue;
     }
+    DCHECK(tmp_stack->empty());
+    tmp_stack->push_back(candidate->id);
+    candidate_reachable.ClearAllBits();
+    size_t num_reachable = 0u;
+    while (!tmp_stack->empty()) {
+      BasicBlockId current_id = tmp_stack->back();
+      tmp_stack->pop_back();
+      BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
+      DCHECK(current_bb != nullptr);
+      ChildBlockIterator child_iter(current_bb, mir_graph);
+      BasicBlock* child_bb = child_iter.Next();
+      for ( ; child_bb != nullptr; child_bb = child_iter.Next()) {
+        DCHECK(!child_bb->hidden);
+        if (child_bb->visited ||                            // Already processed, or
+            (current_loop != nullptr &&                     // outside current loop.
+             !current_loop->IsBitSet(child_bb->id))) {
+          continue;
+        }
+        if (!candidate_reachable.IsBitSet(child_bb->id)) {
+          candidate_reachable.SetBit(child_bb->id);
+          tmp_stack->push_back(child_bb->id);
+          num_reachable += 1u;
+        }
+      }
+    }
+    if (fall_back_num_reachable < num_reachable) {
+      fall_back_num_reachable = num_reachable;
+      fall_back = candidate;
+    }
+  }
+  return fall_back;
+}
 
+// Compute the set of unvisited blocks from which bb_id is reachable through unvisited blocks.
+static void ComputeUnvisitedReachableFrom(MIRGraph* mir_graph, BasicBlockId bb_id,
+                                          ArenaBitVector* reachable,
+                                          ScopedArenaVector<BasicBlockId>* tmp_stack) {
+  // NOTE: Loop heads indicated by the "visited" flag.
+  DCHECK(tmp_stack->empty());
+  reachable->ClearAllBits();
+  tmp_stack->push_back(bb_id);
+  while (!tmp_stack->empty()) {
+    BasicBlockId current_id = tmp_stack->back();
+    tmp_stack->pop_back();
+    BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
+    DCHECK(current_bb != nullptr);
+    GrowableArray<BasicBlockId>::Iterator iter(current_bb->predecessors);
+    BasicBlock* pred_bb = mir_graph->GetBasicBlock(iter.Next());
+    for ( ; pred_bb != nullptr; pred_bb = mir_graph->GetBasicBlock(iter.Next())) {
+      if (!pred_bb->visited && !reachable->IsBitSet(pred_bb->id)) {
+        reachable->SetBit(pred_bb->id);
+        tmp_stack->push_back(pred_bb->id);
+      }
+    }
+  }
+}
+
+void MIRGraph::ComputeTopologicalSortOrder() {
+  ScopedArenaAllocator allocator(&cu_->arena_stack);
+  unsigned int num_blocks = GetNumBlocks();
+
+  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
+  ScopedArenaVector<size_t> visited_cnt_values(num_blocks, 0u, allocator.Adapter());
+  ScopedArenaVector<BasicBlockId> loop_head_stack(allocator.Adapter());
+  size_t max_nested_loops = 0u;
+  ArenaBitVector loop_exit_blocks(&allocator, num_blocks, false, kBitMapMisc);
+  loop_exit_blocks.ClearAllBits();
+
+  // Count the number of blocks to process and add the entry block(s).
+  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
+  unsigned int num_blocks_to_process = 0u;
+  for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
     if (bb->hidden == true) {
       continue;
     }
 
-    num_blocks += 1u;
-    size_t unvisited_predecessor_count = bb->predecessors->Size();
+    num_blocks_to_process += 1u;
 
-    GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
-    // To process loops we should not wait for dominators.
-    while (true) {
-      BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next());
-
-      if (pred_bb == nullptr) {
-        break;
-      }
-
-      // Skip the backward branch or hidden predecessor.
-      if (pred_bb->hidden ||
-          (pred_bb->dominators != nullptr && pred_bb->dominators->IsBitSet(bb->id))) {
-        unvisited_predecessor_count -= 1u;
-      }
-    }
-
-    visited_cnt_values[bb->id] = unvisited_predecessor_count;
-
-    // Add entry block to queue.
-    if (unvisited_predecessor_count == 0) {
+    if (bb->predecessors->Size() == 0u) {
+      // Add entry block to the queue.
       q.push(bb);
     }
   }
 
-  // We can get a cycle where none of the blocks dominates the other. Therefore don't
-  // stop when the queue is empty, continue until we've processed all the blocks.
-  AllNodesIterator candidate_iter(this);  // For the empty queue case.
-  while (num_blocks != 0u) {
-    num_blocks -= 1u;
+  // Create the topological order if need be.
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, num_blocks);
+    topological_order_loop_ends_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks);
+    topological_order_indexes_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks);
+  }
+  topological_order_->Reset();
+  topological_order_loop_ends_->Reset();
+  topological_order_indexes_->Reset();
+  topological_order_loop_ends_->Resize(num_blocks);
+  topological_order_indexes_->Resize(num_blocks);
+  for (BasicBlockId i = 0; i != num_blocks; ++i) {
+    topological_order_loop_ends_->Insert(0u);
+    topological_order_indexes_->Insert(static_cast<uint16_t>(-1));
+  }
+
+  // Mark all blocks as unvisited.
+  ClearAllVisitedFlags();
+
+  // For loop heads, keep track from which blocks they are reachable not going through other
+  // loop heads. Other loop heads are excluded to detect the heads of nested loops. The children
+  // in this set go into the loop body, the other children are jumping over the loop.
+  ScopedArenaVector<ArenaBitVector*> loop_head_reachable_from(allocator.Adapter());
+  loop_head_reachable_from.resize(num_blocks, nullptr);
+  // Reuse the same temp stack whenever calculating a loop_head_reachable_from[loop_head_id].
+  ScopedArenaVector<BasicBlockId> tmp_stack(allocator.Adapter());
+
+  while (num_blocks_to_process != 0u) {
     BasicBlock* bb = nullptr;
     if (!q.empty()) {
+      num_blocks_to_process -= 1u;
       // Get top.
       bb = q.front();
       q.pop();
-    } else {
-      // Find some block we didn't visit yet that has at least one visited predecessor.
-      while (bb == nullptr) {
-        BasicBlock* candidate = candidate_iter.Next();
-        DCHECK(candidate != nullptr);
-        if (candidate->visited || candidate->hidden) {
-          continue;
-        }
-        GrowableArray<BasicBlockId>::Iterator iter(candidate->predecessors);
-        for (BasicBlock* pred_bb = GetBasicBlock(iter.Next()); pred_bb != nullptr;
-            pred_bb = GetBasicBlock(iter.Next())) {
-          if (!pred_bb->hidden && pred_bb->visited) {
-            bb = candidate;
-            break;
+      if (bb->visited) {
+        // Loop head: it was already processed, mark end and copy exit blocks to the queue.
+        DCHECK(q.empty()) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+        uint16_t idx = static_cast<uint16_t>(topological_order_->Size());
+        topological_order_loop_ends_->Put(topological_order_indexes_->Get(bb->id), idx);
+        DCHECK_EQ(loop_head_stack.back(), bb->id);
+        loop_head_stack.pop_back();
+        ArenaBitVector* reachable =
+            loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
+        for (BasicBlockId candidate_id : loop_exit_blocks.Indexes()) {
+          if (reachable == nullptr || reachable->IsBitSet(candidate_id)) {
+            q.push(GetBasicBlock(candidate_id));
+            // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
+            // so clearing the bit has no effect on the iterator.
+            loop_exit_blocks.ClearBit(candidate_id);
           }
         }
+        continue;
       }
+    } else {
+      // Find the new loop head.
+      AllNodesIterator iter(this);
+      while (true) {
+        BasicBlock* candidate = iter.Next();
+        if (candidate == nullptr) {
+          // We did not find a true loop head; fall back to a reachable block in any loop.
+          ArenaBitVector* current_loop =
+              loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
+          bb = SelectTopologicalSortOrderFallBack(this, current_loop, &visited_cnt_values,
+                                                  &allocator, &tmp_stack);
+          DCHECK(bb != nullptr) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+          if (kIsDebugBuild && cu_->dex_file != nullptr) {
+            LOG(INFO) << "Topological sort order: Using fall-back in "
+                << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " BB #" << bb->id
+                << " @0x" << std::hex << bb->start_offset
+                << ", num_blocks = " << std::dec << num_blocks;
+          }
+          break;
+        }
+        if (candidate->hidden ||                            // Hidden, or
+            candidate->visited ||                           // already processed, or
+            visited_cnt_values[candidate->id] == 0u ||      // no processed predecessors, or
+            (!loop_head_stack.empty() &&                    // outside current loop.
+             !loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(candidate->id))) {
+          continue;
+        }
+
+        GrowableArray<BasicBlockId>::Iterator pred_iter(candidate->predecessors);
+        BasicBlock* pred_bb = GetBasicBlock(pred_iter.Next());
+        for ( ; pred_bb != nullptr; pred_bb = GetBasicBlock(pred_iter.Next())) {
+          if (pred_bb != candidate && !pred_bb->visited &&
+              !pred_bb->dominators->IsBitSet(candidate->id)) {
+            break;  // Keep non-null pred_bb to indicate failure.
+          }
+        }
+        if (pred_bb == nullptr) {
+          bb = candidate;
+          break;
+        }
+      }
+      // Compute blocks from which the loop head is reachable and process those blocks first.
+      ArenaBitVector* reachable =
+          new (&allocator) ArenaBitVector(&allocator, num_blocks, false, kBitMapMisc);
+      loop_head_reachable_from[bb->id] = reachable;
+      ComputeUnvisitedReachableFrom(this, bb->id, reachable, &tmp_stack);
+      // Now mark it as a loop head. (Even if it's only a fall-back block chosen when
+      // no true loop head was found.)
+      loop_head_stack.push_back(bb->id);
+      max_nested_loops = std::max(max_nested_loops, loop_head_stack.size());
     }
 
     DCHECK_EQ(bb->hidden, false);
     DCHECK_EQ(bb->visited, false);
-
-    // We've visited all the predecessors. So, we can visit bb.
     bb->visited = true;
 
     // Now add the basic block.
+    uint16_t idx = static_cast<uint16_t>(topological_order_->Size());
+    topological_order_indexes_->Put(bb->id, idx);
     topological_order_->Insert(bb->id);
 
-    // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero.
+    // Update visited_cnt_values for children.
     ChildBlockIterator succIter(bb, this);
     BasicBlock* successor = succIter.Next();
     for ( ; successor != nullptr; successor = succIter.Next()) {
-      if (successor->visited || successor->hidden) {
+      if (successor->hidden) {
         continue;
       }
 
-      // one more predecessor was visited.
-      DCHECK_NE(visited_cnt_values[successor->id], 0u);
-      visited_cnt_values[successor->id] -= 1u;
-      if (visited_cnt_values[successor->id] == 0u) {
-        q.push(successor);
+      // One more predecessor was visited.
+      visited_cnt_values[successor->id] += 1u;
+      if (visited_cnt_values[successor->id] == successor->predecessors->Size()) {
+        if (loop_head_stack.empty() ||
+            loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(successor->id)) {
+          q.push(successor);
+        } else {
+          DCHECK(!loop_exit_blocks.IsBitSet(successor->id));
+          loop_exit_blocks.SetBit(successor->id);
+        }
       }
     }
   }
+
+  // Prepare the loop head stack for iteration.
+  topological_order_loop_head_stack_ =
+      new (arena_) GrowableArray<std::pair<uint16_t, bool>>(arena_, max_nested_loops);
 }
 
 bool BasicBlock::IsExceptionBlock() const {
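For orientation, the side tables computed above are meant to be consumed together:
topological_order_ lists the blocks, topological_order_indexes_ inverts that list, and
topological_order_loop_ends_ stores, at a loop head's order index, the past-the-end order index
of the loop's blocks (0 for non-heads). A minimal sketch of a consumer under that reading;
DumpLoopRanges and its logging are hypothetical and not part of this change:

    // Sketch: enumerate each loop's range [idx, loop_end) in the topological order.
    static void DumpLoopRanges(MIRGraph* mir_graph) {
      GrowableArray<BasicBlockId>* order = mir_graph->GetTopologicalSortOrder();
      GrowableArray<BasicBlockId>* loop_ends = mir_graph->GetTopologicalSortOrderLoopEnds();
      for (size_t idx = 0u; idx != order->Size(); ++idx) {
        uint16_t loop_end = loop_ends->Get(idx);
        if (loop_end != 0u) {  // Non-zero only at loop head positions.
          LOG(INFO) << "Loop head BB #" << order->Get(idx)
                    << " spans order indexes [" << idx << ", " << loop_end << ")";
        }
      }
    }

This is the shape the LoopRepeatingTopologicalSortIterator (used below in
mir_optimization_test.cc and pass_driver_me.h) relies on: when a block inside
[idx, loop_end) changes, iteration can restart at the loop head instead of at the
beginning of the method.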
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 1556a19..768ae21 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -27,6 +27,7 @@
 #include "mir_method_info.h"
 #include "utils/arena_bit_vector.h"
 #include "utils/growable_array.h"
+#include "utils/scoped_arena_containers.h"
 #include "reg_location.h"
 #include "reg_storage.h"
 
@@ -689,6 +690,21 @@
     return topological_order_;
   }
 
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrderLoopEnds() {
+    DCHECK(topological_order_loop_ends_ != nullptr);
+    return topological_order_loop_ends_;
+  }
+
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrderIndexes() {
+    DCHECK(topological_order_indexes_ != nullptr);
+    return topological_order_indexes_;
+  }
+
+  GrowableArray<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() {
+    DCHECK(topological_order_loop_head_stack_ != nullptr);
+    return topological_order_loop_head_stack_;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -727,7 +743,7 @@
       * would be filtered out with current settings.  When orig_sreg field is removed
       * from RegLocation, expand s_reg_low to handle all possible cases and remove DCHECK().
       */
-    DCHECK_EQ(new_num, static_cast<int16_t>(new_num));
+    CHECK_EQ(new_num, static_cast<int16_t>(new_num));
     num_ssa_regs_ = new_num;
   }
 
@@ -1132,6 +1148,14 @@
   GrowableArray<BasicBlockId>* dfs_post_order_;
   GrowableArray<BasicBlockId>* dom_post_order_traversal_;
   GrowableArray<BasicBlockId>* topological_order_;
+  // Indexes in topological_order_ need to be only as big as the BasicBlockId.
+  COMPILE_ASSERT(sizeof(BasicBlockId) == sizeof(uint16_t), assuming_16_bit_BasicBlockId);
+  // For each loop head's topological order index, remember the past-the-end order index of
+  // the loop's blocks. 0 if the block is not a loop head.
+  GrowableArray<uint16_t>* topological_order_loop_ends_;
+  // Map BB ids to topological_order_ indexes. 0xffff if not included (hidden or null block).
+  GrowableArray<uint16_t>* topological_order_indexes_;
+  // Stack of the loop head indexes and recalculation flags for the
+  // LoopRepeatingTopologicalSortIterator.
+  GrowableArray<std::pair<uint16_t, bool>>* topological_order_loop_head_stack_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
   std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
@@ -1177,6 +1201,7 @@
   friend class ClassInitCheckEliminationTest;
   friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
+  friend class TopologicalSortOrderTest;
 };
 
 }  // namespace art
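Since topological_order_indexes_ maps a BB id back to its position in the order (with 0xffff
for hidden/null blocks), cheap order queries become possible. A sketch; IsBackEdgeInOrder is a
hypothetical helper, not an ART API:

    // Hypothetical helper: in the computed order, an edge is a back-edge iff its target
    // does not come strictly after its source.
    static bool IsBackEdgeInOrder(MIRGraph* mir_graph, BasicBlockId from, BasicBlockId to) {
      GrowableArray<BasicBlockId>* indexes = mir_graph->GetTopologicalSortOrderIndexes();
      uint16_t from_idx = indexes->Get(from);
      uint16_t to_idx = indexes->Get(to);
      DCHECK_NE(from_idx, 0xffff);  // Hidden/null blocks are not part of the order.
      DCHECK_NE(to_idx, 0xffff);
      return to_idx <= from_idx;
    }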
diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc
new file mode 100644
index 0000000..932f453
--- /dev/null
+++ b/compiler/dex/mir_graph_test.cc
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir_graph.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class TopologicalSortOrderTest : public testing::Test {
+ protected:
+  struct BBDef {
+    static constexpr size_t kMaxSuccessors = 4;
+    static constexpr size_t kMaxPredecessors = 4;
+
+    BBType type;
+    size_t num_successors;
+    BasicBlockId successors[kMaxPredecessors];
+    size_t num_predecessors;
+    BasicBlockId predecessors[kMaxPredecessors];
+  };
+
+#define DEF_SUCC0() \
+    0u, { }
+#define DEF_SUCC1(s1) \
+    1u, { s1 }
+#define DEF_SUCC2(s1, s2) \
+    2u, { s1, s2 }
+#define DEF_SUCC3(s1, s2, s3) \
+    3u, { s1, s2, s3 }
+#define DEF_SUCC4(s1, s2, s3, s4) \
+    4u, { s1, s2, s3, s4 }
+#define DEF_PRED0() \
+    0u, { }
+#define DEF_PRED1(p1) \
+    1u, { p1 }
+#define DEF_PRED2(p1, p2) \
+    2u, { p1, p2 }
+#define DEF_PRED3(p1, p2, p3) \
+    3u, { p1, p2, p3 }
+#define DEF_PRED4(p1, p2, p3, p4) \
+    4u, { p1, p2, p3, p4 }
+#define DEF_BB(type, succ, pred) \
+    { type, succ, pred }
+
+  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
+    cu_.mir_graph->block_id_map_.clear();
+    cu_.mir_graph->block_list_.Reset();
+    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
+    ASSERT_EQ(kNullBlock, defs[0].type);
+    ASSERT_EQ(kEntryBlock, defs[1].type);
+    ASSERT_EQ(kExitBlock, defs[2].type);
+    for (size_t i = 0u; i != count; ++i) {
+      const BBDef* def = &defs[i];
+      BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i);
+      cu_.mir_graph->block_list_.Insert(bb);
+      if (def->num_successors <= 2) {
+        bb->successor_block_list_type = kNotUsed;
+        bb->successor_blocks = nullptr;
+        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
+        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
+      } else {
+        bb->successor_block_list_type = kPackedSwitch;
+        bb->fall_through = 0u;
+        bb->taken = 0u;
+        bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+            &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks);
+        for (size_t j = 0u; j != def->num_successors; ++j) {
+          SuccessorBlockInfo* successor_block_info =
+              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
+                                                               kArenaAllocSuccessor));
+          successor_block_info->block = def->successors[j];
+          successor_block_info->key = 0u;  // Not used by the topological sort.
+          bb->successor_blocks->Insert(successor_block_info);
+        }
+      }
+      bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>(
+          &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors);
+      for (size_t j = 0u; j != def->num_predecessors; ++j) {
+        ASSERT_NE(0u, def->predecessors[j]);
+        bb->predecessors->Insert(def->predecessors[j]);
+      }
+      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
+        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
+            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
+      }
+    }
+    cu_.mir_graph->num_blocks_ = count;
+    ASSERT_EQ(count, cu_.mir_graph->block_list_.Size());
+    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1);
+    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
+    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2);
+    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
+  }
+
+  template <size_t count>
+  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
+    DoPrepareBasicBlocks(defs, count);
+  }
+
+  void ComputeTopologicalSortOrder() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
+    ASSERT_NE(cu_.mir_graph->topological_order_, nullptr);
+    ASSERT_NE(cu_.mir_graph->topological_order_loop_ends_, nullptr);
+    ASSERT_NE(cu_.mir_graph->topological_order_indexes_, nullptr);
+    ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_->Size());
+    for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder()->Size(); i != size; ++i) {
+      ASSERT_LT(cu_.mir_graph->topological_order_->Get(i), cu_.mir_graph->GetNumBlocks());
+      BasicBlockId id = cu_.mir_graph->topological_order_->Get(i);
+      EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_->Get(id));
+    }
+  }
+
+  void DoCheckOrder(const BasicBlockId* ids, size_t count) {
+    ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder()->Size());
+    for (size_t i = 0; i != count; ++i) {
+      EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()->Get(i)) << i;
+    }
+  }
+
+  template <size_t count>
+  void CheckOrder(const BasicBlockId (&ids)[count]) {
+    DoCheckOrder(ids, count);
+  }
+
+  void DoCheckLoopEnds(const uint16_t* ends, size_t count) {
+    for (size_t i = 0; i != count; ++i) {
+      ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Size());
+      EXPECT_EQ(ends[i], cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Get(i)) << i;
+    }
+  }
+
+  template <size_t count>
+  void CheckLoopEnds(const uint16_t (&ends)[count]) {
+    DoCheckLoopEnds(ends, count);
+  }
+
+  TopologicalSortOrderTest()
+      : pool_(),
+        cu_(&pool_) {
+    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
+  }
+
+  ArenaPool pool_;
+  CompilationUnit cu_;
+};
+
+TEST_F(TopologicalSortOrderTest, DoWhile) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 0, 3, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
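Reading the expectations above: in the order {1, 3, 4, 5, 2}, the self-looping block 4 sits at
order index 2, and loop_ends[2] == 3 says the loop occupies order indexes [2, 3), i.e. block 4
alone; every 0 entry means "not a loop head". The same reading applies to all the tests below.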
+
+TEST_F(TopologicalSortOrderTest, While) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED2(1, 4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(3)),     // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 3, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, WhileWithTwoBackEdges) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED3(1, 4, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED1(3)),     // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),        // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoop) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED2(1, 6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),            // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 5, 4, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopHeadLoops) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED2(3, 5)),      // Nested head, loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 4, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopSameBackBranchBlock) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 3), DEF_PRED1(4)),         // Loops to 4 and 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 4, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, TwoReorderedInnerLoops) {
+  // This is a simplified version of a real code graph. The branch from 8 to 5 must prevent
+  // block 5 from being considered a loop head before the loop 7-8 has been processed.
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 9), DEF_PRED2(1, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 7), DEF_PRED1(3)),         // Branch over loop in 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 3), DEF_PRED3(4, 6, 8)),   // Loops to 4; inner loop.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),            // Loops to 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED2(4, 8)),         // Loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 5), DEF_PRED1(7)),         // Loops to 7; branches to 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 7, 8, 5, 6, 9, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 7, 0, 5, 0, 7, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopWithBackEdgeAfterOuterLoopBackEdge) {
+  // This is a simplified version of a real code graph. The back-edge from 7 to the inner
+  // loop head 4 comes after the back-edge from 6 to the outer loop head 3. To make this
+  // appear a bit more complex, there's also a back-edge from 5 to 4.
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED2(1, 6)),         // Outer loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED3(3, 5, 7)),   // Inner loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to inner loop head 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 3), DEF_PRED1(4)),         // Loops to outer loop head 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(2, 4), DEF_PRED1(6)),         // Loops to inner loop head 4.
+  };
+  const BasicBlockId expected_order[] = {
+      // NOTE: The 5 goes before 6 only because 5 is a "fall-through" from 4 while 6 is "taken".
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 6, 6, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 6)),  // Fall-back block is chosen as
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)),  // the earlier of the two.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(6)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 0, 5, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+}  // namespace art
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 869c48f..5c98654 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -321,7 +321,7 @@
     return true;
   }
   // Don't do a separate LVN if we did the GVN.
-  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0;
+  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u;
   std::unique_ptr<ScopedArenaAllocator> allocator;
   std::unique_ptr<GlobalValueNumbering> global_valnum;
   std::unique_ptr<LocalValueNumbering> local_valnum;
@@ -737,11 +737,9 @@
   ArenaBitVector* ssa_regs_to_check = temp_bit_vector_;
   if (do_nce) {
     /*
-     * Set initial state.  Be conservative with catch
-     * blocks and start with no assumptions about null check
-     * status (except for "this").
+     * Set initial state. Catch blocks don't need any special treatment.
      */
-    if ((bb->block_type == kEntryBlock) | bb->catch_entry) {
+    if (bb->block_type == kEntryBlock) {
       ssa_regs_to_check->ClearAllBits();
       // Assume all ins are objects.
       for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins;
@@ -1047,12 +1045,11 @@
   }
 
   /*
-   * Set initial state.  Be conservative with catch
-   * blocks and start with no assumptions about class init check status.
+   * Set initial state.  Catch blocks don't need any special treatment.
    */
   ArenaBitVector* classes_to_check = temp_bit_vector_;
   DCHECK(classes_to_check != nullptr);
-  if ((bb->block_type == kEntryBlock) | bb->catch_entry) {
+  if (bb->block_type == kEntryBlock) {
     classes_to_check->SetInitialBits(temp_bit_vector_size_);
   } else if (bb->predecessors->Size() == 1) {
     BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0));
@@ -1142,11 +1139,7 @@
 }
 
 bool MIRGraph::ApplyGlobalValueNumberingGate() {
-  if ((cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0) {
-    return false;
-  }
-
-  if ((merged_df_flags_ & DF_LVN) == 0) {
+  if ((cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u) {
     return false;
   }
 
@@ -1182,7 +1175,7 @@
           lvn->GetValueNumber(mir);
         }
         bool change = temp_gvn_->FinishBasicBlock(bb);
-        DCHECK(!change);
+        DCHECK(!change) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
       }
     }
   } else {
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 8c70b5c..c510b52 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -195,7 +195,7 @@
     cu_.mir_graph->SSATransformationEnd();
     bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
     ASSERT_TRUE(gate_result);
-    RepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
+    LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
     bool change = false;
     for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
       change = cu_.mir_graph->EliminateClassInitChecks(bb);
@@ -373,30 +373,47 @@
   static const SFieldDef sfields[] = {
       { 0u, 1u, 0u, 0u },
       { 1u, 1u, 1u, 1u },
+      { 2u, 1u, 2u, 2u },
+      { 3u, 1u, 3u, 3u },
   };
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
       DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)),
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),  // Catch handler.
-      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),     // The top.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // The throwing insn.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)),     // Catch handler.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)),  // The merged block.
   };
   static const MIRDef mirs[] = {
-      DEF_MIR(Instruction::SGET, 3u, 0u),
-      DEF_MIR(Instruction::SGET, 3u, 1u),
-      DEF_MIR(Instruction::SGET, 4u, 1u),
-      DEF_MIR(Instruction::SGET, 5u, 0u),  // Not eliminated.
-      DEF_MIR(Instruction::SGET, 5u, 1u),  // Eliminated.
+      DEF_MIR(Instruction::SGET, 3u, 0u),  // Before the exception edge.
+      DEF_MIR(Instruction::SGET, 3u, 1u),  // Before the exception edge.
+      DEF_MIR(Instruction::SGET, 4u, 2u),  // After the exception edge.
+      DEF_MIR(Instruction::SGET, 4u, 3u),  // After the exception edge.
+      DEF_MIR(Instruction::SGET, 5u, 0u),  // In catch handler; class init check eliminated.
+      DEF_MIR(Instruction::SGET, 5u, 2u),  // In catch handler; class init check not eliminated.
+      DEF_MIR(Instruction::SGET, 6u, 0u),  // Class init check eliminated.
+      DEF_MIR(Instruction::SGET, 6u, 1u),  // Class init check eliminated.
+      DEF_MIR(Instruction::SGET, 6u, 2u),  // Class init check eliminated.
+      DEF_MIR(Instruction::SGET, 6u, 3u),  // Class init check not eliminated.
   };
   static const bool expected_ignore_clinit_check[] = {
-      false, false, false, false, true
+      false, false, false, false, true, false, true, true, true, false
   };
 
   PrepareSFields(sfields);
   PrepareBasicBlocks(bbs);
-  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(4u);
+  BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
   catch_handler->catch_entry = true;
+  // Add successor block info to the check block.
+  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
+  check_bb->successor_block_list_type = kCatch;
+  check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+      &cu_.arena, 2, kGrowableArraySuccessorBlocks);
+  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+  successor_block_info->block = catch_handler->id;
+  check_bb->successor_blocks->Insert(successor_block_info);
   PrepareMIRs(mirs);
   PerformClassInitCheckElimination();
   ASSERT_EQ(arraysize(expected_ignore_clinit_check), mir_count_);
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
index 031c5cf..133593c 100644
--- a/compiler/dex/pass_driver_me.h
+++ b/compiler/dex/pass_driver_me.h
@@ -68,6 +68,9 @@
       case kRepeatingTopologicalSortTraversal:
         DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
         break;
+      case kLoopRepeatingTopologicalSortTraversal:
+        DoWalkBasicBlocks<LoopRepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
       case kAllNodes:
         DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
         break;
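With the new case wired in, a pass selects this traversal via its PassME constructor argument.
A sketch under the assumption that the constructor's second parameter is the traversal kind;
the pass name and class are hypothetical:

    // Hypothetical pass that revisits only changed loops rather than the whole method.
    class MyLoopAwarePass : public PassME {
     public:
      MyLoopAwarePass() : PassME("MyLoopAwarePass", kLoopRepeatingTopologicalSortTraversal) {
      }
    };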
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
index ff69865..c7276eb 100644
--- a/compiler/dex/pass_me.h
+++ b/compiler/dex/pass_me.h
@@ -55,6 +55,7 @@
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
   kTopologicalSortTraversal,               /**< @brief Topological Order traversal. */
   kRepeatingTopologicalSortTraversal,      /**< @brief Repeating Topological Order traversal. */
+  kLoopRepeatingTopologicalSortTraversal,  /**< @brief Loop-repeating Topological Order traversal. */
   kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 6b96e92..5059c5f 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -358,7 +358,7 @@
    */
   bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm) -
       Thread::kStackOverflowSignalReservedBytes;
   bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
@@ -381,7 +381,7 @@
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing.  This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, GetStackOverflowReservedBytes(kArm));
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 582af51..fa252a1 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -110,6 +110,8 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
     bool GenInlinedSqrt(CallInfo* info);
@@ -136,6 +138,9 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
+    void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                          int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                          int dest_reg_class) OVERRIDE;
     bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     void GenMonitorExit(int opt_flags, RegLocation rl_src);
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index e06d814..dcb8857 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -338,6 +338,60 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
+  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
+  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
+      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
+    return kCoreReg;
+  }
+  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
+  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
+    return kFPReg;
+  }
+  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
+  // is faster using a core reg or fp reg depends on the particular CPU. Without further
+  // investigation and testing we prefer core register. (If the result is subsequently used in
+  // another fp operation, the dalvik reg will probably get promoted and that should be handled
+  // by the cases above.)
+  return kCoreReg;
+}
+
+bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLoc(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValue(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {
+    NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLocWide(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValueWide(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {
+    NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else if (rl_result.reg.GetLow().GetReg() != rl_src.reg.GetHigh().GetReg()) {
+    // No register overlap: copy the low word, then clear the sign bit of the high word.
+    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
+    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x7fffffff);
+  } else {
+    // The result's low reg aliases the source's high reg; save the high word first.
+    RegStorage rs_tmp = AllocTemp();
+    OpRegCopy(rs_tmp, rl_src.reg.GetHigh());
+    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
+    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rs_tmp, 0x7fffffff);
+    FreeTemp(rs_tmp);
+  }
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
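Both core-register paths lean on the IEEE-754 layout: clearing the most significant bit of the
raw representation (bit 31 of a float, bit 63 of a double, hence the AND of only the high word
above) yields the absolute value. A standalone C++ check of that identity, not ART code:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Clearing the IEEE-754 sign bit computes fabsf() for any float, including -0.0f.
    static float BitwiseAbs(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= 0x7fffffffu;  // The same mask the kCoreReg path applies.
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    int main() {
      assert(BitwiseAbs(-2.5f) == std::fabs(-2.5f));
      assert(BitwiseAbs(-0.0f) == 0.0f);
      return 0;
    }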
 bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   RegLocation rl_src = info->args[0];
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2fcc3a5..a85b740 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -203,6 +203,30 @@
   OpCmpImmBranch(ccode, low_reg, val_lo, taken);
 }
 
+void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                                  int dest_reg_class) {
+  // TODO: Generalize the IT below to accept constant loads longer than one instruction.
+  DCHECK(InexpensiveConstantInt(true_val));
+  DCHECK(InexpensiveConstantInt(false_val));
+
+  if ((true_val == 0 && code == kCondEq) ||
+      (false_val == 0 && code == kCondNe)) {
+    OpRegRegReg(kOpSub, rs_dest, left_op, right_op);
+    DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
+    LIR* it = OpIT(kCondNe, "");
+    LoadConstant(rs_dest, code == kCondEq ? false_val : true_val);
+    OpEndIT(it);
+    return;
+  }
+
+  OpRegReg(kOpCmp, left_op, right_op);  // Same?
+  LIR* it = OpIT(code, "E");   // if-convert the test
+  LoadConstant(rs_dest, true_val);      // .eq case - load true
+  LoadConstant(rs_dest, false_val);     // .ne case - load false
+  OpEndIT(it);
+}
+
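The early-out above works because SUBS both sets the flags and materializes the "values are
equal" result: when left_op == right_op the destination is already 0, so only the not-equal arm
needs a constant load inside the IT block. An illustrative C equivalent, not emitted code:

    // (left == right) ? 0 : other -- one SUB plus one conditional move.
    static int32_t SelectEqZero(int32_t left, int32_t right, int32_t other) {
      int32_t dest = left - right;  // SUBS: zero exactly when left == right.
      if (dest != 0) {              // IT NE ...
        dest = other;               // ... conditional load of the other constant.
      }
      return dest;
    }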
 void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 9cbf7b8..9bb9dda 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1107,7 +1107,7 @@
     // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr
     // in LDREXD and recalculate it from r_base.
     RegStorage r_temp = AllocTemp();
-    RegStorage r_temp_high = AllocFreeTemp();  // We may not have another temp.
+    RegStorage r_temp_high = AllocTemp(false);  // We may not have another temp.
     if (r_temp_high.Valid()) {
       NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
       FreeTemp(r_temp_high);
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index d946ee3..5e95500 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -330,8 +330,8 @@
 
   NewLIR0(kPseudoMethodEntry);
 
-  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
-        Thread::kStackOverflowSignalReservedBytes;
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64) -
+      Thread::kStackOverflowSignalReservedBytes;
   const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index f51145c..8d15326 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -175,6 +175,7 @@
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
     bool GenInlinedReverseBits(CallInfo* info, OpSize size);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
     bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
@@ -204,7 +205,14 @@
     void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-    void GenSelect(BasicBlock* bb, MIR* mir);
+    void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
+    void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                          int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                          int dest_reg_class) OVERRIDE;
+    // Helper used by the two methods above.
+    void GenSelect(int32_t left, int32_t right, ConditionCode code, RegStorage rs_dest,
+                   int result_reg_class);
+
     bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     void GenMonitorExit(int opt_flags, RegLocation rl_src);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 6594c4b..175cef1 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -323,12 +323,57 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
+  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
+  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
+      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
+    return kCoreReg;
+  }
+  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
+  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
+    return kFPReg;
+  }
+  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
+  // is faster using a core reg or fp reg depends on the particular CPU. For example, on A53
+  // it's faster using core reg while on A57 it's faster with fp reg, the difference being
+  // bigger on the A53. Without further investigation and testing we prefer core register.
+  // (If the result is subsequently used in another fp operation, the dalvik reg will probably
+  // get promoted and that should be handled by the cases above.)
+  return kCoreReg;
+}
+
+bool Arm64Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLoc(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValue(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {
+    NewLIR2(kA64Fabs2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    NewLIR4(kA64Ubfm4rrdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 30);
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTargetWide(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLocWide(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValueWide(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {
+    NewLIR2(FWIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+  }
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2b78e81..aed8de8 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -85,141 +85,129 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  RegLocation rl_result;
-  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
-  RegLocation rl_dest = mir_graph_->GetDest(mir);
-  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
-  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+static constexpr bool kUseDeltaEncodingInGenSelect = false;
 
-  rl_src = LoadValue(rl_src, src_reg_class);
+void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
+                             RegStorage rs_dest, int result_reg_class) {
+  if (false_val == 0 ||               // 0 is better as first operand.
+      true_val == 1 ||                // Potentially Csinc.
+      true_val == -1 ||               // Potentially Csinv.
+      true_val == false_val + 1) {    // Potentially Csinc.
+    ccode = NegateComparison(ccode);
+    std::swap(true_val, false_val);
+  }
+
+  ArmConditionCode code = ArmConditionEncoding(ccode);
+
+  int opcode;                                      // The conditional-select opcode to emit.
+  RegStorage left_op = RegStorage::InvalidReg();   // Left operand of the conditional select.
+  RegStorage right_op = RegStorage::InvalidReg();  // Right operand of the conditional select.
+
+  bool is_wide = rs_dest.Is64Bit();
+
+  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
+
+  if (true_val == 0) {
+    left_op = zero_reg;
+  } else {
+    left_op = rs_dest;
+    LoadConstantNoClobber(rs_dest, true_val);
+  }
+  if (false_val == 1) {
+    right_op = zero_reg;
+    opcode = kA64Csinc4rrrc;
+  } else if (false_val == -1) {
+    right_op = zero_reg;
+    opcode = kA64Csinv4rrrc;
+  } else if (false_val == true_val + 1) {
+    right_op = left_op;
+    opcode = kA64Csinc4rrrc;
+  } else if (false_val == -true_val) {
+    right_op = left_op;
+    opcode = kA64Csneg4rrrc;
+  } else if (false_val == ~true_val) {
+    right_op = left_op;
+    opcode = kA64Csinv4rrrc;
+  } else if (true_val == 0) {
+    // left_op is zero_reg.
+    right_op = rs_dest;
+    LoadConstantNoClobber(rs_dest, false_val);
+    opcode = kA64Csel4rrrc;
+  } else {
+    // Generic case.
+    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
+    if (is_wide) {
+      if (t_reg2.Is32Bit()) {
+        t_reg2 = As64BitReg(t_reg2);
+      }
+    } else {
+      if (t_reg2.Is64Bit()) {
+        t_reg2 = As32BitReg(t_reg2);
+      }
+    }
+
+    if (kUseDeltaEncodingInGenSelect) {
+      int32_t delta = false_val - true_val;
+      uint32_t abs_val = delta < 0 ? -delta : delta;
+
+      if (abs_val < 0x1000) {  // TODO: Use an InexpensiveConstant check that knows the opcode.
+        // Can encode as immediate to an add.
+        right_op = t_reg2;
+        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
+      }
+    }
+
+    // Load as constant.
+    if (!right_op.Valid()) {
+      LoadConstantNoClobber(t_reg2, false_val);
+      right_op = t_reg2;
+    }
+
+    opcode = kA64Csel4rrrc;
+  }
+
+  DCHECK(left_op.Valid() && right_op.Valid());
+  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
+      code);
+}
+
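A worked example of the normalization above: selecting 1/0 on kCondEq matches the
"true_val == 1" rule, so the condition is negated and the values swapped to 0/1; then
"true_val == 0" picks the zero register as left_op and "false_val == 1" picks Csinc with the
zero register as right_op. The result is a single csinc dest, zr, zr, <negated cond>, which is
the standard CSET idiom; no constant loads are needed.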
+void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                                    int dest_reg_class) {
+  DCHECK(rs_dest.Valid());
+  OpRegReg(kOpCmp, left_op, right_op);
+  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
+}
+
+void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
   // rl_src may be aliased with rl_result/rl_dest, so do compare early.
   OpRegImm(kOpCmp, rl_src.reg, 0);
 
-  ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
 
   // The kMirOpSelect has two variants, one for constants and one for moves.
-  bool is_wide = rl_dest.ref || rl_dest.wide;
-
   if (mir->ssa_rep->num_uses == 1) {
-    uint32_t true_val = mir->dalvikInsn.vB;
-    uint32_t false_val = mir->dalvikInsn.vC;
-
-    int opcode;             // The opcode.
-    int left_op, right_op;  // The operands.
-    bool rl_result_evaled = false;
-
-    // Check some simple cases.
-    // TODO: Improve this.
-    int zero_reg = (is_wide ? rs_xzr : rs_wzr).GetReg();
-
-    if ((true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0)) {
-      // CSInc cheap based on wzr.
-      if (true_val == 1) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-      }
-
-      left_op = right_op = zero_reg;
-      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
-    } else if ((true_val == 0 && false_val == 0xFFFFFFFF) ||
-               (true_val == 0xFFFFFFFF && false_val == 0)) {
-      // CSneg cheap based on wzr.
-      if (true_val == 0xFFFFFFFF) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-      }
-
-      left_op = right_op = zero_reg;
-      opcode = is_wide ? WIDE(kA64Csinv4rrrc) : kA64Csinv4rrrc;
-    } else if (true_val == 0 || false_val == 0) {
-      // Csel half cheap based on wzr.
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      rl_result_evaled = true;
-      if (false_val == 0) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-      }
-      LoadConstantNoClobber(rl_result.reg, true_val == 0 ? false_val : true_val);
-      left_op = zero_reg;
-      right_op = rl_result.reg.GetReg();
-      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
-    } else if (true_val == 1 || false_val == 1) {
-      // CSInc half cheap based on wzr.
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      rl_result_evaled = true;
-      if (true_val == 1) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-      }
-      LoadConstantNoClobber(rl_result.reg, true_val == 1 ? false_val : true_val);
-      left_op = rl_result.reg.GetReg();
-      right_op = zero_reg;
-      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
-    } else if (true_val == 0xFFFFFFFF || false_val == 0xFFFFFFFF) {
-      // CSneg half cheap based on wzr.
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      rl_result_evaled = true;
-      if (true_val == 0xFFFFFFFF) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-      }
-      LoadConstantNoClobber(rl_result.reg, true_val == 0xFFFFFFFF ? false_val : true_val);
-      left_op = rl_result.reg.GetReg();
-      right_op = zero_reg;
-      opcode = is_wide ? WIDE(kA64Csinv4rrrc) : kA64Csinv4rrrc;
-    } else if ((true_val + 1 == false_val) || (false_val + 1 == true_val)) {
-      // Load a constant and use CSinc. Use rl_result.
-      if (false_val + 1 == true_val) {
-        // Negate.
-        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
-        true_val = false_val;
-      }
-
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      rl_result_evaled = true;
-      LoadConstantNoClobber(rl_result.reg, true_val);
-      left_op = right_op = rl_result.reg.GetReg();
-      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
-    } else {
-      // Csel. The rest. Use rl_result and a temp.
-      // TODO: To minimize the constants being loaded, check whether one can be inexpensively
-      //       loaded as n - 1 or ~n.
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-      rl_result_evaled = true;
-      LoadConstantNoClobber(rl_result.reg, true_val);
-      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
-      if (rl_dest.wide) {
-        if (t_reg2.Is32Bit()) {
-          t_reg2 = As64BitReg(t_reg2);
-        }
-      }
-      LoadConstantNoClobber(t_reg2, false_val);
-
-      // Use csel.
-      left_op = rl_result.reg.GetReg();
-      right_op = t_reg2.GetReg();
-      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
-    }
-
-    if (!rl_result_evaled) {
-      rl_result = EvalLoc(rl_dest, result_reg_class, true);
-    }
-
-    NewLIR4(opcode, rl_result.reg.GetReg(), left_op, right_op, code);
+    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
+    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
+              rl_dest.ref ? kRefReg : kCoreReg);
+    StoreValue(rl_dest, rl_result);
   } else {
     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
 
+    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
     rl_true = LoadValue(rl_true, result_reg_class);
     rl_false = LoadValue(rl_false, result_reg_class);
-    rl_result = EvalLoc(rl_dest, result_reg_class, true);
+    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
+    bool is_wide = rl_dest.ref || rl_dest.wide;
     int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
     NewLIR4(opcode, rl_result.reg.GetReg(),
-            rl_true.reg.GetReg(), rl_false.reg.GetReg(), code);
+            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
+    StoreValue(rl_dest, rl_result);
   }
-  StoreValue(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 502859a..adc228c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1166,7 +1166,6 @@
     LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
   }
 
-  LIR* ne_branchover = NULL;
   // FIXME: what should we be comparing here? compressed or decompressed references?
   if (cu_->instruction_set == kThumb2) {
     OpRegReg(kOpCmp, check_class, object_class);  // Same?
@@ -1174,14 +1173,10 @@
     LoadConstant(result_reg, 1);     // .eq case - load true
     OpEndIT(it);
   } else {
-    ne_branchover = OpCmpBranch(kCondNe, check_class, object_class, NULL);
-    LoadConstant(result_reg, 1);     // eq case - load true
+    GenSelectConst32(check_class, object_class, kCondEq, 1, 0, result_reg, kCoreReg);
   }
   LIR* target = NewLIR0(kPseudoTargetLabel);
   null_branchover->target = target;
-  if (ne_branchover != NULL) {
-    ne_branchover->target = target;
-  }
   FreeTemp(object_class);
   FreeTemp(check_class);
   if (IsTemp(result_reg)) {
@@ -1196,15 +1191,14 @@
                                          bool can_assume_type_is_in_dex_cache,
                                          uint32_t type_idx, RegLocation rl_dest,
                                          RegLocation rl_src) {
-  // X86 has its own implementation.
-  DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
-
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
   RegStorage method_reg = TargetReg(kArg1, kRef);
   LoadCurrMethodDirect(method_reg);   // kArg1 <= current Method*
   RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*
+  RegStorage ref_reg = TargetReg(kArg0, kRef);  // kArg0 will hold the ref.
+  RegStorage ret_reg = GetReturn(kRefReg).reg;
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kArg0
@@ -1215,63 +1209,82 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
+    OpRegCopy(class_reg, ret_reg);  // Align usage with fast path
+    LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
   } else if (use_declaring_class) {
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
+    LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
     LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
   } else {
+    if (can_assume_type_is_in_dex_cache) {
+      // Conditionally, as in the other case we will also load it.
+      LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
+    }
+
     // Load dex cache entry into class_reg (kArg2)
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  // kArg0 <= ref
     LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg, kNotVolatile);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
     LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
     if (!can_assume_type_is_in_dex_cache) {
-      // Need to test presence of type in dex cache at runtime
-      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
-      // Not resolved
-      // Call out to helper, which will return resolved type in kRet0
-      if (cu_->target64) {
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
-      } else {
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
-      }
-      OpRegCopy(TargetReg(kArg2, kRef), TargetReg(kRet0, kRef));  // Align usage with fast path
-      LoadValueDirectFixed(rl_src, TargetReg(kArg0, kRef));  /* reload Ref */
-      // Rejoin code paths
-      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
-      hop_branch->target = hop_target;
+      LIR* slow_path_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
+      LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+
+      // Load the ref here: the slow path rejoins at the label above, so both paths set kArg0.
+      LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
+
+      class InitTypeSlowPath : public Mir2Lir::LIRSlowPath {
+       public:
+        InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx,
+                         RegLocation rl_src)
+            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), type_idx_(type_idx),
+              rl_src_(rl_src) {
+        }
+
+        void Compile() OVERRIDE {
+          GenerateTargetLabel();
+
+          if (cu_->target64) {
+            m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
+                                       true);
+          } else {
+            m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
+                                       true);
+          }
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kRef),
+                          m2l_->TargetReg(kRet0, kRef));  // Align usage with fast path
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        uint32_t type_idx_;
+        RegLocation rl_src_;
+      };
+
+      AddSlowPath(new (arena_) InitTypeSlowPath(this, slow_path_branch, slow_path_target,
+                                                type_idx, rl_src));
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
   RegLocation rl_result = GetReturn(kCoreReg);
-  if (cu_->instruction_set == kMips) {
-    // On MIPS rArg0 != rl_result, place false in result if branch is taken.
+  if (!IsSameReg(rl_result.reg, ref_reg)) {
+    // On MIPS and x86_64 rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
   }
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0, kRef), 0, NULL);
+  LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, NULL);
 
   /* load object->klass_ */
+  RegStorage ref_class_reg = TargetReg(kArg1, kRef);  // kArg1 will hold the Class* of ref.
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(TargetReg(kArg0, kRef), mirror::Object::ClassOffset().Int32Value(),
-              TargetReg(kArg1, kRef), kNotVolatile);
+  LoadRefDisp(ref_reg, mirror::Object::ClassOffset().Int32Value(),
+              ref_class_reg, kNotVolatile);
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */
   LIR* branchover = NULL;
   if (type_known_final) {
-    // rl_result == ref == null == 0.
-    if (cu_->instruction_set == kThumb2) {
-      OpRegReg(kOpCmp, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef));  // Same?
-      LIR* it = OpIT(kCondEq, "E");   // if-convert the test
-      LoadConstant(rl_result.reg, 1);     // .eq case - load true
-      LoadConstant(rl_result.reg, 0);     // .ne case - load false
-      OpEndIT(it);
-    } else {
-      LoadConstant(rl_result.reg, 0);     // ne case - load false
-      branchover = OpCmpBranch(kCondNe, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef), NULL);
-      LoadConstant(rl_result.reg, 1);     // eq case - load true
-    }
+    // rl_result == ref == class.
+    GenSelectConst32(ref_class_reg, class_reg, kCondEq, 1, 0, rl_result.reg,
+                     kCoreReg);
   } else {
     if (cu_->instruction_set == kThumb2) {
       RegStorage r_tgt = cu_->target64 ?
@@ -1280,11 +1293,11 @@
       LIR* it = nullptr;
       if (!type_known_abstract) {
       /* Uses conditional nullification */
-        OpRegReg(kOpCmp, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef));  // Same?
+        OpRegReg(kOpCmp, ref_class_reg, class_reg);  // Same?
         it = OpIT(kCondEq, "EE");   // if-convert the test
-        LoadConstant(TargetReg(kArg0, kNotWide), 1);     // .eq case - load true
+        LoadConstant(rl_result.reg, 1);     // .eq case - load true
       }
-      OpRegCopy(TargetReg(kArg0, kRef), TargetReg(kArg2, kRef));    // .ne case - arg0 <= class
+      OpRegCopy(ref_reg, class_reg);    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       if (it != nullptr) {
         OpEndIT(it);
@@ -1296,12 +1309,13 @@
         LoadConstant(rl_result.reg, 1);     // assume true
         branchover = OpCmpBranch(kCondEq, TargetReg(kArg1, kRef), TargetReg(kArg2, kRef), NULL);
       }
-      RegStorage r_tgt = cu_->target64 ?
-          LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
-          LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
-      OpRegCopy(TargetReg(kArg0, kRef), TargetReg(kArg2, kRef));    // .ne case - arg0 <= class
-      OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
-      FreeTemp(r_tgt);
+
+      OpRegCopy(TargetReg(kArg0, kRef), class_reg);    // .ne case - arg0 <= class
+      if (cu_->target64) {
+        CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial), false);
+      } else {
+        CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial), false);
+      }
     }
   }
   // TODO: only clobber when type isn't final?
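To make the control flow of the rewritten instanceof fast path easier to follow, here is a minimal C++ sketch of the semantics it encodes. The names, and the SlowPathInstanceOf stand-in for the pInstanceofNonTrivial entrypoint, are illustrative and not the ART API:

    #include <cstdint>

    struct Class;
    struct Object { Class* klass; };

    // Hypothetical stand-in for the pInstanceofNonTrivial runtime helper.
    bool SlowPathInstanceOf(Class* ref_class, Class* target);

    int32_t InstanceOfFastPath(Object* ref, Class* target) {
      if (ref == nullptr) return 0;        // branch1: a null ref is never an instance.
      Class* ref_class = ref->klass;       // LoadRefDisp of object->klass_ into kArg1.
      if (ref_class == target) return 1;   // Final-type case: GenSelectConst32(kCondEq, 1, 0).
      return SlowPathInstanceOf(ref_class, target) ? 1 : 0;  // Helper call otherwise.
    }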
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 8ce6e1a..2c69593 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1261,7 +1261,9 @@
   bool is_finalizable;
   const DexFile* old_dex = cu_->dex_file;
   cu_->dex_file = ref_dex_file;
-  RegStorage reg_class = TargetPtrReg(kArg1);
+  RegStorage reg_class = TargetReg(kArg1, kRef);
+  Clobber(reg_class);
+  LockTemp(reg_class);
   if (!cu_->compiler_driver->CanEmbedTypeInCode(*ref_dex_file, type_idx, &unused_type_initialized,
                                                 &use_direct_type_ptr, &direct_type_ptr,
                                                 &is_finalizable) || is_finalizable) {
@@ -1296,11 +1298,19 @@
   RegStorage reg_disabled = AllocTemp();
   Load32Disp(reg_class, slow_path_flag_offset, reg_slow_path);
   Load32Disp(reg_class, disable_flag_offset, reg_disabled);
-  OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
+  FreeTemp(reg_class);
+  LIR* or_inst = OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
   FreeTemp(reg_disabled);
 
   // if slow path, jump to JNI path target
-  LIR* slow_path_branch = OpCmpImmBranch(kCondNe, reg_slow_path, 0, nullptr);
+  LIR* slow_path_branch;
+  if (or_inst->u.m.def_mask->HasBit(ResourceMask::kCCode)) {
+    // Generate only a conditional branch, as the OR instruction already set the condition flags (we are interested in the 'Z' flag).
+    slow_path_branch = OpCondBranch(kCondNe, nullptr);
+  } else {
+    // Generate compare and branch.
+    slow_path_branch = OpCmpImmBranch(kCondNe, reg_slow_path, 0, nullptr);
+  }
   FreeTemp(reg_slow_path);
 
   // slow path not enabled, simply load the referent of the reference object
@@ -1543,20 +1553,6 @@
   return true;
 }
 
-bool Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
 bool Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
   // Currently implemented only for ARM64
   return false;
@@ -1567,22 +1563,6 @@
   return false;
 }
 
-bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTargetWide(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  OpRegCopyWide(rl_result.reg, rl_src.reg);
-  OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
 bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 4a06086..bb18ad2 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -110,6 +110,8 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
     bool GenInlinedSqrt(CallInfo* info);
@@ -136,6 +138,9 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
+    void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                          int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                          int dest_reg_class) OVERRIDE;
     bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index c3a4c17..054514e 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -215,6 +215,18 @@
   }
 }
 
+void MipsMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                                   int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                                   int dest_reg_class) {
+  // Implement as a branch-over.
+  // TODO: Conditional move?
+  LoadConstant(rs_dest, false_val);  // Favors false.
+  LIR* ne_branchover = OpCmpBranch(code, left_op, right_op, NULL);
+  LoadConstant(rs_dest, true_val);
+  LIR* target_label = NewLIR0(kPseudoTargetLabel);
+  ne_branchover->target = target_label;
+}
+
 void MipsMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
   UNIMPLEMENTED(FATAL) << "Need codegen for select";
 }
@@ -279,6 +291,16 @@
   return false;
 }
 
+bool MipsMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  // TODO - add Mips implementation
+  return false;
+}
+
+bool MipsMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  // TODO - add Mips implementation
+  return false;
+}
+
 bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5b56633..f183dc9 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -32,6 +32,7 @@
 #include "utils/array_ref.h"
 #include "utils/arena_allocator.h"
 #include "utils/growable_array.h"
+#include "utils/stack_checks.h"
 
 namespace art {
 
@@ -205,36 +206,6 @@
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
-// Size of a frame that we definitely consider large. Anything larger than this should
-// definitely get a stack overflow check.
-static constexpr size_t kLargeFrameSize = 2 * KB;
-
-// Size of a frame that should be small. Anything leaf method smaller than this should run
-// without a stack overflow check.
-// The constant is from experience with frameworks code.
-static constexpr size_t kSmallFrameSize = 1 * KB;
-
-// Determine whether a frame is small or large, used in the decision on whether to elide a
-// stack overflow check on method entry.
-//
-// A frame is considered large when it's either above kLargeFrameSize, or a quarter of the
-// overflow-usable stack space.
-static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
-  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
-}
-
-// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
-COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
-               kSmallFrameSize_is_not_a_small_frame_arm);
-COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
-               kSmallFrameSize_is_not_a_small_frame_arm64);
-COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
-               kSmallFrameSize_is_not_a_small_frame_mips);
-COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
-               kSmallFrameSize_is_not_a_small_frame_x86);
-COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
-               kSmallFrameSize_is_not_a_small_frame_x64_64);
-
 class Mir2Lir : public Backend {
   public:
     static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
@@ -745,14 +716,13 @@
     virtual RegStorage AllocPreservedSingle(int s_reg);
     virtual RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required);
-    virtual RegStorage AllocFreeTemp();
-    virtual RegStorage AllocTemp();
-    virtual RegStorage AllocTempWide();
-    virtual RegStorage AllocTempRef();
-    virtual RegStorage AllocTempSingle();
-    virtual RegStorage AllocTempDouble();
-    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
+    virtual RegStorage AllocTemp(bool required = true);
+    virtual RegStorage AllocTempWide(bool required = true);
+    virtual RegStorage AllocTempRef(bool required = true);
+    virtual RegStorage AllocTempSingle(bool required = true);
+    virtual RegStorage AllocTempDouble(bool required = true);
+    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class, bool required = true);
+    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class, bool required = true);
     void FlushReg(RegStorage reg);
     void FlushRegWide(RegStorage reg);
     RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide);
@@ -990,8 +960,8 @@
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     virtual bool GenInlinedAbsLong(CallInfo* info);
-    virtual bool GenInlinedAbsFloat(CallInfo* info);
-    virtual bool GenInlinedAbsDouble(CallInfo* info);
+    virtual bool GenInlinedAbsFloat(CallInfo* info) = 0;
+    virtual bool GenInlinedAbsDouble(CallInfo* info) = 0;
     bool GenInlinedFloatCvt(CallInfo* info);
     bool GenInlinedDoubleCvt(CallInfo* info);
     virtual bool GenInlinedArrayCopyCharArray(CallInfo* info);
@@ -1381,6 +1351,13 @@
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
 
     /**
+     * @brief Generates code to select one of the two given constants depending on the given condition code.
+     */
+    virtual void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                                  int dest_reg_class) = 0;
+
+    /**
      * @brief Used to generate a memory barrier in an architecture specific way.
      * @details The last generated LIR will be considered for use as barrier. Namely,
      * if the last LIR can be updated in a way where it will serve the semantics of
@@ -1536,11 +1513,23 @@
 
     void AddSlowPath(LIRSlowPath* slowpath);
 
-    virtual void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                            bool type_known_abstract, bool use_declaring_class,
-                                            bool can_assume_type_is_in_dex_cache,
-                                            uint32_t type_idx, RegLocation rl_dest,
-                                            RegLocation rl_src);
+    /*
+     *
+     * @brief Generate code for an instanceof check against a class.
+     * @param needs_access_check 'true' if we must check the access.
+     * @param type_known_final 'true' if the type is known to be a final class.
+     * @param type_known_abstract 'true' if the type is known to be an abstract class.
+     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
+     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                    bool type_known_abstract, bool use_declaring_class,
+                                    bool can_assume_type_is_in_dex_cache,
+                                    uint32_t type_idx, RegLocation rl_dest,
+                                    RegLocation rl_src);
     /*
      * @brief Generate the debug_frame FDE information if possible.
     * @returns pointer to vector containing CFE information, or NULL.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index fa1c36e..45244e1 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -408,64 +408,67 @@
   return RegStorage::InvalidReg();  // No register available
 }
 
-/* Return a temp if one is available, -1 otherwise */
-RegStorage Mir2Lir::AllocFreeTemp() {
-  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, false);
+RegStorage Mir2Lir::AllocTemp(bool required) {
+  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, required);
 }
 
-RegStorage Mir2Lir::AllocTemp() {
-  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, true);
-}
-
-RegStorage Mir2Lir::AllocTempWide() {
+RegStorage Mir2Lir::AllocTempWide(bool required) {
   RegStorage res;
   if (reg_pool_->core64_regs_.Size() != 0) {
-    res = AllocTempBody(reg_pool_->core64_regs_, &reg_pool_->next_core64_reg_, true);
+    res = AllocTempBody(reg_pool_->core64_regs_, &reg_pool_->next_core64_reg_, required);
   } else {
     RegStorage low_reg = AllocTemp();
     RegStorage high_reg = AllocTemp();
     res = RegStorage::MakeRegPair(low_reg, high_reg);
   }
-  CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kIgnoreRef, FPCheck::kCheckNotFP);
+  if (required) {
+    CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kIgnoreRef, FPCheck::kCheckNotFP);
+  }
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempRef() {
-  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true);
-  DCHECK(!res.IsPair());
-  CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
+RegStorage Mir2Lir::AllocTempRef(bool required) {
+  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, required);
+  if (required) {
+    DCHECK(!res.IsPair());
+    CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
+  }
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempSingle() {
-  RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, true);
-  DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
-  CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
+RegStorage Mir2Lir::AllocTempSingle(bool required) {
+  RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, required);
+  if (required) {
+    DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
+    CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
+  }
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempDouble() {
-  RegStorage res = AllocTempBody(reg_pool_->dp_regs_, &reg_pool_->next_dp_reg_, true);
-  DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits();
-  CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
+RegStorage Mir2Lir::AllocTempDouble(bool required) {
+  RegStorage res = AllocTempBody(reg_pool_->dp_regs_, &reg_pool_->next_dp_reg_, required);
+  if (required) {
+    DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits();
+    CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
+  }
   return res;
 }
 
-RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class, bool required) {
   DCHECK_NE(reg_class, kRefReg);  // NOTE: the Dalvik width of a reference is always 32 bits.
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
+    return AllocTempDouble(required);
   }
-  return AllocTempWide();
+  return AllocTempWide(required);
 }
 
-RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
+RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class, bool required) {
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempSingle();
+    return AllocTempSingle(required);
   } else if (reg_class == kRefReg) {
-    return AllocTempRef();
+    return AllocTempRef(required);
   }
-  return AllocTemp();
+  return AllocTemp(required);
 }
 
 RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*> &regs, int s_reg) {
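The AllocFreeTemp entry point is gone: every allocator now takes a required flag, and a failed non-required allocation returns an invalid RegStorage instead of hitting the out-of-registers path. A minimal sketch of the caller-side pattern this enables (GenSelectConst32 below uses exactly this shape):

    // Sketch: opportunistic temp allocation under the new API.
    RegStorage temp = AllocTypedTemp(/* fp_hint */ false, dest_reg_class, /* required */ false);
    if (temp.Valid()) {
      // A spare register existed: use the register-only fast path (e.g. CMOV).
      FreeTemp(temp);
    } else {
      // No spare register: fall back to a branch-over sequence.
    }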
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index ebe3f0a..efd9079 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -367,7 +367,11 @@
   EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES|REG_USE0),
   EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES|REG_USE0),
   EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Orpd,      0x66, 0x56, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Andpd,     0x66, 0x54, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Andps,     0x00, 0x54, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Xorpd,     0x66, 0x57, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0_USE0),
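The new Orpd/Andpd/Andps/Xorpd encodings exist so the float intrinsics can manipulate sign bits directly in XMM registers. A self-contained scalar sketch of the bit trick ANDPS performs for Math.abs(float); the mask constant is the real one, the helper name is illustrative:

    #include <cstdint>
    #include <cstring>

    float AbsFloatBitwise(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= 0x7fffffffu;  // Clear the sign bit, as ANDPS with a 0x7fffffff mask does.
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

ORPS/ORPD similarly merge sign bits, which is what the min/max intrinsic below relies on for the +0.0/-0.0 case.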
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index cf4521a..49c0a03 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -173,6 +173,7 @@
   void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
   bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
+  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
   bool GenInlinedSqrt(CallInfo* info);
   bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
@@ -202,6 +203,9 @@
   void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
   void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
   void GenSelect(BasicBlock* bb, MIR* mir);
+  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                        int dest_reg_class) OVERRIDE;
   bool GenMemBarrier(MemBarrierKind barrier_kind);
   void GenMoveException(RegLocation rl_dest);
   void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
@@ -267,22 +271,6 @@
    */
   void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                           RegLocation rl_src);
-  /*
-   *
-   * @brief Implement Set up instanceof a class with x86 specific code.
-   * @param needs_access_check 'true' if we must check the access.
-   * @param type_known_final 'true' if the type is known to be a final class.
-   * @param type_known_abstract 'true' if the type is known to be an abstract class.
-   * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
-   * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
-   * @param type_idx Type index to use if use_declaring_class is 'false'.
-   * @param rl_dest Result to be set to 0 or 1.
-   * @param rl_src Object to be tested.
-   */
-  void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                  bool type_known_abstract, bool use_declaring_class,
-                                  bool can_assume_type_is_in_dex_cache,
-                                  uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
 
   void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                       RegLocation rl_src1, RegLocation rl_shift);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index fc65deb..62053fd 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -705,4 +705,77 @@
   }
 }
 
+bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+  if (is_double) {
+    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
+    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
+    RegLocation rl_dest = InlineTargetWide(info);
+    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);
+
+    // Avoid src2 corruption by OpRegCopyWide.
+    if (rl_result.reg == rl_src2.reg) {
+      std::swap(rl_src2.reg, rl_src1.reg);
+    }
+
+    OpRegCopyWide(rl_result.reg, rl_src1.reg);
+    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    // If either arg is NaN, return NaN.
+    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
+    // Min/Max branches.
+    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
+    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
+    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
+    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
+    // Handle NaN.
+    branch_nan->target = NewLIR0(kPseudoTargetLabel);
+    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
+    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
+    // Handle Min/Max. Copy greater/lesser value from src2.
+    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
+    OpRegCopyWide(rl_result.reg, rl_src2.reg);
+    // Right operand is already in result reg.
+    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
+    // Exit.
+    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
+    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
+    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
+    RegLocation rl_dest = InlineTarget(info);
+    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+
+    // Avoid src2 corruption by OpRegCopy.
+    if (rl_result.reg == rl_src2.reg) {
+      std::swap(rl_src2.reg, rl_src1.reg);
+    }
+
+    OpRegCopy(rl_result.reg, rl_src1.reg);
+    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    // If either arg is NaN, return NaN.
+    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
+    // Min/Max branches.
+    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
+    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
+    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
+    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
+    // Handle NaN.
+    branch_nan->target = NewLIR0(kPseudoTargetLabel);
+    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
+    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
+    // Handle Min/Max. Copy greater/lesser value from src2.
+    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
+    OpRegCopy(rl_result.reg, rl_src2.reg);
+    // Right operand is already in result reg.
+    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
+    // Exit.
+    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
+    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
 }  // namespace art
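For readers checking the branch structure of GenInlinedMinMaxFP, here is a scalar C++ model of the Java Math.min semantics the float half implements. It is a reference sketch, not the generated code, and std::nanf may not produce the exact 0x7fc00000 payload the LIR loads:

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    float MinFloatModel(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::nanf("");              // branch_nan: return a quiet NaN.
      }
      if (a != b) {
        return (a < b) ? a : b;            // branch_cond1 / branch_cond2.
      }
      // a == b: OR the sign bits so min(0.0f, -0.0f) == -0.0f (kX86OrpsRR).
      uint32_t ua, ub;
      std::memcpy(&ua, &a, sizeof(ua));
      std::memcpy(&ub, &b, sizeof(ub));
      uint32_t merged = ua | ub;
      float result;
      std::memcpy(&result, &merged, sizeof(result));
      return result;
    }

Max is symmetric: it keeps the greater operand and ANDs the sign bits (kX86AndpsRR), so max(0.0f, -0.0f) == 0.0f.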
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 3f1df18..724ee7e 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -206,6 +206,67 @@
   }
 }
 
+void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                                  int dest_reg_class) {
+  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
+  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
+
+  // This check is required for correctness; otherwise the non zero/one case below
+  // would need additional checks.
+  if (true_val == false_val) {
+    LoadConstantNoClobber(rs_dest, true_val);
+    return;
+  }
+
+  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
+
+  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
+  if (zero_one_case && IsByteRegister(rs_dest)) {
+    if (!dest_intersect) {
+      LoadConstantNoClobber(rs_dest, 0);
+    }
+    OpRegReg(kOpCmp, left_op, right_op);
+    // Set the low byte of the result to 0 or 1 from the compare condition code.
+    NewLIR2(kX86Set8R, rs_dest.GetReg(),
+            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
+    if (dest_intersect) {
+      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
+    }
+  } else {
+    // Be careful: rs_dest can be changed only after the cmp, because it may be the same as one
+    // of the operands, and we cannot zero it with xor because that would dirty the condition flags.
+    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
+    if (temp_reg.Valid()) {
+      if (false_val == 0 && dest_intersect) {
+        code = FlipComparisonOrder(code);
+        std::swap(true_val, false_val);
+      }
+      if (!dest_intersect) {
+        LoadConstantNoClobber(rs_dest, false_val);
+      }
+      LoadConstantNoClobber(temp_reg, true_val);
+      OpRegReg(kOpCmp, left_op, right_op);
+      if (dest_intersect) {
+        LoadConstantNoClobber(rs_dest, false_val);
+        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
+      }
+      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
+      FreeTemp(temp_reg);
+    } else {
+      // Slow path: no free temp register, select via compare, branch and two constant loads.
+      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
+      LoadConstantNoClobber(rs_dest, false_val);
+      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
+      LIR* true_case = NewLIR0(kPseudoTargetLabel);
+      cmp_branch->target = true_case;
+      LoadConstantNoClobber(rs_dest, true_val);
+      LIR* end = NewLIR0(kPseudoTargetLabel);
+      that_is_it->target = end;
+    }
+  }
+}
+
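A note on the zero/one fast path above: kX86Set8R writes only the low byte of rs_dest, so when rs_dest aliases an operand (and therefore cannot be pre-zeroed before the cmp) the MOVZX is what discards the stale upper bits. A small model of that invariant, with illustrative names:

    #include <cstdint>

    // Models SET8 followed by MOVZX8 on a register whose upper bits are stale.
    uint32_t SetccThenZeroExtend(uint32_t stale_dest_bits, bool condition) {
      uint32_t after_set8 = (stale_dest_bits & ~0xffu) | (condition ? 1u : 0u);
      return after_set8 & 0xffu;  // MOVZX8: only the freshly written byte survives.
    }

When rs_dest does not alias an operand, LoadConstantNoClobber zeroes it up front and the MOVZX is skipped.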
 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
@@ -2399,110 +2460,6 @@
   StoreValue(rl_dest, rl_result);
 }
 
-void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                            bool type_known_abstract, bool use_declaring_class,
-                                            bool can_assume_type_is_in_dex_cache,
-                                            uint32_t type_idx, RegLocation rl_dest,
-                                            RegLocation rl_src) {
-  FlushAllRegs();
-  // May generate a call - use explicit registers.
-  LockCallTemps();
-  RegStorage method_reg = TargetReg(kArg1, kRef);  // kArg1 gets current Method*.
-  LoadCurrMethodDirect(method_reg);
-  RegStorage class_reg = TargetReg(kArg2, kRef);  // kArg2 will hold the Class*.
-  RegStorage ref_reg = TargetReg(kArg0, kRef);  // kArg2 will hold the ref.
-  // Reference must end up in kArg0.
-  if (needs_access_check) {
-    // Check we have access to type_idx and if not throw IllegalAccessError,
-    // Caller function returns Class* in kArg0.
-    if (cu_->target64) {
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
-                           type_idx, true);
-    } else {
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                           type_idx, true);
-    }
-    OpRegCopy(class_reg, TargetReg(kRet0, kRef));
-    LoadValueDirectFixed(rl_src, ref_reg);
-  } else if (use_declaring_class) {
-    LoadValueDirectFixed(rl_src, ref_reg);
-    LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
-                class_reg, kNotVolatile);
-  } else {
-    // Load dex cache entry into class_reg (kArg2).
-    LoadValueDirectFixed(rl_src, ref_reg);
-    LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
-                class_reg, kNotVolatile);
-    int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
-        (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
-    LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
-    if (!can_assume_type_is_in_dex_cache) {
-      // Need to test presence of type in dex cache at runtime.
-      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
-      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
-      if (cu_->target64) {
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
-      } else {
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
-      }
-      OpRegCopy(class_reg, TargetReg(kRet0, kRef));  // Align usage with fast path.
-      LoadValueDirectFixed(rl_src, ref_reg);  /* Reload Ref. */
-      // Rejoin code paths
-      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
-      hop_branch->target = hop_target;
-    }
-  }
-  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
-  RegLocation rl_result = GetReturn(kRefReg);
-
-  // On x86-64 kArg0 is not EAX, so we have to copy ref from kArg0 to EAX.
-  if (cu_->target64) {
-    OpRegCopy(rl_result.reg, ref_reg);
-  }
-
-  // For 32-bit, SETcc only works with EAX..EDX.
-  DCHECK_LT(rl_result.reg.GetRegNum(), 4);
-
-  // Is the class NULL?
-  LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, NULL);
-
-  RegStorage ref_class_reg = TargetReg(kArg1, kRef);  // kArg2 will hold the Class*.
-  /* Load object->klass_. */
-  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(ref_reg,  mirror::Object::ClassOffset().Int32Value(), ref_class_reg,
-              kNotVolatile);
-  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
-  LIR* branchover = nullptr;
-  if (type_known_final) {
-    // Ensure top 3 bytes of result are 0.
-    LoadConstant(rl_result.reg, 0);
-    OpRegReg(kOpCmp, ref_class_reg, class_reg);
-    // Set the low byte of the result to 0 or 1 from the compare condition code.
-    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondEq);
-  } else {
-    if (!type_known_abstract) {
-      LoadConstant(rl_result.reg, 1);     // Assume result succeeds.
-      branchover = OpCmpBranch(kCondEq, ref_class_reg, class_reg, NULL);
-    }
-    OpRegCopy(TargetReg(kArg0, kRef), class_reg);
-    if (cu_->target64) {
-      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
-    } else {
-      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
-    }
-  }
-  // TODO: only clobber when type isn't final?
-  ClobberCallerSave();
-  /* Branch targets here. */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  StoreValue(rl_dest, rl_result);
-  branch1->target = target;
-  if (branchover != nullptr) {
-    branchover->target = target;
-  }
-}
-
 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_lhs, RegLocation rl_rhs) {
   OpKind op = kOpBkpt;
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 06001d7..451ae8b 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1225,19 +1225,12 @@
  * otherwise bails to standard library code.
  */
 bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers
-
-  // EAX: 16 bit character being searched.
-  // ECX: count: number of words to be searched.
-  // EDI: String being searched.
-  // EDX: temporary during execution.
-  // EBX or R11: temporary during execution (depending on mode).
-
   RegLocation rl_obj = info->args[0];
   RegLocation rl_char = info->args[1];
   RegLocation rl_start;  // Note: only present in III flavor of IndexOf.
-  RegStorage tmpReg = cu_->target64 ? rs_r11 : rs_rBX;
+  // RBX is a callee-save register in 64-bit mode, so use R11 there instead.
+  RegStorage rs_tmp = cu_->target64 ? rs_r11 : rs_rBX;
+  int start_value = -1;
 
   uint32_t char_value =
     rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
@@ -1248,22 +1241,46 @@
   }
 
   // Okay, we are committed to inlining this.
+  // EAX: 16 bit character being searched.
+  // ECX: count: number of words to be searched.
+  // EDI: String being searched.
+  // EDX: temporary during execution.
+  // EBX or R11: temporary during execution (depending on mode).
+  // REP SCASW: search instruction.
+
+  FlushReg(rs_rAX);
+  Clobber(rs_rAX);
+  LockTemp(rs_rAX);
+  FlushReg(rs_rCX);
+  Clobber(rs_rCX);
+  LockTemp(rs_rCX);
+  FlushReg(rs_rDX);
+  Clobber(rs_rDX);
+  LockTemp(rs_rDX);
+  FlushReg(rs_tmp);
+  Clobber(rs_tmp);
+  LockTemp(rs_tmp);
+  if (cu_->target64) {
+    FlushReg(rs_rDI);
+    Clobber(rs_rDI);
+    LockTemp(rs_rDI);
+  }
+
   RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
 
   // Is the string non-NULL?
   LoadValueDirectFixed(rl_obj, rs_rDX);
   GenNullCheck(rs_rDX, info->opt_flags);
-  // uint32_t opt_flags = info->opt_flags;
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
 
-  // Does the character fit in 16 bits?
-  LIR* slowpath_branch = nullptr;
+  LIR *slowpath_branch = nullptr, *length_compare = nullptr;
+
+  // We need the value in EAX.
   if (rl_char.is_const) {
-    // We need the value in EAX.
     LoadConstantNoClobber(rs_rAX, char_value);
   } else {
-    // Character is not a constant; compare at runtime.
+    // Does the character fit in 16 bits? Compare it at runtime.
     LoadValueDirectFixed(rl_char, rs_rAX);
     slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
   }
@@ -1278,31 +1295,33 @@
   // Start of char data with array_.
   int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
-  // Character is in EAX.
-  // Object pointer is in EDX.
-
   // Compute the number of words to search in to rCX.
   Load32Disp(rs_rDX, count_offset, rs_rCX);
 
-  // Possible signal here due to null pointer dereference.
-  // Note that the signal handler will expect the top word of
-  // the stack to be the ArtMethod*.  If the PUSH edi instruction
-  // below is ahead of the load above then this will not be true
-  // and the signal handler will not work.
-  MarkPossibleNullPointerException(0);
+  if (!cu_->target64) {
+    // Possible signal here due to null pointer dereference.
+    // Note that the signal handler will expect the top word of
+    // the stack to be the ArtMethod*.  If the PUSH edi instruction
+    // below is ahead of the load above then this will not be true
+    // and the signal handler will not work.
+    MarkPossibleNullPointerException(0);
 
-  // We need to preserve EDI, but have no spare registers, so push it on the stack.
-  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
-  NewLIR1(kX86Push32R, rs_rDI.GetReg());
+    // EDI is callee-save register in 32-bit mode.
+    NewLIR1(kX86Push32R, rs_rDI.GetReg());
+  }
 
-  LIR *length_compare = nullptr;
-  int start_value = 0;
-  bool is_index_on_stack = false;
   if (zero_based) {
+    // Start index is not present.
     // We have to handle an empty string.  Use special instruction JECXZ.
     length_compare = NewLIR0(kX86Jecxz8);
+
+    // Copy the number of words to search into a temporary register.
+    // We will use the register at the end to calculate the result.
+    OpRegReg(kOpMov, rs_tmp, rs_rCX);
   } else {
+    // Start index is present.
     rl_start = info->args[2];
+
     // We have to offset by the start index.
     if (rl_start.is_const) {
       start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
@@ -1310,73 +1329,55 @@
 
       // Is the start > count?
       length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
+      OpRegImm(kOpMov, rs_rDI, start_value);
+
+      // Copy the number of words to search into a temporary register.
+      // We will use the register at the end to calculate the result.
+      OpRegReg(kOpMov, rs_tmp, rs_rCX);
 
       if (start_value != 0) {
+        // Decrease the number of words to search by the start index.
         OpRegImm(kOpSub, rs_rCX, start_value);
       }
     } else {
-      // Runtime start index.
-      rl_start = UpdateLocTyped(rl_start, kCoreReg);
-      if (rl_start.location == kLocPhysReg) {
-        // Handle "start index < 0" case.
-        OpRegReg(kOpXor, tmpReg, tmpReg);
-        OpRegReg(kOpCmp, rl_start.reg, tmpReg);
-        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, tmpReg);
-
-        // The length of the string should be greater than the start index.
-        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
-        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
-        if (rl_start.reg == rs_rDI) {
-          // The special case. We will use EDI further, so lets put start index to stack.
-          NewLIR1(kX86Push32R, rs_rDI.GetReg());
-          is_index_on_stack = true;
-        }
-      } else {
+      // Handle "start index < 0" case.
+      if (!cu_->target64 && rl_start.location != kLocPhysReg) {
         // Load the start index from stack, remembering that we pushed EDI.
-        int displacement = SRegOffset(rl_start.s_reg_low) +
-                           (cu_->target64 ? 2 : 1) * sizeof(uint32_t);
+        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
         {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          Load32Disp(rs_rX86_SP, displacement, tmpReg);
+          Load32Disp(rs_rX86_SP, displacement, rs_rDI);
         }
-        OpRegReg(kOpXor, rs_rDI, rs_rDI);
-        OpRegReg(kOpCmp, tmpReg, rs_rDI);
-        OpCondRegReg(kOpCmov, kCondLt, tmpReg, rs_rDI);
-
-        length_compare = OpCmpBranch(kCondLe, rs_rCX, tmpReg, nullptr);
-        OpRegReg(kOpSub, rs_rCX, tmpReg);
-        // Put the start index to stack.
-        NewLIR1(kX86Push32R, tmpReg.GetReg());
-        is_index_on_stack = true;
+      } else {
+        LoadValueDirectFixed(rl_start, rs_rDI);
       }
+      OpRegReg(kOpXor, rs_tmp, rs_tmp);
+      OpRegReg(kOpCmp, rs_rDI, rs_tmp);
+      OpCondRegReg(kOpCmov, kCondLt, rs_rDI, rs_tmp);
+
+      // The length of the string should be greater than the start index.
+      length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rDI, nullptr);
+
+      // Copy the number of words to search into a temporary register.
+      // We will use the register at the end to calculate the result.
+      OpRegReg(kOpMov, rs_tmp, rs_rCX);
+
+      // Decrease the number of words to search by the start index.
+      OpRegReg(kOpSub, rs_rCX, rs_rDI);
     }
   }
-  DCHECK(length_compare != nullptr);
 
-  // ECX now contains the count in words to be searched.
-
-  // Load the address of the string into R11 or EBX (depending on mode).
+  // Load the address of the string into EDI.
+  // In the case of a start index we have to add the address to the existing value in EDI.
   // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
-  Load32Disp(rs_rDX, value_offset, rs_rDI);
-  Load32Disp(rs_rDX, offset_offset, tmpReg);
-  OpLea(tmpReg, rs_rDI, tmpReg, 1, data_offset);
-
-  // Now compute into EDI where the search will start.
-  if (zero_based || rl_start.is_const) {
-    if (start_value == 0) {
-      OpRegCopy(rs_rDI, tmpReg);
-    } else {
-      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), tmpReg.GetReg(), 2 * start_value);
-    }
+  if (zero_based || (rl_start.is_const && start_value == 0)) {
+    Load32Disp(rs_rDX, offset_offset, rs_rDI);
   } else {
-    if (is_index_on_stack == true) {
-      // Load the start index from stack.
-      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
-      OpLea(rs_rDI, tmpReg, rs_rDX, 1, 0);
-    } else {
-      OpLea(rs_rDI, tmpReg, rl_start.reg, 1, 0);
-    }
+    OpRegMem(kOpAdd, rs_rDI, rs_rDX, offset_offset);
   }
+  OpRegImm(kOpLsl, rs_rDI, 1);
+  OpRegMem(kOpAdd, rs_rDI, rs_rDX, value_offset);
+  OpRegImm(kOpAdd, rs_rDI, data_offset);
 
   // EDI now contains the start of the string to be searched.
   // We are all prepared to do the search for the character.
@@ -1386,10 +1387,9 @@
   LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
 
   // yes, we matched.  Compute the index of the result.
-  // index = ((curr_ptr - orig_ptr) / 2) - 1.
-  OpRegReg(kOpSub, rs_rDI, tmpReg);
-  OpRegImm(kOpAsr, rs_rDI, 1);
-  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
+  OpRegReg(kOpSub, rs_tmp, rs_rCX);
+  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_tmp.GetReg(), -1);
+
   LIR *all_done = NewLIR1(kX86Jmp8, 0);
 
   // Failed to match; return -1.
@@ -1400,8 +1400,9 @@
 
   // And join up at the end.
   all_done->target = NewLIR0(kPseudoTargetLabel);
-  // Restore EDI from the stack.
-  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+
+  if (!cu_->target64) {
+    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+  }
 
   // Out of line code returns here.
   if (slowpath_branch != nullptr) {
@@ -1410,6 +1411,15 @@
   }
 
   StoreValue(rl_dest, rl_return);
+
+  FreeTemp(rs_rAX);
+  FreeTemp(rs_rCX);
+  FreeTemp(rs_rDX);
+  FreeTemp(rs_tmp);
+  if (cu_->target64) {
+    FreeTemp(rs_rDI);
+  }
+
   return true;
 }
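The index recovery at the end of the inlined IndexOf deserves a gloss: rs_tmp holds the full word count saved before REP SCASW runs, and ECX counts down and stops one word past the match, so the final subtract-and-LEA yields the character index. An arithmetic sketch with illustrative names:

    // index = (total_words_saved - words_remaining_after_scan) - 1
    int IndexFromScan(int total_words, int remaining_words) {
      // SCASW stops one element past the match, hence the -1 (the LEA above).
      return (total_words - remaining_words) - 1;
    }

With a start index, rs_tmp still holds the full count while ECX starts from the reduced count, so the same formula yields start plus the offset within the searched substring.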
 
@@ -2487,7 +2497,7 @@
   in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
   const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
   const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
-          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+          info->args[last_mapped_in].wide ? 2 : 1;
   int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
 
   // First of all, check whether it makes sense to use bulk copying
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 047a65d..bae01d9 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -1050,6 +1050,7 @@
         ->IsIntrinsic(index, &method)) {
       switch (method.opcode) {
         case kIntrinsicAbsDouble:
+        case kIntrinsicMinMaxDouble:
           store_method_addr_ = true;
           break;
         default:
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 17f9b91..500c6b8 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -534,10 +534,14 @@
   Binary0fOpCode(kX86Ucomiss),  // unordered float compare
   Binary0fOpCode(kX86Comisd),   // double compare
   Binary0fOpCode(kX86Comiss),   // float compare
-  Binary0fOpCode(kX86Orps),     // or of floating point registers
-  Binary0fOpCode(kX86Xorps),    // xor of floating point registers
-  Binary0fOpCode(kX86Addsd),    // double add
-  Binary0fOpCode(kX86Addss),    // float add
+  Binary0fOpCode(kX86Orpd),     // double logical OR
+  Binary0fOpCode(kX86Orps),     // float logical OR
+  Binary0fOpCode(kX86Andpd),    // double logical AND
+  Binary0fOpCode(kX86Andps),    // float logical AND
+  Binary0fOpCode(kX86Xorpd),    // double logical XOR
+  Binary0fOpCode(kX86Xorps),    // float logical XOR
+  Binary0fOpCode(kX86Addsd),    // double ADD
+  Binary0fOpCode(kX86Addss),    // float ADD
   Binary0fOpCode(kX86Mulsd),    // double multiply
   Binary0fOpCode(kX86Mulss),    // float multiply
   Binary0fOpCode(kX86Cvtsd2ss),  // double to float
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 7c9614f..cdf71b6 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -41,6 +41,11 @@
 
     void ClassRejected(ClassReference ref) OVERRIDE;
 
+    // We are running in an environment where we can call patchoat safely, so we should.
+    bool IsRelocationPossible() OVERRIDE {
+      return true;
+    }
+
   private:
     VerificationResults* const verification_results_;
     DexFileToMethodInlinerMap* const method_inliner_map_;
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 892b302..4a3e071 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -324,13 +324,15 @@
     }
 
    for (int i = 0; ssa_rep->fp_use && i < ssa_rep->num_uses; i++) {
-      if (ssa_rep->fp_use[i])
+      if (ssa_rep->fp_use[i]) {
         changed |= SetFp(uses[i]);
       }
+    }
    for (int i = 0; ssa_rep->fp_def && i < ssa_rep->num_defs; i++) {
-      if (ssa_rep->fp_def[i])
+      if (ssa_rep->fp_def[i]) {
         changed |= SetFp(defs[i]);
       }
+    }
     // Special-case handling for moves & Phi
     if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
       /*
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 89295f2..022ec6b 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -231,8 +231,8 @@
   // the super class.
   bool can_sharpen_super_based_on_type = (*invoke_type == kSuper) &&
       (referrer_class != methods_class) && referrer_class->IsSubClass(methods_class) &&
-      resolved_method->GetMethodIndex() < methods_class->GetVTable()->GetLength() &&
-      (methods_class->GetVTable()->Get(resolved_method->GetMethodIndex()) == resolved_method);
+      resolved_method->GetMethodIndex() < methods_class->GetVTableLength() &&
+      (methods_class->GetVTableEntry(resolved_method->GetMethodIndex()) == resolved_method);
 
   if (can_sharpen_virtual_based_on_type || can_sharpen_super_based_on_type) {
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9e88c8d..f85bc65 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -955,7 +955,8 @@
     if (class_in_image) {
       // boot -> app class pointers.
       *is_type_initialized = resolved_class->IsInitialized();
-      *use_direct_type_ptr = true;
+      // TODO: This is somewhat hacky. We should refactor this whole invoke codepath.
+      *use_direct_type_ptr = !GetCompilerOptions().GetIncludePatchInformation();
       *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class);
       return true;
     } else {
@@ -1099,6 +1100,9 @@
   *direct_method = 0;
   bool use_dex_cache = false;
   const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
+  // TODO: This is somewhat hacky. We should refactor this whole invoke codepath.
+  const bool force_relocations = (compiling_boot ||
+                                  GetCompilerOptions().GetIncludePatchInformation());
   if (compiler_->IsPortable()) {
     if (sharp_type != kStatic && sharp_type != kDirect) {
       return;
@@ -1109,7 +1113,7 @@
       return;
     }
     // TODO: support patching on all architectures.
-    use_dex_cache = compiling_boot && !support_boot_image_fixup_;
+    use_dex_cache = force_relocations && !support_boot_image_fixup_;
   }
   bool method_code_in_boot = (method->GetDeclaringClass()->GetClassLoader() == nullptr);
   if (!use_dex_cache) {
@@ -1128,8 +1132,8 @@
   if (method_code_in_boot) {
     *stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot;
   }
-  if (!use_dex_cache && compiling_boot) {
-    if (!IsImageClass(method->GetDeclaringClassDescriptor())) {
+  if (!use_dex_cache && force_relocations) {
+    if (!IsImage() || !IsImageClass(method->GetDeclaringClassDescriptor())) {
       // We can only branch directly to Methods that are resolved in the DexCache.
       // Otherwise we won't invoke the resolution trampoline.
       use_dex_cache = true;
@@ -1150,7 +1154,7 @@
       if (dex_method_idx != DexFile::kDexNoIndex) {
         target_method->dex_method_index = dex_method_idx;
       } else {
-        if (compiling_boot && !use_dex_cache) {
+        if (force_relocations && !use_dex_cache) {
           target_method->dex_method_index = method->GetDexMethodIndex();
           target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
         }
@@ -1167,19 +1171,26 @@
       *type = sharp_type;
     }
   } else {
-    bool method_in_image = compiling_boot ||
+    bool method_in_image =
         Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace();
-    if (method_in_image) {
+    if (method_in_image || compiling_boot) {
+      // We know we must be able to get to the method in the image, so use that pointer.
       CHECK(!method->IsAbstract());
       *type = sharp_type;
-      *direct_method = compiling_boot ? -1 : reinterpret_cast<uintptr_t>(method);
-      *direct_code = compiling_boot ? -1 : compiler_->GetEntryPointOf(method);
+      *direct_method = force_relocations ? -1 : reinterpret_cast<uintptr_t>(method);
+      *direct_code = force_relocations ? -1 : compiler_->GetEntryPointOf(method);
       target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
       target_method->dex_method_index = method->GetDexMethodIndex();
     } else if (!must_use_direct_pointers) {
       // Set the code and rely on the dex cache for the method.
       *type = sharp_type;
-      *direct_code = compiler_->GetEntryPointOf(method);
+      if (force_relocations) {
+        *direct_code = -1;
+        target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+        target_method->dex_method_index = method->GetDexMethodIndex();
+      } else {
+        *direct_code = compiler_->GetEntryPointOf(method);
+      }
     } else {
       // Direct pointers were required but none were available.
       VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
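The force_relocations plumbing above boils down to one predicate that then gates every direct pointer. A condensed sketch, assuming the surrounding CompilerDriver context; the variable names mirror the patch, the framing is illustrative:

    // force_relocations: emit -1 sentinels instead of raw pointers so that
    // patchoat can rewrite them after the image is relocated.
    const bool force_relocations = compiling_boot ||
                                   GetCompilerOptions().GetIncludePatchInformation();
    uintptr_t direct_method_ptr = force_relocations
        ? static_cast<uintptr_t>(-1)              // placeholder for patchoat
        : reinterpret_cast<uintptr_t>(method);    // real pointer, no patching

In the forced case the target method is additionally pinned by dex file and method index (target_method->dex_file / dex_method_index) so the patcher can locate it.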
diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc
new file mode 100644
index 0000000..6112fbb
--- /dev/null
+++ b/compiler/elf_patcher.cc
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "elf_patcher.h"
+
+#include <vector>
+#include <set>
+
+#include "elf_file.h"
+#include "elf_utils.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/string-inl.h"
+#include "oat.h"
+#include "os.h"
+#include "utils.h"
+
+namespace art {
+
+bool ElfPatcher::Patch(const CompilerDriver* driver, ElfFile* elf_file,
+                       const std::string& oat_location,
+                       ImageAddressCallback cb, void* cb_data,
+                       std::string* error_msg) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location);
+  if (oat_file == nullptr) {
+    CHECK(Runtime::Current()->IsCompiler());
+    oat_file = OatFile::Open(oat_location, oat_location, NULL, false, error_msg);
+    if (oat_file == nullptr) {
+      *error_msg = StringPrintf("Unable to find or open oat file at '%s': %s", oat_location.c_str(),
+                                error_msg->c_str());
+      return false;
+    }
+    CHECK_EQ(class_linker->RegisterOatFile(oat_file), oat_file);
+  }
+  return ElfPatcher::Patch(driver, elf_file, oat_file,
+                           reinterpret_cast<uintptr_t>(oat_file->Begin()), cb, cb_data, error_msg);
+}
+
+bool ElfPatcher::Patch(const CompilerDriver* driver, ElfFile* elf, const OatFile* oat_file,
+                       uintptr_t oat_data_start, ImageAddressCallback cb, void* cb_data,
+                       std::string* error_msg) {
+  Elf32_Shdr* data_sec = elf->FindSectionByName(".rodata");
+  if (data_sec == nullptr) {
+    *error_msg = "Unable to find .rodata section and oat header";
+    return false;
+  }
+  OatHeader* oat_header = reinterpret_cast<OatHeader*>(elf->Begin() + data_sec->sh_offset);
+  if (!oat_header->IsValid()) {
+    *error_msg = "Oat header was not valid";
+    return false;
+  }
+
+  ElfPatcher p(driver, elf, oat_file, oat_header, oat_data_start, cb, cb_data, error_msg);
+  return p.PatchElf();
+}
+
+mirror::ArtMethod* ElfPatcher::GetTargetMethod(const CompilerDriver::CallPatchInformation* patch) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(class_linker->FindDexCache(*patch->GetTargetDexFile())));
+  mirror::ArtMethod* method = class_linker->ResolveMethod(*patch->GetTargetDexFile(),
+                                                          patch->GetTargetMethodIdx(),
+                                                          dex_cache,
+                                                          NullHandle<mirror::ClassLoader>(),
+                                                          NullHandle<mirror::ArtMethod>(),
+                                                          patch->GetTargetInvokeType());
+  CHECK(method != NULL)
+    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
+  CHECK(!method->IsRuntimeMethod())
+    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
+  CHECK(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx()) == method)
+    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
+    << PrettyMethod(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx())) << " "
+    << PrettyMethod(method);
+  return method;
+}
+
+mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
+  mirror::Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
+                                                   dex_cache, NullHandle<mirror::ClassLoader>());
+  CHECK(klass != NULL)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
+  CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
+    << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
+    << PrettyClass(klass);
+  return klass;
+}
+
+void ElfPatcher::AddPatch(uintptr_t p) {
+  if (write_patches_ && patches_set_.find(p) == patches_set_.end()) {
+    patches_set_.insert(p);
+    patches_.push_back(p);
+  }
+}
+
+uint32_t* ElfPatcher::GetPatchLocation(uintptr_t patch_ptr) {
+  CHECK_GE(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->Begin()));
+  uintptr_t off = patch_ptr - reinterpret_cast<uintptr_t>(oat_file_->Begin());
+  uintptr_t ret = reinterpret_cast<uintptr_t>(oat_header_) + off;
+
+  CHECK_GE(ret, reinterpret_cast<uintptr_t>(elf_file_->Begin()));
+  CHECK_LT(ret, reinterpret_cast<uintptr_t>(elf_file_->End()));
+  return reinterpret_cast<uint32_t*>(ret);
+}
+
+void ElfPatcher::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                                                patch->GetReferrerClassDefIdx(),
+                                                                patch->GetReferrerMethodIdx());
+  // TODO: make this Thumb2 specific
+  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(quick_oat_code) & ~0x1);
+  uintptr_t patch_ptr = reinterpret_cast<uintptr_t>(base + patch->GetLiteralOffset());
+  uint32_t* patch_location = GetPatchLocation(patch_ptr);
+  if (kIsDebugBuild) {
+    if (patch->IsCall()) {
+      const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall();
+      const DexFile::MethodId& id =
+          cpatch->GetTargetDexFile()->GetMethodId(cpatch->GetTargetMethodIdx());
+      uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << " expected=" << expected
+          << " value=" << value;
+    }
+    if (patch->IsType()) {
+      const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
+      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
+      uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << " expected=" << expected
+          << " value=" << value;
+    }
+  }
+  *patch_location = value;
+  oat_header_->UpdateChecksum(patch_location, sizeof(value));
+
+  if (patch->IsCall() && patch->AsCall()->IsRelative()) {
+    // We never record relative patches.
+    return;
+  }
+  uintptr_t loc = patch_ptr - (reinterpret_cast<uintptr_t>(oat_file_->Begin()) +
+                               oat_header_->GetExecutableOffset());
+  CHECK_GT(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->Begin()) +
+                      oat_header_->GetExecutableOffset());
+  CHECK_LT(loc, oat_file_->Size() - oat_header_->GetExecutableOffset());
+  AddPatch(loc);
+}
+
+bool ElfPatcher::PatchElf() {
+  // TODO: if we are adding patches the resulting ELF file might have a
+  // potentially rather large amount of free space where patches might have been
+  // placed. We should adjust the ELF file to get rid of this excess space.
+  if (write_patches_) {
+    patches_.reserve(compiler_driver_->GetCodeToPatch().size() +
+                     compiler_driver_->GetMethodsToPatch().size() +
+                     compiler_driver_->GetClassesToPatch().size());
+  }
+  Thread* self = Thread::Current();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  const char* old_cause = self->StartAssertNoThreadSuspension("ElfPatcher");
+
+  typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches;
+  const CallPatches& code_to_patch = compiler_driver_->GetCodeToPatch();
+  for (size_t i = 0; i < code_to_patch.size(); i++) {
+    const CompilerDriver::CallPatchInformation* patch = code_to_patch[i];
+
+    mirror::ArtMethod* target = GetTargetMethod(patch);
+    uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
+    DCHECK_NE(quick_code, 0U) << PrettyMethod(target);
+    const OatFile* target_oat = class_linker->FindOpenedOatFileForDexFile(*patch->GetTargetDexFile());
+    // Get where the data actually starts. If target is this oat_file_ it is oat_data_begin_,
+    // otherwise it is wherever target_oat is loaded.
+    uintptr_t oat_data_addr = GetBaseAddressFor(target_oat);
+    uintptr_t code_base = reinterpret_cast<uintptr_t>(target_oat->Begin());
+    uintptr_t code_offset = quick_code - code_base;
+    bool is_quick_offset = false;
+    if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) {
+      is_quick_offset = true;
+      code_offset = oat_header_->GetQuickToInterpreterBridgeOffset();
+    } else if (quick_code ==
+        reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) {
+      CHECK(target->IsNative());
+      is_quick_offset = true;
+      code_offset = oat_header_->GetQuickGenericJniTrampolineOffset();
+    }
+    uintptr_t value;
+    if (patch->IsRelative()) {
+      // value to patch is relative to the location being patched
+      const void* quick_oat_code =
+        class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                         patch->GetReferrerClassDefIdx(),
+                                         patch->GetReferrerMethodIdx());
+      if (is_quick_offset) {
+        // If it's a quick offset it means that we are doing a relative patch from the class
+        // linker oat_file to the elf_patcher oat_file, so we need to adjust the quick oat code
+        // to be the one in the output oat_file (i.e. where it is actually going to be loaded).
+        quick_code = PointerToLowMemUInt32(reinterpret_cast<void*>(oat_data_addr + code_offset));
+        quick_oat_code =
+            reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(quick_oat_code) +
+                oat_data_addr - code_base);
+      }
+      uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code);
+      uintptr_t patch_location = base + patch->GetLiteralOffset();
+      value = quick_code - patch_location + patch->RelativeOffset();
+    } else if (code_offset != 0) {
+      value = PointerToLowMemUInt32(reinterpret_cast<void*>(oat_data_addr + code_offset));
+    } else {
+      value = 0;
+    }
+    SetPatchLocation(patch, value);
+  }
+
+  const CallPatches& methods_to_patch = compiler_driver_->GetMethodsToPatch();
+  for (size_t i = 0; i < methods_to_patch.size(); i++) {
+    const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i];
+    mirror::ArtMethod* target = GetTargetMethod(patch);
+    SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
+  }
+
+  const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
+      compiler_driver_->GetClassesToPatch();
+  for (size_t i = 0; i < classes_to_patch.size(); i++) {
+    const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
+    mirror::Class* target = GetTargetType(patch);
+    SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
+  }
+
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  if (write_patches_) {
+    return WriteOutPatchData();
+  }
+  return true;
+}
+
+bool ElfPatcher::WriteOutPatchData() {
+  Elf32_Shdr* shdr = elf_file_->FindSectionByName(".oat_patches");
+  if (shdr != nullptr) {
+    CHECK_EQ(shdr, elf_file_->FindSectionByType(SHT_OAT_PATCH))
+        << "Incorrect type for .oat_patches section";
+    CHECK_LE(patches_.size() * sizeof(uintptr_t), shdr->sh_size)
+        << "We got more patches than anticipated";
+    CHECK_LE(reinterpret_cast<uintptr_t>(elf_file_->Begin()) + shdr->sh_offset + shdr->sh_size,
+              reinterpret_cast<uintptr_t>(elf_file_->End())) << "section is too large";
+    CHECK(shdr == &elf_file_->GetSectionHeader(elf_file_->GetSectionHeaderNum() - 1) ||
+          shdr->sh_offset + shdr->sh_size <= (shdr + 1)->sh_offset)
+        << "Section overlaps onto next section";
+    // It's mmap'd so we can just memcpy.
+    memcpy(elf_file_->Begin() + shdr->sh_offset, patches_.data(),
+           patches_.size() * sizeof(uintptr_t));
+    // TODO: We should fill in the newly empty space between the last patch and
+    // the start of the next section by moving the following sections down if
+    // possible.
+    shdr->sh_size = patches_.size() * sizeof(uintptr_t);
+    return true;
+  } else {
+    LOG(ERROR) << "Unable to find section header for SHT_OAT_PATCH";
+    *error_msg_ = "Unable to find section to write patch information to in ";
+    *error_msg_ += elf_file_->GetFile().GetPath();
+    return false;
+  }
+}
+
+}  // namespace art
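GetPatchLocation above rebases an address from the loaded oat file onto the mmap'd ELF image, using the oat header (the start of .rodata) as the shared origin. The same arithmetic as a self-contained sketch, with plain integers standing in for the ElfFile and OatFile accessors:

#include <cassert>
#include <cstdint>

uint32_t* TranslateToElf(uintptr_t patch_ptr,
                         uintptr_t oat_begin,       // oat_file_->Begin()
                         uintptr_t elf_oat_header,  // .rodata inside the ELF mapping
                         uintptr_t elf_begin, uintptr_t elf_end) {
  assert(patch_ptr >= oat_begin);
  uintptr_t off = patch_ptr - oat_begin;      // offset within the oat data
  uintptr_t ret = elf_oat_header + off;       // same offset in the ELF mapping
  assert(ret >= elf_begin && ret < elf_end);  // must land inside the file
  return reinterpret_cast<uint32_t*>(ret);
}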
diff --git a/compiler/elf_patcher.h b/compiler/elf_patcher.h
new file mode 100644
index 0000000..0a9f0a01
--- /dev/null
+++ b/compiler/elf_patcher.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_ELF_PATCHER_H_
+#define ART_COMPILER_ELF_PATCHER_H_
+
+#include "base/mutex.h"
+#include "driver/compiler_driver.h"
+#include "elf_file.h"
+#include "mirror/art_method.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "oat_file.h"
+#include "oat.h"
+#include "os.h"
+
+namespace art {
+
+class ElfPatcher {
+ public:
+  typedef void* (*ImageAddressCallback)(void* data, mirror::Object* obj);
+
+  static bool Patch(const CompilerDriver* driver, ElfFile* elf_file,
+                    const std::string& oat_location,
+                    ImageAddressCallback cb, void* cb_data,
+                    std::string* error_msg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool Patch(const CompilerDriver* driver, ElfFile* elf_file,
+                    const OatFile* oat_file, uintptr_t oat_data_begin,
+                    ImageAddressCallback cb, void* cb_data,
+                    std::string* error_msg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool Patch(const CompilerDriver* driver, ElfFile* elf_file,
+                    const std::string& oat_location,
+                    std::string* error_msg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return ElfPatcher::Patch(driver, elf_file, oat_location,
+                             DefaultImageAddressCallback, nullptr, error_msg);
+  }
+
+  static bool Patch(const CompilerDriver* driver, ElfFile* elf_file,
+                    const OatFile* oat_file, uintptr_t oat_data_begin,
+                    std::string* error_msg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return ElfPatcher::Patch(driver, elf_file, oat_file, oat_data_begin,
+                             DefaultImageAddressCallback, nullptr, error_msg);
+  }
+
+ private:
+  ElfPatcher(const CompilerDriver* driver, ElfFile* elf_file, const OatFile* oat_file,
+             OatHeader* oat_header, uintptr_t oat_data_begin,
+             ImageAddressCallback cb, void* cb_data, std::string* error_msg)
+      : compiler_driver_(driver), elf_file_(elf_file), oat_file_(oat_file),
+        oat_header_(oat_header), oat_data_begin_(oat_data_begin), get_image_address_(cb),
+        cb_data_(cb_data), error_msg_(error_msg),
+        write_patches_(compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {}
+  ~ElfPatcher() {}
+
+  static void* DefaultImageAddressCallback(void* data_unused, mirror::Object* obj) {
+    return static_cast<void*>(obj);
+  }
+
+  bool PatchElf()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void AddPatch(uintptr_t off);
+
+  void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Takes a pointer into the oat_file_ and returns the corresponding pointer into the ElfFile.
+  uint32_t* GetPatchLocation(uintptr_t patch_ptr);
+
+  bool WriteOutPatchData();
+
+  uintptr_t GetBaseAddressFor(const OatFile* f) {
+    if (f == oat_file_) {
+      return oat_data_begin_;
+    } else {
+      return reinterpret_cast<uintptr_t>(f->Begin());
+    }
+  }
+
+  const CompilerDriver* compiler_driver_;
+
+  // The elf_file containing the oat_data we are patching up.
+  ElfFile* elf_file_;
+
+  // The oat_file that is actually loaded.
+  const OatFile* oat_file_;
+
+  // The oat_header_ within the elf_file_.
+  OatHeader* oat_header_;
+
+  // Where the elf_file will be loaded during normal runs.
+  uintptr_t oat_data_begin_;
+
+  // Callback to get image addresses.
+  ImageAddressCallback get_image_address_;
+  void* cb_data_;
+
+  std::string* error_msg_;
+  std::vector<uintptr_t> patches_;
+  std::set<uintptr_t> patches_set_;
+  bool write_patches_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfPatcher);
+};
+
+}  // namespace art
+#endif  // ART_COMPILER_ELF_PATCHER_H_
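For orientation, this is the shape of a call site for the callback-taking overload declared above; it mirrors how ImageWriter wires things up later in this change, with setup and error handling trimmed (sketch only, not additional patch content):

std::string error_msg;
if (!ElfPatcher::Patch(driver, elf.get(), oat_file, oat_data_begin,
                       ImageWriter::GetImageAddressCallback, image_writer,
                       &error_msg)) {
  LOG(FATAL) << "Unable to patch oat file: " << error_msg;
}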
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index fe4fcd4..3005e56 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -80,7 +80,7 @@
       t.NewTiming("WriteElf");
       ScopedObjectAccess soa(Thread::Current());
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, compiler_driver_.get(), &timings,
+      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(), &timings,
                            &key_value_store);
       bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                                 !kIsTargetBuild,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 8ef2964..d102bbc 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -29,6 +29,7 @@
 #include "driver/compiler_driver.h"
 #include "elf_file.h"
 #include "elf_utils.h"
+#include "elf_patcher.h"
 #include "elf_writer.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -84,7 +85,7 @@
     return false;
   }
   std::string error_msg;
-  oat_file_ = OatFile::OpenWritable(oat_file.get(), oat_location, &error_msg);
+  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, &error_msg);
   if (oat_file_ == nullptr) {
     LOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
         << ": " << error_msg;
@@ -801,214 +802,35 @@
   }
 }
 
-static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(
-      hs.NewHandle(class_linker->FindDexCache(*patch->GetTargetDexFile())));
-  ArtMethod* method = class_linker->ResolveMethod(*patch->GetTargetDexFile(),
-                                                  patch->GetTargetMethodIdx(),
-                                                  dex_cache,
-                                                  NullHandle<mirror::ClassLoader>(),
-                                                  NullHandle<mirror::ArtMethod>(),
-                                                  patch->GetTargetInvokeType());
-  CHECK(method != NULL)
-    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
-  CHECK(!method->IsRuntimeMethod())
-    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
-  CHECK(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx()) == method)
-    << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
-    << PrettyMethod(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx())) << " "
-    << PrettyMethod(method);
-  return method;
-}
-
-static Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
-  Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
-                                           dex_cache, NullHandle<mirror::ClassLoader>());
-  CHECK(klass != NULL)
-    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
-  CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
-    << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
-    << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
-    << PrettyClass(klass);
-  return klass;
+static OatHeader* GetOatHeaderFromElf(ElfFile* elf) {
+  Elf32_Shdr* data_sec = elf->FindSectionByName(".rodata");
+  if (data_sec == nullptr) {
+    return nullptr;
+  }
+  return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec->sh_offset);
 }
 
 void ImageWriter::PatchOatCodeAndMethods(File* elf_file) {
-  std::vector<uintptr_t> patches;
-  std::set<uintptr_t> patches_set;
-  auto maybe_push = [&patches, &patches_set] (uintptr_t p) {
-    if (patches_set.find(p) == patches_set.end()) {
-      patches.push_back(p);
-      patches_set.insert(p);
-    }
-  };
-  const bool add_patches = compiler_driver_.GetCompilerOptions().GetIncludePatchInformation();
-  if (add_patches) {
-    // TODO if we are adding patches the resulting ELF file might have a potentially rather large
-    // amount of free space where patches might have been placed. We should adjust the ELF file to
-    // get rid of this excess space.
-    patches.reserve(compiler_driver_.GetCodeToPatch().size() +
-                    compiler_driver_.GetMethodsToPatch().size() +
-                    compiler_driver_.GetClassesToPatch().size());
+  std::string error_msg;
+  std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file, PROT_READ|PROT_WRITE,
+                                             MAP_SHARED, &error_msg));
+  if (elf.get() == nullptr) {
+    LOG(FATAL) << "Unable patch oat file: " << error_msg;
+    return;
   }
-  uintptr_t loc = 0;
-  Thread* self = Thread::Current();
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
-
-  typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches;
-  const CallPatches& code_to_patch = compiler_driver_.GetCodeToPatch();
-  for (size_t i = 0; i < code_to_patch.size(); i++) {
-    const CompilerDriver::CallPatchInformation* patch = code_to_patch[i];
-    ArtMethod* target = GetTargetMethod(patch);
-    uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
-    DCHECK_NE(quick_code, 0U) << PrettyMethod(target);
-    uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader());
-    uintptr_t code_offset = quick_code - code_base;
-    bool is_quick_offset = false;
-    if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) {
-      is_quick_offset = true;
-      code_offset = quick_to_interpreter_bridge_offset_;
-    } else if (quick_code ==
-        reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) {
-      CHECK(target->IsNative());
-      is_quick_offset = true;
-      code_offset = quick_generic_jni_trampoline_offset_;
-    }
-    uintptr_t value;
-    if (patch->IsRelative()) {
-      // value to patch is relative to the location being patched
-      const void* quick_oat_code =
-        class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
-                                         patch->GetReferrerClassDefIdx(),
-                                         patch->GetReferrerMethodIdx());
-      if (is_quick_offset) {
-        // If its a quick offset it means that we are doing a relative patch from the class linker
-        // oat_file to the image writer oat_file so we need to adjust the quick oat code to be the
-        // one in the image writer oat_file.
-        quick_code = PointerToLowMemUInt32(GetOatAddress(code_offset));
-        quick_oat_code =
-            reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(quick_oat_code) +
-                reinterpret_cast<uintptr_t>(oat_data_begin_) - code_base);
-      }
-      uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code);
-      uintptr_t patch_location = base + patch->GetLiteralOffset();
-      value = quick_code - patch_location + patch->RelativeOffset();
-    } else {
-      value = PointerToLowMemUInt32(GetOatAddress(code_offset));
-    }
-    SetPatchLocation(patch, value, &loc);
-    if (add_patches && !patch->AsCall()->IsRelative()) {
-      maybe_push(loc);
-    }
+  if (!ElfPatcher::Patch(&compiler_driver_, elf.get(), oat_file_,
+                         reinterpret_cast<uintptr_t>(oat_data_begin_),
+                         GetImageAddressCallback, reinterpret_cast<void*>(this),
+                         &error_msg)) {
+    LOG(FATAL) << "unable to patch oat file: " << error_msg;
+    return;
   }
+  OatHeader* oat_header = GetOatHeaderFromElf(elf.get());
+  CHECK(oat_header != nullptr);
+  CHECK(oat_header->IsValid());
 
-  const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
-  for (size_t i = 0; i < methods_to_patch.size(); i++) {
-    const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i];
-    ArtMethod* target = GetTargetMethod(patch);
-    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)), &loc);
-    if (add_patches && !patch->AsCall()->IsRelative()) {
-      maybe_push(loc);
-    }
-  }
-
-  const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
-      compiler_driver_.GetClassesToPatch();
-  for (size_t i = 0; i < classes_to_patch.size(); i++) {
-    const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
-    Class* target = GetTargetType(patch);
-    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)), &loc);
-    if (add_patches) {
-      maybe_push(loc);
-    }
-  }
-
-  // Update the image header with the new checksum after patching
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  image_header->SetOatChecksum(oat_file_->GetOatHeader().GetChecksum());
-  self->EndAssertNoThreadSuspension(old_cause);
-
-  // Update the ElfFiles SHT_OAT_PATCH section to include the patches.
-  if (add_patches) {
-    std::string err;
-    // TODO we are mapping in the contents of this file twice. We should be able
-    // to do it only once, which would be better.
-    std::unique_ptr<ElfFile> file(ElfFile::Open(elf_file, true, false, &err));
-    if (file == nullptr) {
-      LOG(ERROR) << err;
-    }
-    Elf32_Shdr* shdr = file->FindSectionByName(".oat_patches");
-    if (shdr != nullptr) {
-      CHECK_EQ(shdr, file->FindSectionByType(SHT_OAT_PATCH))
-          << "Incorrect type for .oat_patches section";
-      CHECK_LE(patches.size() * sizeof(uintptr_t), shdr->sh_size)
-          << "We got more patches than anticipated";
-      CHECK_LE(reinterpret_cast<uintptr_t>(file->Begin()) + shdr->sh_offset + shdr->sh_size,
-               reinterpret_cast<uintptr_t>(file->End())) << "section is too large";
-      CHECK(shdr == &file->GetSectionHeader(file->GetSectionHeaderNum() - 1) ||
-            shdr->sh_offset + shdr->sh_size <= (shdr + 1)->sh_offset)
-          << "Section overlaps onto next section";
-      // It's mmap'd so we can just memcpy.
-      memcpy(file->Begin() + shdr->sh_offset, patches.data(), patches.size()*sizeof(uintptr_t));
-      // TODO We should fill in the newly empty space between the last patch and the start of the
-      // next section by moving the following sections down if possible.
-      shdr->sh_size = patches.size() * sizeof(uintptr_t);
-    } else {
-      LOG(ERROR) << "Unable to find section header for SHT_OAT_PATCH";
-    }
-  }
-}
-
-void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value,
-                                   uintptr_t* patched_ptr) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
-                                                                patch->GetReferrerClassDefIdx(),
-                                                                patch->GetReferrerMethodIdx());
-  OatHeader& oat_header = const_cast<OatHeader&>(oat_file_->GetOatHeader());
-  // TODO: make this Thumb2 specific
-  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(quick_oat_code) & ~0x1);
-  uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
-  if (kIsDebugBuild) {
-    if (patch->IsCall()) {
-      const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall();
-      const DexFile::MethodId& id = cpatch->GetTargetDexFile()->GetMethodId(cpatch->GetTargetMethodIdx());
-      uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
-      uint32_t actual = *patch_location;
-      CHECK(actual == expected || actual == value) << std::hex
-          << "actual=" << actual
-          << "expected=" << expected
-          << "value=" << value;
-    }
-    if (patch->IsType()) {
-      const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
-      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
-      uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
-      uint32_t actual = *patch_location;
-      CHECK(actual == expected || actual == value) << std::hex
-          << "actual=" << actual
-          << "expected=" << expected
-          << "value=" << value;
-    }
-  }
-  *patch_location = value;
-  oat_header.UpdateChecksum(patch_location, sizeof(value));
-
-  uintptr_t loc = reinterpret_cast<uintptr_t>(patch_location) -
-      (reinterpret_cast<uintptr_t>(oat_file_->Begin()) + oat_header.GetExecutableOffset());
-  CHECK_GT(reinterpret_cast<uintptr_t>(patch_location),
-            reinterpret_cast<uintptr_t>(oat_file_->Begin()) + oat_header.GetExecutableOffset());
-  CHECK_LT(loc, oat_file_->Size() - oat_header.GetExecutableOffset());
-
-  *patched_ptr = loc;
+  image_header->SetOatChecksum(oat_header->GetChecksum());
 }
 
 }  // namespace art
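The header is re-read out of the ELF mapping above because the patcher updates the checksum in place; any OatHeader copy taken before patching is stale. A tiny standalone illustration, with Header standing in for OatHeader:

#include <cassert>
#include <cstdint>

struct Header { uint32_t checksum; };

int main() {
  Header in_mapping = {0x1234u};   // bytes inside the mmap'd ELF
  Header stale_view = in_mapping;  // a copy cached before patching
  in_mapping.checksum = 0x5678u;   // UpdateChecksum mutates the mapping
  assert(stale_view.checksum != in_mapping.checksum);  // so re-read afterwards
  return 0;
}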
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index cf5bc93..e8bcf7f 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -70,6 +70,11 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static void* GetImageAddressCallback(void* writer, mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<ImageWriter*>(writer)->GetImageAddress(obj);
+  }
+
   mirror::Object* GetImageAddress(mirror::Object* object) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (object == NULL) {
@@ -159,9 +164,6 @@
   // Patches references in OatFile to expect runtime addresses.
   void PatchOatCodeAndMethods(File* elf_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value,
-                        uintptr_t* patched_location)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const CompilerDriver& compiler_driver_;
 
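GetImageAddressCallback above is the standard static-trampoline idiom for C-style callbacks: cast the opaque void* back to the owning object and forward. A standalone sketch of the idiom (Rebaser is hypothetical; the real callback maps mirror::Objects to image addresses):

#include <cstdio>

struct Rebaser {
  long delta;
  void* Rebase(void* p) { return static_cast<char*>(p) + delta; }
  // Same shape as ElfPatcher::ImageAddressCallback.
  static void* Trampoline(void* self, void* obj) {
    return static_cast<Rebaser*>(self)->Rebase(obj);
  }
};

int main() {
  Rebaser r = {0x1000};
  int x = 0;
  std::printf("%p -> %p\n", static_cast<void*>(&x), Rebaser::Trampoline(&r, &x));
  return 0;
}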
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 84f0b3c..11d1728 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -120,6 +120,7 @@
   OatWriter oat_writer(class_linker->GetBootClassPath(),
                        42U,
                        4096U,
+                       0,
                        compiler_driver_.get(),
                        &timings,
                        &key_value_store);
@@ -183,7 +184,7 @@
 TEST_F(OatTest, OatHeaderSizeCheck) {
   // If this test is failing and you have to update these constants,
   // it is time to update OatHeader::kOatVersion
-  EXPECT_EQ(80U, sizeof(OatHeader));
+  EXPECT_EQ(84U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
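The 80U to 84U expectation tracks the oat header growing by exactly the new 4-byte image patch delta introduced in oat_writer below. An illustrative fragment (the layout is hypothetical; only the 4-byte growth is the point):

#include <cstdint>

struct OatHeaderTailSketch {
  uint32_t quick_to_interpreter_bridge_offset_;  // pre-existing field
  int32_t image_patch_delta_;                    // new field: +4 bytes, hence 84U
};

static_assert(sizeof(OatHeaderTailSketch) == 8, "two 4-byte fields");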
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 63a3c8c..22f36f4 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -50,6 +50,7 @@
 OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_oat_checksum,
                      uintptr_t image_file_location_oat_begin,
+                     int32_t image_patch_delta,
                      const CompilerDriver* compiler,
                      TimingLogger* timings,
                      SafeMap<std::string, std::string>* key_value_store)
@@ -57,6 +58,7 @@
     dex_files_(&dex_files),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
     image_file_location_oat_begin_(image_file_location_oat_begin),
+    image_patch_delta_(image_patch_delta),
     key_value_store_(key_value_store),
     oat_header_(NULL),
     size_dex_file_alignment_(0),
@@ -126,6 +128,7 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   CHECK_EQ(compiler->IsImage(),
            key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+  CHECK_ALIGNED(image_patch_delta_, kPageSize);
 }
 
 OatWriter::~OatWriter() {
@@ -808,6 +811,7 @@
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
   if (compiler_driver_->IsImage()) {
+    CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
@@ -840,6 +844,7 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
+    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
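CHECK_ALIGNED(image_patch_delta_, kPageSize) above insists that a relocated image may only slide by whole pages, keeping page mappings valid. The test reduces to this sketch (4 KiB page size assumed for illustration):

#include <cstdint>

constexpr uint32_t kPageSizeSketch = 4096;

constexpr bool IsPageAligned(int32_t delta) {
  // The unsigned cast keeps the mask test correct for negative deltas.
  return (static_cast<uint32_t>(delta) & (kPageSizeSketch - 1)) == 0;
}

static_assert(IsPageAligned(0), "zero delta is aligned");
static_assert(IsPageAligned(-8192), "negative page multiples are aligned");
static_assert(!IsPageAligned(100), "arbitrary offsets are rejected");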
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 3d34956..945048e 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -79,6 +79,7 @@
   OatWriter(const std::vector<const DexFile*>& dex_files,
             uint32_t image_file_location_oat_checksum,
             uintptr_t image_file_location_oat_begin,
+            int32_t image_patch_delta,
             const CompilerDriver* compiler,
             TimingLogger* timings,
             SafeMap<std::string, std::string>* key_value_store);
@@ -253,6 +254,7 @@
   // dependencies on the image.
   uint32_t image_file_location_oat_checksum_;
   uintptr_t image_file_location_oat_begin_;
+  int32_t image_patch_delta_;
 
   // data to write
   SafeMap<std::string, std::string>* key_value_store_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f594129..1f0b361 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -34,6 +34,37 @@
 
 namespace art {
 
+/**
+ * Helper class to add HTemporary instructions. This class is used when
+ * converting a DEX instruction to multiple HInstructions, and where those
+ * instructions do not die at the following instruction, but instead span
+ * multiple instructions.
+ */
+class Temporaries : public ValueObject {
+ public:
+  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
+    graph_->UpdateNumberOfTemporaries(count_);
+  }
+
+  void Add(HInstruction* instruction) {
+    // We currently only support vreg size temps.
+    DCHECK(instruction->GetType() != Primitive::kPrimLong
+           && instruction->GetType() != Primitive::kPrimDouble);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    instruction->GetBlock()->AddInstruction(temp);
+    DCHECK(temp->GetPrevious() == instruction);
+  }
+
+ private:
+  HGraph* const graph_;
+
+  // The total number of temporaries that will be used.
+  const size_t count_;
+
+  // Current index in the temporary stack, updated by `Add`.
+  size_t index_;
+};
+
 static bool IsTypeSupported(Primitive::Type type) {
   return type != Primitive::kPrimFloat && type != Primitive::kPrimDouble;
 }
@@ -308,9 +339,13 @@
       arena_, number_of_arguments, return_type, dex_offset, method_idx);
 
   size_t start_index = 0;
+  Temporaries temps(graph_, is_instance_call ? 1 : 0);
   if (is_instance_call) {
     HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    invoke->SetArgumentAt(0, arg);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset);
+    current_block_->AddInstruction(null_check);
+    temps.Add(null_check);
+    invoke->SetArgumentAt(0, null_check);
     start_index = 1;
   }
 
@@ -343,37 +378,6 @@
   return true;
 }
 
-/**
- * Helper class to add HTemporary instructions. This class is used when
- * converting a DEX instruction to multiple HInstruction, and where those
- * instructions do not die at the following instruction, but instead spans
- * multiple instructions.
- */
-class Temporaries : public ValueObject {
- public:
-  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
-    graph_->UpdateNumberOfTemporaries(count_);
-  }
-
-  void Add(HInstruction* instruction) {
-    // We currently only support vreg size temps.
-    DCHECK(instruction->GetType() != Primitive::kPrimLong
-           && instruction->GetType() != Primitive::kPrimDouble);
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
-    instruction->GetBlock()->AddInstruction(temp);
-    DCHECK(temp->GetPrevious() == instruction);
-  }
-
- private:
-  HGraph* const graph_;
-
-  // The total number of temporaries that will be used.
-  const size_t count_;
-
-  // Current index in the temporary stack, updated by `Add`.
-  size_t index_;
-};
-
 bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction,
                                      uint32_t dex_offset,
                                      bool is_put) {
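The relocated Temporaries helper boils down to numbering temp slots and bumping the graph's reserved count so the frame can hold them. A standalone model of that bookkeeping (UpdateNumberOfTemporaries is assumed here to track a running maximum):

#include <cstddef>
#include <cstdio>

struct GraphSketch { size_t max_temps = 0; };

struct TemporariesSketch {
  TemporariesSketch(GraphSketch* g, size_t count) : graph_(g), index_(0) {
    if (count > g->max_temps) {
      g->max_temps = count;  // stands in for UpdateNumberOfTemporaries
    }
  }
  size_t Add() { return index_++; }  // index the emitted HTemporary would get
  GraphSketch* graph_;
  size_t index_;
};

int main() {
  GraphSketch g;
  TemporariesSketch temps(&g, 2);
  size_t t0 = temps.Add();
  size_t t1 = temps.Add();
  std::printf("t%zu t%zu, frame reserves %zu temps\n", t0, t1, g.max_temps);
  return 0;
}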
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e0db0f1..bd8c27e 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -30,13 +30,16 @@
 
 namespace art {
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator) {
+void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
   DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock());
   DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1)));
   block_labels_.SetSize(blocks.Size());
 
   DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+  if (!is_leaf) {
+    MarkNotLeaf();
+  }
   ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs()
                    + GetGraph()->GetNumberOfLocalVRegs()
                    + GetGraph()->GetNumberOfTemporaries()
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 18e3e5a..b31c3a3 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -70,7 +70,7 @@
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
   // succeeded.
-  void CompileBaseline(CodeAllocator* allocator);
+  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
   void CompileOptimized(CodeAllocator* allocator);
   static CodeGenerator* Create(ArenaAllocator* allocator,
                                HGraph* graph,
@@ -131,6 +131,14 @@
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
+  bool IsLeafMethod() const {
+    return is_leaf_;
+  }
+
+  void MarkNotLeaf() {
+    is_leaf_ = false;
+  }
+
  protected:
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
@@ -138,7 +146,8 @@
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
         slow_paths_(graph->GetArena(), 8),
-        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {}
+        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)),
+        is_leaf_(true) {}
   ~CodeGenerator() {}
 
   // Register allocation logic.
@@ -171,6 +180,8 @@
   // Temporary data structure used when doing register allocation.
   bool* const blocked_registers_;
 
+  bool is_leaf_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
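The new is_leaf_ flag exists so back ends can elide the stack-overflow probe: a leaf method with a small frame cannot grow the stack past the guard region. A standalone sketch of the decision the ARM and x86 generators below make (the 16 KiB reserve is an assumed stand-in for GetStackOverflowReservedBytes):

#include <cstddef>

constexpr size_t kReservedBytesSketch = 16 * 1024;  // assumed guard-gap size

constexpr bool IsLargeFrameSketch(size_t frame_size) {
  // A frame at least as big as the gap could step clean over the guard page.
  return frame_size >= kReservedBytesSketch;
}

constexpr bool SkipOverflowCheck(bool is_leaf, size_t frame_size) {
  // Leaf methods make no calls, so a small frame bounds their stack use.
  return is_leaf && !IsLargeFrameSketch(frame_size);
}

static_assert(SkipOverflowCheck(true, 128), "small leaf frame needs no probe");
static_assert(!SkipOverflowCheck(false, 128), "non-leaf methods always probe");
static_assert(!SkipOverflowCheck(true, 1 << 20), "huge frames always probe");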
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 73c2d48..90ec6cf 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,12 +17,14 @@
 #include "code_generator_arm.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/stack_checks.h"
 
 namespace art {
 
@@ -32,6 +34,11 @@
 
 namespace arm {
 
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathARM : public SlowPathCode {
@@ -51,6 +58,20 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
 };
 
+class StackOverflowCheckSlowPathARM : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathARM() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ LoadFromOffset(kLoadWord, PC, TR,
+        QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM);
+};
+
 #undef __
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
@@ -82,9 +103,6 @@
   return EQ;        // Unreachable.
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << ArmManagedRegister::FromCoreRegister(Register(reg));
 }
@@ -97,7 +115,8 @@
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {}
+      move_resolver_(graph->GetArena(), this),
+      assembler_(true) {}
 
 size_t CodeGeneratorARM::FrameEntrySpillSize() const {
   return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -205,6 +224,22 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kArm);
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
+      AddSlowPath(slow_path);
+
+      __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
+      __ cmp(SP, ShifterOperand(IP));
+      __ b(slow_path->GetEntryLabel(), CC);
+    } else {
+      __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+      __ ldr(IP, Address(IP, 0));
+      RecordPcInfo(0);
+    }
+  }
+
   core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
   __ PushList(1 << LR | 1 << R6 | 1 << R7);
 
@@ -377,11 +412,17 @@
 }
 
 void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  }
+
   if (instruction->AsIntConstant() != nullptr) {
     int32_t value = instruction->AsIntConstant()->GetValue();
     if (location.IsRegister()) {
       __ LoadImmediate(location.AsArm().AsCoreRegister(), value);
     } else {
+      DCHECK(location.IsStackSlot());
       __ LoadImmediate(IP, value);
       __ str(IP, Address(SP, location.GetStackIndex()));
     }
@@ -391,6 +432,7 @@
       __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value));
       __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value));
     } else {
+      DCHECK(location.IsDoubleStackSlot());
       __ LoadImmediate(IP, Low32Bits(value));
       __ str(IP, Address(SP, location.GetStackIndex()));
       __ LoadImmediate(IP, High32Bits(value));
@@ -424,11 +466,11 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimNot:
       case Primitive::kPrimInt:
-        Move32(location, instruction->GetLocations()->Out());
+        Move32(location, locations->Out());
         break;
 
       case Primitive::kPrimLong:
-        Move64(location, instruction->GetLocations()->Out());
+        Move64(location, locations->Out());
         break;
 
       default:
@@ -478,20 +520,33 @@
   HCondition* condition = cond->AsCondition();
   if (condition->NeedsMaterialization()) {
     // Condition has been materialized, compare the output to 0
-    if (!if_instr->GetLocations()->InAt(0).IsRegister()) {
-      LOG(FATAL) << "Materialized condition is not in an ARM register";
-    }
+    DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
     __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
            ShifterOperand(0));
     __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), EQ);
   } else {
     // Condition has not been materialized, use its inputs as the comparison and its
     // condition as the branch condition.
-    __ cmp(condition->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
-           ShifterOperand(condition->GetLocations()->InAt(1).AsArm().AsCoreRegister()));
+    LocationSummary* locations = condition->GetLocations();
+    if (locations->InAt(1).IsRegister()) {
+      __ cmp(locations->InAt(0).AsArm().AsCoreRegister(),
+             ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+    } else {
+      DCHECK(locations->InAt(1).IsConstant());
+      int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+      ShifterOperand operand;
+      if (ShifterOperand::CanHoldArm(value, &operand)) {
+        __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(value));
+      } else {
+        Register temp = IP;
+        __ LoadImmediate(temp, value);
+        __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(temp));
+      }
+    }
     __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
          ARMCondition(condition->GetCondition()));
   }
+
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
     __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   }
@@ -501,7 +556,7 @@
 void LocationsBuilderARM::VisitCondition(HCondition* comp) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)));
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -509,16 +564,29 @@
 }
 
 void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) {
-  if (comp->NeedsMaterialization()) {
-    LocationSummary* locations = comp->GetLocations();
+  if (!comp->NeedsMaterialization()) return;
+
+  LocationSummary* locations = comp->GetLocations();
+  if (locations->InAt(1).IsRegister()) {
     __ cmp(locations->InAt(0).AsArm().AsCoreRegister(),
            ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
-    __ it(ARMCondition(comp->GetCondition()), kItElse);
-    __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1),
-           ARMCondition(comp->GetCondition()));
-    __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0),
-           ARMOppositeCondition(comp->GetCondition()));
+  } else {
+    DCHECK(locations->InAt(1).IsConstant());
+    int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+    ShifterOperand operand;
+    if (ShifterOperand::CanHoldArm(value, &operand)) {
+      __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(value));
+    } else {
+      Register temp = IP;
+      __ LoadImmediate(temp, value);
+      __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(temp));
+    }
   }
+  __ it(ARMCondition(comp->GetCondition()), kItElse);
+  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1),
+         ARMCondition(comp->GetCondition()));
+  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0),
+         ARMOppositeCondition(comp->GetCondition()));
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -611,20 +679,17 @@
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
-  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
@@ -688,6 +753,7 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(ArmCoreLocation(R0));
 
@@ -753,6 +819,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
@@ -761,7 +828,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -783,9 +850,15 @@
   LocationSummary* locations = add->GetLocations();
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
-      __ add(locations->Out().AsArm().AsCoreRegister(),
-             locations->InAt(0).AsArm().AsCoreRegister(),
-             ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+      if (locations->InAt(1).IsRegister()) {
+        __ add(locations->Out().AsArm().AsCoreRegister(),
+               locations->InAt(0).AsArm().AsCoreRegister(),
+               ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+      } else {
+        __ AddConstant(locations->Out().AsArm().AsCoreRegister(),
+                       locations->InAt(0).AsArm().AsCoreRegister(),
+                       locations->InAt(1).GetConstant()->AsIntConstant()->GetValue());
+      }
       break;
 
     case Primitive::kPrimLong:
@@ -815,7 +888,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -836,11 +909,18 @@
 void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
   LocationSummary* locations = sub->GetLocations();
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt:
-      __ sub(locations->Out().AsArm().AsCoreRegister(),
-             locations->InAt(0).AsArm().AsCoreRegister(),
-             ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+    case Primitive::kPrimInt: {
+      if (locations->InAt(1).IsRegister()) {
+        __ sub(locations->Out().AsArm().AsCoreRegister(),
+               locations->InAt(0).AsArm().AsCoreRegister(),
+               ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+      } else {
+        __ AddConstant(locations->Out().AsArm().AsCoreRegister(),
+                       locations->InAt(0).AsArm().AsCoreRegister(),
+                       -locations->InAt(1).GetConstant()->AsIntConstant()->GetValue());
+      }
       break;
+    }
 
     case Primitive::kPrimLong:
       __ subs(locations->Out().AsArm().AsRegisterPairLow(),
@@ -878,6 +958,7 @@
 };
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(0)));
@@ -896,6 +977,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -948,9 +1030,11 @@
              ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
       __ b(&less, LT);
       __ b(&greater, GT);
+      // Do LoadImmediate before any `cmp`, as LoadImmediate might affect
+      // the status flags.
+      __ LoadImmediate(output, 0);
       __ cmp(left.AsRegisterPairLow(),
              ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
-      __ LoadImmediate(output, 0);
       __ b(&done, EQ);
       __ b(&less, CC);
 
@@ -986,6 +1070,11 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   instruction->SetLocations(locations);
 }
 
@@ -1010,10 +1099,24 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreWord, value, obj, offset);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).AsArm().AsCoreRegister();
       __ StoreToOffset(kStoreWord, value, obj, offset);
+
+      Register temp = locations->GetTemp(0).AsArm().AsCoreRegister();
+      Register card = locations->GetTemp(1).AsArm().AsCoreRegister();
+      Label is_null;
+      __ CompareAndBranchIfZero(value, &is_null);
+      __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value());
+      __ Lsr(temp, obj, gc::accounting::CardTable::kCardShift);
+      __ strb(card, Address(card, temp));
+      __ Bind(&is_null);
       break;
     }
 
@@ -1158,7 +1261,16 @@
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
   } else {
-    LOG(FATAL) << "Unimplemented";
+    DCHECK(source.IsConstant());
+    DCHECK(source.GetConstant()->AsIntConstant() != nullptr);
+    int32_t value = source.GetConstant()->AsIntConstant()->GetValue();
+    if (destination.IsRegister()) {
+      __ LoadImmediate(destination.AsArm().AsCoreRegister(), value);
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ LoadImmediate(IP, value);
+      __ str(IP, Address(SP, destination.GetStackIndex()));
+    }
   }
 }
 
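The strb sequence added to the field-set code above is the card-marking write barrier: after a reference store (with a fast path that skips null values), the card covering the object is dirtied. What it computes, as plain C++ (kCardShiftSketch is illustrative; the real constant is gc::accounting::CardTable::kCardShift):

#include <cstdint>

constexpr unsigned kCardShiftSketch = 7;

void MarkCard(uint8_t* biased_card_table, uintptr_t obj_addr) {
  // strb card, [card, obj >> shift]: the byte stored is the low 8 bits of the
  // biased table base, which the runtime arranges to equal the dirty value.
  biased_card_table[obj_addr >> kCardShiftSketch] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table));
}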
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1b5974f..85ab22b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_arm32.h"
+#include "utils/arm/assembler_thumb2.h"
 
 namespace art {
 namespace arm {
@@ -89,7 +89,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -107,7 +107,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -172,7 +172,7 @@
   }
 
   virtual InstructionSet GetInstructionSet() const OVERRIDE {
-    return InstructionSet::kArm;
+    return InstructionSet::kThumb2;
   }
 
  private:
@@ -184,7 +184,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Arm32Assembler assembler_;
+  Thumb2Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4e69a0c..251a2ad 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -15,7 +15,9 @@
  */
 
 #include "code_generator_x86.h"
+#include "gc/accounting/card_table.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/managed_register_x86.h"
 
@@ -32,6 +34,11 @@
 
 namespace x86 {
 
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86 : public SlowPathCode {
@@ -49,6 +56,21 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
 };
 
+class StackOverflowCheckSlowPathX86 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addl(ESP,
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+    __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow)));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86);
+};
+
 #undef __
 #define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
 
@@ -66,9 +88,6 @@
   return kEqual;
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -183,8 +202,23 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86);
+  if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
+    __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
+    RecordPcInfo(0);
+  }
+
   // The return PC has already been pushed on the stack.
   __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+
+  if (!skip_overflow_check && kExplicitStackOverflowCheck) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
+    AddSlowPath(slow_path);
+
+    __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>()));
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
 }
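The x86 prologue above supports two stack overflow strategies. The implicit form probes one word below the incoming stack pointer before the frame is committed, so a missing guard page faults and the runtime's fault handler raises the error; the explicit form compares ESP against the thread's stack_end after the frame is built and branches to StackOverflowCheckSlowPathX86, which unwinds the frame and tail-jumps to pThrowStackOverflow. A sketch of both conditions (helper names are assumptions, not ART API):

    #include <cstdint>

    // Implicit: testl EAX, [ESP + offset] -- faults inside the guard region.
    int32_t ImplicitProbeOffset(uint32_t reserved_bytes) {
      return -static_cast<int32_t>(reserved_bytes);
    }

    // Explicit: cmp against stack_end, then j kLess to the slow path.
    bool ExplicitCheckFails(uintptr_t esp, uintptr_t stack_end) {
      return esp < stack_end;
    }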
 
@@ -473,6 +507,10 @@
     // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsX86().AsCpuRegister(), rhs.AsX86().AsCpuRegister());
+    } else if (rhs.IsConstant()) {
+      HIntConstant* instruction = rhs.GetConstant()->AsIntConstant();
+      Immediate imm(instruction->GetValue());
+      __ cmpl(lhs.AsX86().AsCpuRegister(), imm);
     } else {
       __ cmpl(lhs.AsX86().AsCpuRegister(), Address(ESP, rhs.GetStackIndex()));
     }
@@ -530,7 +568,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
   if (comp->NeedsMaterialization()) {
-    locations->SetOut(Location::SameAsFirstInput());
+    locations->SetOut(Location::RequiresRegister());
   }
   comp->SetLocations(locations);
 }
@@ -541,6 +579,10 @@
     if (locations->InAt(1).IsRegister()) {
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               locations->InAt(1).AsX86().AsCpuRegister());
+    } else if (locations->InAt(1).IsConstant()) {
+      HConstant* instruction = locations->InAt(1).GetConstant();
+      Immediate imm(instruction->AsIntConstant()->GetValue());
+      __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), imm);
     } else {
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               Address(ESP, locations->InAt(1).GetStackIndex()));
@@ -598,20 +640,17 @@
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
-  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
@@ -676,6 +715,7 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86CpuLocation(EAX));
 
@@ -733,6 +773,7 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
@@ -769,6 +810,10 @@
       if (locations->InAt(1).IsRegister()) {
         __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
                 locations->InAt(1).AsX86().AsCpuRegister());
+      } else if (locations->InAt(1).IsConstant()) {
+        HConstant* instruction = locations->InAt(1).GetConstant();
+        Immediate imm(instruction->AsIntConstant()->GetValue());
+        __ addl(locations->InAt(0).AsX86().AsCpuRegister(), imm);
       } else {
         __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
                 Address(ESP, locations->InAt(1).GetStackIndex()));
@@ -838,6 +883,10 @@
       if (locations->InAt(1).IsRegister()) {
         __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
                 locations->InAt(1).AsX86().AsCpuRegister());
+      } else if (locations->InAt(1).IsConstant()) {
+        HConstant* instruction = locations->InAt(1).GetConstant();
+        Immediate imm(instruction->AsIntConstant()->GetValue());
+        __ subl(locations->InAt(0).AsX86().AsCpuRegister(), imm);
       } else {
         __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
                 Address(ESP, locations->InAt(1).GetStackIndex()));
@@ -875,6 +924,7 @@
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86CpuLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
@@ -892,6 +942,7 @@
       Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck)));
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
@@ -996,6 +1047,12 @@
   } else {
     locations->SetInAt(1, Location::RequiresRegister());
   }
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    // Ensure the card is in a byte register.
+    locations->AddTemp(X86CpuLocation(ECX));
+  }
   instruction->SetLocations(locations);
 }
 
@@ -1020,10 +1077,25 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).AsX86().AsCpuRegister();
       __ movl(Address(obj, offset), value);
+      Label is_null;
+      Register temp = locations->GetTemp(0).AsX86().AsCpuRegister();
+      Register card = locations->GetTemp(1).AsX86().AsCpuRegister();
+      __ testl(value, value);
+      __ j(kEqual, &is_null);
+      __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+      __ movl(temp, obj);
+      __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
+      __ movb(Address(temp, card, TIMES_1, 0), locations->GetTemp(1).AsX86().AsByteRegister());
+      __ Bind(&is_null);
       break;
     }
 
@@ -1178,6 +1250,14 @@
       MoveMemoryToMemory(destination.GetStackIndex(),
                          source.GetStackIndex());
     }
+  } else if (source.IsConstant()) {
+    HIntConstant* instruction = source.GetConstant()->AsIntConstant();
+    Immediate imm(instruction->GetValue());
+    if (destination.IsRegister()) {
+      __ movl(destination.AsX86().AsCpuRegister(), imm);
+    } else {
+      __ movl(Address(ESP, destination.GetStackIndex()), imm);
+    }
   } else {
     LOG(FATAL) << "Unimplemented";
   }
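Unlike the ARM backend, the x86 write barrier above pins its card temp to ECX: the IA-32 `movb` encoding can only name AL, CL, DL or BL as its byte source, 32-bit mode has no REX prefix to widen that set, and the register allocator has to be told so explicitly. The constraint, stated standalone:

    // Encodings EAX=0, ECX=1, EDX=2, EBX=3 are the only 32-bit GPRs with an
    // addressable low byte, so a byte store's source must live in one of them.
    constexpr bool HasLowByteForm(int encoding) { return encoding <= 3; }
    static_assert(HasLowByteForm(1), "ECX/CL is usable as the card register");
    static_assert(!HasLowByteForm(6), "ESI has no low-byte form in 32-bit mode");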
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index d622d2a..b7c2ad8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -90,7 +90,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -108,7 +108,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -126,7 +126,7 @@
 class CodeGeneratorX86 : public CodeGenerator {
  public:
   explicit CodeGeneratorX86(HGraph* graph);
-  virtual ~CodeGeneratorX86() { }
+  virtual ~CodeGeneratorX86() {}
 
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e3ce5ce..641e8e1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -17,11 +17,13 @@
 #include "code_generator_x86_64.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "mirror/object_reference.h"
 #include "thread.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 
@@ -33,6 +35,15 @@
 
 namespace x86_64 {
 
+static constexpr bool kExplicitStackOverflowCheck = true;
+
+// Some x86_64 instructions require a register to be available as temp.
+static constexpr Register TMP = R11;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
+
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -41,7 +52,8 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
+    __ gs()->call(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
     codegen->RecordPcInfo(dex_pc_);
   }
 
@@ -50,6 +62,22 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
 };
 
+class StackOverflowCheckSlowPathX86_64 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86_64() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addq(CpuRegister(RSP),
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+    __ gs()->jmp(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
+};
+
 #undef __
 #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -67,12 +95,6 @@
   return kEqual;
 }
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -147,7 +169,26 @@
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
   // The return PC has already been pushed on the stack.
-  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ subq(CpuRegister(RSP),
+          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+
+  bool skip_overflow_check = IsLeafMethod()
+      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
+      AddSlowPath(slow_path);
+
+      __ gs()->cmpq(CpuRegister(RSP),
+                    Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ testq(CpuRegister(RAX), Address(
+          CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
+    }
+  }
+
   __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
 }
 
@@ -329,7 +370,14 @@
   } else {
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
-    __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister());
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister());
+    } else if (rhs.IsConstant()) {
+      __ cmpl(lhs.AsX86_64().AsCpuRegister(),
+              Immediate(rhs.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(lhs.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
+    }
     __ j(X86_64Condition(condition->GetCondition()),
          codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   }
@@ -382,7 +430,7 @@
 void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -391,8 +439,17 @@
 
 void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
-    __ cmpq(comp->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(),
-            comp->GetLocations()->InAt(1).AsX86_64().AsCpuRegister());
+    LocationSummary* locations = comp->GetLocations();
+    if (locations->InAt(1).IsRegister()) {
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+    } else if (locations->InAt(1).IsConstant()) {
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+    }
     __ setcc(X86_64Condition(comp->GetCondition()),
              comp->GetLocations()->Out().AsX86_64().AsCpuRegister());
   }
@@ -480,25 +537,21 @@
 }
 
 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
-  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
-  // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::ConstantLocation(constant));
   constant->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
-  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -606,6 +659,7 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86_64CpuLocation(RDI));
 
@@ -660,13 +714,19 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -693,8 +753,17 @@
             locations->Out().AsX86_64().AsCpuRegister().AsRegister());
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-              locations->InAt(1).AsX86_64().AsCpuRegister());
+      if (locations->InAt(1).IsRegister()) {
+        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+                locations->InAt(1).AsX86_64().AsCpuRegister());
+      } else if (locations->InAt(1).IsConstant()) {
+        HConstant* instruction = locations->InAt(1).GetConstant();
+        Immediate imm(instruction->AsIntConstant()->GetValue());
+        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm);
+      } else {
+        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+                Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+      }
       break;
     }
     case Primitive::kPrimLong: {
@@ -718,7 +787,12 @@
 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -745,8 +819,17 @@
             locations->Out().AsX86_64().AsCpuRegister().AsRegister());
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-              locations->InAt(1).AsX86_64().AsCpuRegister());
+      if (locations->InAt(1).IsRegister()) {
+        __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+                locations->InAt(1).AsX86_64().AsCpuRegister());
+      } else if (locations->InAt(1).IsConstant()) {
+        HConstant* instruction = locations->InAt(1).GetConstant();
+        Immediate imm(instruction->AsIntConstant()->GetValue());
+        __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm);
+      } else {
+        __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+                Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+      }
       break;
     }
     case Primitive::kPrimLong: {
@@ -768,6 +851,7 @@
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86_64CpuLocation(RAX));
   instruction->SetLocations(locations);
@@ -781,6 +865,7 @@
   __ gs()->call(Address::Absolute(
       QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(instruction->GetDexPc());
 }
 
@@ -831,6 +916,11 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   instruction->SetLocations(locations);
 }
 
@@ -854,9 +944,24 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       __ movl(Address(obj, offset), value);
+      Label is_null;
+      CpuRegister temp = locations->GetTemp(0).AsX86_64().AsCpuRegister();
+      CpuRegister card = locations->GetTemp(1).AsX86_64().AsCpuRegister();
+      __ testl(value, value);
+      __ j(kEqual, &is_null);
+      __ gs()->movq(card, Address::Absolute(
+          Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+      __ movq(temp, obj);
+      __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
+  __ movb(Address(temp, card, TIMES_1, 0), card);
+      __ Bind(&is_null);
       break;
     }
 
@@ -1008,6 +1113,26 @@
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsConstant()) {
+    HConstant* constant = source.GetConstant();
+    if (constant->IsIntConstant()) {
+      Immediate imm(constant->AsIntConstant()->GetValue());
+      if (destination.IsRegister()) {
+        __ movl(destination.AsX86_64().AsCpuRegister(), imm);
+      } else {
+        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+      }
+    } else if (constant->IsLongConstant()) {
+      int64_t value = constant->AsLongConstant()->GetValue();
+      if (destination.IsRegister()) {
+        __ movq(destination.AsX86_64().AsCpuRegister(), Immediate(value));
+      } else {
+        __ movq(CpuRegister(TMP), Immediate(value));
+        __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      }
+    } else {
+      LOG(FATAL) << "Unimplemented constant type";
+    }
   } else {
     LOG(FATAL) << "Unimplemented";
   }
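The long-constant move above always stages the value in TMP when the destination is a stack slot: x86-64 has no `mov m64, imm64` form, memory destinations only accept a sign-extended 32-bit immediate, so a full 64-bit constant must be materialized into a register (REX.W B8+rd) and then stored. A check for when the direct store would have been enough (sketch, not ART code):

    #include <cstdint>

    bool FitsInSignExtendedImm32(int64_t value) {
      return value == static_cast<int32_t>(value);  // movq [rsp+off], imm32 is legal
    }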
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 8283dda..a20ca3f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -87,7 +87,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -105,7 +105,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
 
-  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7ec0c84..d7ac10d 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -48,10 +48,17 @@
 };
 
 #if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
-static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+static void Run(const InternalCodeAllocator& allocator,
+                const CodeGenerator& codegen,
+                bool has_result,
+                int32_t expected) {
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+  if (codegen.GetInstructionSet() == kThumb2) {
+    // For thumb we need the bottom bit set.
+    f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+  }
   int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
@@ -69,21 +76,23 @@
   InternalCodeAllocator allocator;
 
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
-  codegen->CompileBaseline(&allocator);
+  // Avoid the stack overflow check, which would require the runtime to be set up,
+  // by telling the compiler that the methods being run are leaf methods.
+  codegen->CompileBaseline(&allocator, true);
 #if defined(__i386__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kArm);
-  codegen->CompileBaseline(&allocator);
+  codegen->CompileBaseline(&allocator, true);
 #if defined(__arm__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kX86_64);
-  codegen->CompileBaseline(&allocator);
+  codegen->CompileBaseline(&allocator, true);
 #if defined(__x86_64__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 }
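Run() above nudges the entry pointer because the generated code is now Thumb2: on ARM, bit 0 of an interworking branch target selects the instruction set, so a Thumb entry point must be reached through an odd address. The adjustment in isolation:

    #include <cstdint>

    // Set the interworking bit so a BX/BLX to this address executes as Thumb.
    template <typename Fn>
    Fn ToThumbEntryPoint(Fn f) {
      return reinterpret_cast<Fn>(reinterpret_cast<uintptr_t>(f) | 1u);
    }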
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index f033e2e..f011e85 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -108,6 +108,10 @@
       } else {
         codegen_.DumpCoreRegister(output_, location.reg().RegId());
       }
+    } else if (location.IsConstant()) {
+      output_ << "constant";
+    } else if (location.IsInvalid()) {
+      output_ << "invalid";
     } else if (location.IsStackSlot()) {
       output_ << location.GetStackIndex() << "(sp)";
     } else {
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 98766d2..468cfb7 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -29,4 +29,11 @@
   }
 }
 
+
+Location Location::RegisterOrConstant(HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 40a39ad..aaddb09 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -24,6 +24,7 @@
 
 namespace art {
 
+class HConstant;
 class HInstruction;
 
 /**
@@ -34,23 +35,33 @@
  public:
   enum Kind {
     kInvalid = 0,
-    kStackSlot = 1,  // Word size slot.
-    kDoubleStackSlot = 2,  // 64bit stack slot.
-    kRegister = 3,
+    kConstant = 1,
+    kStackSlot = 2,  // Word size slot.
+    kDoubleStackSlot = 3,  // 64bit stack slot.
+    kRegister = 4,
     // On 32-bit architectures, quick can pass a long where the
     // low bits are in the last parameter register, and the high
     // bits are in a stack slot. The kQuickParameter kind is for
     // handling this special case.
-    kQuickParameter = 4,
+    kQuickParameter = 6,
 
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator.  Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 5,
+    kUnallocated = 7,
   };
 
   Location() : value_(kInvalid) {
+    // Verify that non-tagged location kinds do not interfere with kConstantTag.
+    COMPILE_ASSERT((kInvalid & kLocationTagMask) != kConstant, TagError);
+    COMPILE_ASSERT((kUnallocated & kLocationTagMask) != kConstant, TagError);
+    COMPILE_ASSERT((kStackSlot & kLocationTagMask) != kConstant, TagError);
+    COMPILE_ASSERT((kDoubleStackSlot & kLocationTagMask) != kConstant, TagError);
+    COMPILE_ASSERT((kRegister & kLocationTagMask) != kConstant, TagError);
+    COMPILE_ASSERT((kConstant & kLocationTagMask) == kConstant, TagError);
+    COMPILE_ASSERT((kQuickParameter & kLocationTagMask) != kConstant, TagError);
+
     DCHECK(!IsValid());
   }
 
@@ -61,6 +72,20 @@
     return *this;
   }
 
+  bool IsConstant() const {
+    return (value_ & kLocationTagMask) == kConstant;
+  }
+
+  static Location ConstantLocation(HConstant* constant) {
+    DCHECK(constant != nullptr);
+    return Location(kConstant | reinterpret_cast<uword>(constant));
+  }
+
+  HConstant* GetConstant() const {
+    DCHECK(IsConstant());
+    return reinterpret_cast<HConstant*>(value_ & ~kLocationTagMask);
+  }
+
   bool IsValid() const {
     return value_ != kInvalid;
   }
@@ -69,11 +94,6 @@
     return !IsValid();
   }
 
-  bool IsConstant() const {
-    // TODO: support constants.
-    return false;
-  }
-
   // Empty location. Used if the location should be ignored.
   static Location NoLocation() {
     return Location();
@@ -162,12 +182,13 @@
 
   const char* DebugString() const {
     switch (GetKind()) {
-      case kInvalid: return "?";
+      case kInvalid: return "I";
       case kRegister: return "R";
       case kStackSlot: return "S";
       case kDoubleStackSlot: return "DS";
       case kQuickParameter: return "Q";
       case kUnallocated: return "U";
+      case kConstant: return "C";
     }
     return "?";
   }
@@ -196,6 +217,8 @@
     return UnallocatedLocation(kRequiresRegister);
   }
 
+  static Location RegisterOrConstant(HInstruction* instruction);
+
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
   static Location SameAsFirstInput() {
@@ -215,6 +238,7 @@
   // Number of bits required to encode Kind value.
   static constexpr uint32_t kBitsForKind = 4;
   static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
+  static constexpr uword kLocationTagMask = 0x3;
 
   explicit Location(uword value) : value_(value) {}
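The kLocationTagMask scheme above overlays an HConstant pointer onto the location word: arena-allocated HConstant objects are at least 4-byte aligned, so the two low bits are free to carry the kConstant tag, and the COMPILE_ASSERTs guarantee that no plain kind value aliases that tag. The same trick, self-contained:

    #include <cassert>
    #include <cstdint>

    constexpr uintptr_t kTagMask = 0x3;
    constexpr uintptr_t kConstantTag = 0x1;

    uintptr_t TagConstant(const void* constant) {
      assert((reinterpret_cast<uintptr_t>(constant) & kTagMask) == 0);
      return reinterpret_cast<uintptr_t>(constant) | kConstantTag;
    }

    const void* UntagConstant(uintptr_t value) {
      assert((value & kTagMask) == kConstantTag);
      return reinterpret_cast<const void*>(value & ~kTagMask);
    }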
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e87b044..61a6f6b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -408,7 +408,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
 };
 
-#define FOR_EACH_INSTRUCTION(M)                            \
+#define FOR_EACH_CONCRETE_INSTRUCTION(M)                   \
   M(Add)                                                   \
   M(Condition)                                             \
   M(Equal)                                                 \
@@ -440,6 +440,9 @@
   M(NullCheck)                                             \
   M(Temporary)                                             \
 
+#define FOR_EACH_INSTRUCTION(M)                            \
+  FOR_EACH_CONCRETE_INSTRUCTION(M)                         \
+  M(Constant)
 
 #define FORWARD_DECLARATION(type) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
@@ -1078,11 +1081,21 @@
   DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
 };
 
+class HConstant : public HExpression<0> {
+ public:
+  explicit HConstant(Primitive::Type type) : HExpression(type) {}
+
+  DECLARE_INSTRUCTION(Constant);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HConstant);
+};
+
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HExpression<0> {
+class HIntConstant : public HConstant {
  public:
-  explicit HIntConstant(int32_t value) : HExpression(Primitive::kPrimInt), value_(value) {}
+  explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {}
 
   int32_t GetValue() const { return value_; }
 
@@ -1094,14 +1107,12 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HExpression<0> {
+class HLongConstant : public HConstant {
  public:
-  explicit HLongConstant(int64_t value) : HExpression(Primitive::kPrimLong), value_(value) {}
+  explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {}
 
   int64_t GetValue() const { return value_; }
 
-  virtual Primitive::Type GetType() const { return Primitive::kPrimLong; }
-
   DECLARE_INSTRUCTION(LongConstant);
 
  private:
@@ -1278,13 +1289,12 @@
 
   DECLARE_INSTRUCTION(Phi);
 
- protected:
+ private:
   GrowableArray<HInstruction*> inputs_;
   const uint32_t reg_number_;
   Primitive::Type type_;
   bool is_live_;
 
- private:
   DISALLOW_COPY_AND_ASSIGN(HPhi);
 };
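Splitting the instruction list above keeps the visitor macros, which declare one Visit##name method per entry, from demanding a visitor for the abstract HConstant, while forward declarations and the Is/As casts still cover it. The pattern in miniature:

    #define FOR_EACH_CONCRETE_INSN(M) M(IntConstant) M(LongConstant)
    #define FOR_EACH_INSN(M) FOR_EACH_CONCRETE_INSN(M) M(Constant)

    #define FORWARD_DECLARE(type) class H##type;
    FOR_EACH_INSN(FORWARD_DECLARE)  // HIntConstant, HLongConstant, HConstant
    #undef FORWARD_DECLARE

    struct Visitor {
    #define DECLARE_VISIT(name) void Visit##name(H##name*);
      FOR_EACH_CONCRETE_INSN(DECLARE_VISIT)  // no VisitConstant required
    #undef DECLARE_VISIT
    };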
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b621e51..8a5077b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -79,13 +79,14 @@
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
-  // The optimizing compiler currently does not have a Thumb2 assembler.
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
+  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
+  // overflow checks) assume thumb2.
+  if (instruction_set == kArm) {
+    instruction_set = kThumb2;
   }
 
   // Do not attempt to compile on architectures we do not support.
-  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kArm) {
+  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kThumb2) {
     return nullptr;
   }
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 68130dd..bd3a7d9 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -153,13 +153,13 @@
       if (current->HasRegister()) {
         DCHECK(instruction->IsParameterValue());
         inactive_.Add(current);
-      } else if (current->HasSpillSlot()) {
-        DCHECK(instruction->IsParameterValue());
+      } else if (current->HasSpillSlot() || instruction->IsConstant()) {
         // Split before first register use.
         size_t first_register_use = current->FirstRegisterUse();
         if (first_register_use != kNoLifetime) {
           LiveInterval* split = Split(current, first_register_use - 1);
-          // The new interval may start at a late
+          // Don't add directly to `unhandled_`: it needs to stay sorted, and the start
+          // of this new interval might be after intervals already in the list.
           AddToUnhandled(split);
         } else {
           // Nothing to do, we won't allocate a register for this value.
@@ -579,6 +579,11 @@
     return;
   }
 
+  if (defined_by->IsConstant()) {
+    // Constants don't need a spill slot.
+    return;
+  }
+
   LiveInterval* last_sibling = interval;
   while (last_sibling->GetNextSibling() != nullptr) {
     last_sibling = last_sibling->GetNextSibling();
@@ -644,11 +649,16 @@
   if (interval->HasRegister()) {
     return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
   } else {
-    DCHECK(interval->GetParent()->HasSpillSlot());
-    if (NeedTwoSpillSlot(interval->GetType())) {
-      return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
+    HInstruction* defined_by = interval->GetParent()->GetDefinedBy();
+    if (defined_by->IsConstant()) {
+      return defined_by->GetLocations()->Out();
     } else {
-      return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+      DCHECK(interval->GetParent()->HasSpillSlot());
+      if (NeedTwoSpillSlot(interval->GetType())) {
+        return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
+      } else {
+        return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+      }
     }
   }
 }
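Both register-allocator changes above exploit constant locations: an interval whose defining instruction is a constant no longer reserves a spill slot, and a sibling that ends up without a register is rematerialized from the constant's own location instead of being read back from the stack. The decision, reduced to its shape (hypothetical types):

    enum class Loc { kRegister, kConstant, kStackSlot };

    Loc LocationFor(bool has_register, bool defined_by_constant) {
      if (has_register) return Loc::kRegister;
      if (defined_by_constant) return Loc::kConstant;  // rematerialize; no slot
      return Loc::kStackSlot;
    }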
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e35ff56..be1c7ec 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -66,7 +66,10 @@
 
   static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
   static bool Supports(InstructionSet instruction_set) {
-    return instruction_set == kX86 || instruction_set == kArm || instruction_set == kX86_64;
+    return instruction_set == kX86
+        || instruction_set == kArm
+        || instruction_set == kX86_64
+        || instruction_set == kThumb2;
   }
 
   size_t GetNumberOfSpillSlots() const {
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 8a34928..671ccb6 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -73,6 +73,11 @@
   return os;
 }
 
+ShifterOperand::ShifterOperand(uint32_t immed)
+    : type_(kImmediate), rm_(kNoRegister), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(immed) {
+  CHECK(immed < (1u << 12) || ArmAssembler::ModifiedImmediate(immed) != kInvalidModifiedImmediate);
+}
 
 
 uint32_t ShifterOperand::encodingArm() const {
@@ -169,9 +174,7 @@
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
 
     case MOV:
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
-        return true;
-      }
+      // TODO: Support immediates of 12 bits or fewer here as well.
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
     case MVN:
     default:
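The out-of-lined ShifterOperand constructor above now rejects immediates that neither fit in 12 bits nor form a valid Thumb2 modified immediate. For intuition: the classic A32 rule accepts an 8-bit value rotated right by any even amount, while the Thumb2 variant that ModifiedImmediate() implements instead accepts 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY splats plus a rotated 8-bit form. A sketch of the A32 test only, as a point of comparison:

    #include <cstdint>

    bool IsA32ModifiedImmediate(uint32_t imm) {
      for (unsigned rot = 0; rot < 32; rot += 2) {
        // Rotating left by `rot` undoes a rotate-right-by-`rot` encoding.
        uint32_t v = (rot == 0) ? imm : (imm << rot) | (imm >> (32 - rot));
        if (v < 256) return true;
      }
      return false;
    }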
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index be19174..54965f6 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -35,9 +35,7 @@
       is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(0) {
   }
 
-  explicit ShifterOperand(uint32_t immed) : type_(kImmediate), rm_(kNoRegister), rs_(kNoRegister),
-      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(immed) {
-  }
+  explicit ShifterOperand(uint32_t immed);
 
   // Data-processing operands - Register
   explicit ShifterOperand(Register rm) : type_(kRegister), rm_(rm), rs_(kNoRegister),
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 604f59e..78ff31a 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -619,7 +619,8 @@
     return true;
   }
 
-  bool can_contain_high_register = opcode == MOV || opcode == ADD || opcode == SUB;
+  bool can_contain_high_register = (opcode == MOV)
+      || ((opcode == ADD || opcode == SUB) && (rn == rd));
 
   if (IsHighRegister(rd) || IsHighRegister(rn)) {
     if (can_contain_high_register) {
@@ -757,23 +758,21 @@
   int32_t encoding = 0;
   if (so.IsImmediate()) {
     // Check special cases.
-    if ((opcode == SUB || opcode == ADD) && rn == SP) {
-      // There are special ADD/SUB rd, SP, #imm12 instructions.
+    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
       if (opcode == SUB) {
         thumb_opcode = 0b0101;
       } else {
         thumb_opcode = 0;
       }
       uint32_t imm = so.GetImmediate();
-      CHECK_LT(imm, (1u << 12));
 
       uint32_t i = (imm >> 11) & 1;
       uint32_t imm3 = (imm >> 8) & 0b111;
       uint32_t imm8 = imm & 0xff;
 
       encoding = B31 | B30 | B29 | B28 | B25 |
-           B19 | B18 | B16 |
            thumb_opcode << 21 |
+           rn << 16 |
            rd << 8 |
            i << 26 |
            imm3 << 12 |
@@ -877,11 +876,17 @@
            rn_shift = 8;
         } else {
           thumb_opcode = 0b1010;
+          rd = rn;
           rn = so.GetRegister();
         }
 
         break;
-      case CMN: thumb_opcode = 0b1011; rn = so.GetRegister(); break;
+      case CMN: {
+        thumb_opcode = 0b1011;
+        rd = rn;
+        rn = so.GetRegister();
+        break;
+      }
       case ORR: thumb_opcode = 0b1100; break;
       case MOV:
         dp_opcode = 0;
@@ -1371,13 +1376,23 @@
       }
 
       if (must_be_32bit) {
-        int32_t encoding = 0x1f << 27 | B22 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
+        int32_t encoding = 0x1f << 27 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
             ad.encodingThumb(true);
+        if (half) {
+          encoding |= B21;
+        } else if (!byte) {
+          encoding |= B22;
+        }
         Emit32(encoding);
       } else {
         // 16 bit register offset.
         int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) |
             ad.encodingThumb(false);
+        if (byte) {
+          encoding |= B10;
+        } else if (half) {
+          encoding |= B9;
+        }
         Emit16(encoding);
       }
     }
@@ -1470,6 +1485,7 @@
     // branch, its size may change if other branches change size, altering the distance
     // to the target so that it no longer fits in 16 bits.
     if (size == Branch::k16Bit) {
+      DCHECK(!force_32bit_branches_);
       Emit16(0);          // Space for a 16 bit branch.
     } else {
       Emit32(0);            // Space for a 32 bit branch.
@@ -1477,7 +1493,7 @@
   } else {
     // Branch is to an unbound label.  Emit space for it.
     uint16_t branch_id = AddBranch(branch_type, pc, cond);    // Unresolved branch.
-    if (force_32bit_) {
+    if (force_32bit_branches_ || force_32bit_) {
       Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
       Emit16(0);                   // another 16 bits.
     } else {
@@ -2073,6 +2089,7 @@
     uint32_t branch_location = branch->GetLocation();
     uint16_t next = buffer_.Load<uint16_t>(branch_location);       // Get next in chain.
     if (changed) {
+      DCHECK(!force_32bit_branches_);
       MakeHoleForBranch(branch->GetLocation(), 2);
       if (branch->IsCompareAndBranch()) {
         // A cbz/cbnz instruction has changed size.  There is no valid encoding for
@@ -2506,12 +2523,22 @@
 
 
 void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) {
-  cbz(r, label);
+  if (force_32bit_branches_) {
+    cmp(r, ShifterOperand(0));
+    b(label, EQ);
+  } else {
+    cbz(r, label);
+  }
 }
 
 
 void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
-  cbnz(r, label);
+  if (force_32bit_branches_) {
+    cmp(r, ShifterOperand(0));
+    b(label, NE);
+  } else {
+    cbnz(r, label);
+  }
 }
 }  // namespace arm
 }  // namespace art
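CompareAndBranchIfZero() and its non-zero twin stop using CBZ/CBNZ when 32-bit branches are forced: those encodings are 16-bit only, branch forward only, and reach at most 126 bytes, so they can never be relaxed in place; the assembler falls back to CMP plus a conditional B instead. The range rule:

    #include <cstdint>

    // CBZ/CBNZ encode i:imm5:'0' -- a forward, even offset of 0..126 bytes.
    bool FitsCompareAndBranch(int32_t byte_offset) {
      return byte_offset >= 0 && byte_offset <= 126 && (byte_offset % 2) == 0;
    }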
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5f24e4e..ee33bf2 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -29,10 +29,13 @@
 namespace art {
 namespace arm {
 
-
 class Thumb2Assembler FINAL : public ArmAssembler {
  public:
-  Thumb2Assembler() : force_32bit_(false), it_cond_index_(kNoItCondition), next_condition_(AL) {
+  explicit Thumb2Assembler(bool force_32bit_branches = false)
+      : force_32bit_branches_(force_32bit_branches),
+        force_32bit_(false),
+        it_cond_index_(kNoItCondition),
+        next_condition_(AL) {
   }
 
   virtual ~Thumb2Assembler() {
@@ -49,6 +52,10 @@
     return force_32bit_;
   }
 
+  bool IsForced32BitBranches() const {
+    return force_32bit_branches_;
+  }
+
   void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
     EmitBranches();
     Assembler::FinalizeInstructions(region);
@@ -412,7 +419,8 @@
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
 
-  bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
+  bool force_32bit_branches_;  // Force the assembler to use 32 bit branch instructions.
+  bool force_32bit_;           // Force the assembler to use 32 bit thumb2 instructions.
 
   // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
   Condition it_conditions_[4];
@@ -605,6 +613,9 @@
    private:
     // Calculate the size of the branch instruction based on its type and offset.
     Size CalculateSize() const {
+      if (assembler_->IsForced32BitBranches()) {
+        return k32Bit;
+      }
       if (target_ == kUnresolved) {
         if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
           return k32Bit;
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 68cb656..3312f8f 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -14,7 +14,10 @@
  * limitations under the License.
  */
 
+#include <dirent.h>
 #include <fstream>
+#include <sys/types.h>
+#include <map>
 
 #include "gtest/gtest.h"
 #include "utils/arm/assembler_thumb2.h"
@@ -40,6 +43,8 @@
 static constexpr bool kPrintResults = false;
 #endif
 
+static const char* TOOL_PREFIX = "arm-linux-androideabi-";
+
 void SetAndroidData() {
   const char* data = getenv("ANDROID_DATA");
   if (data == nullptr) {
@@ -109,9 +114,9 @@
   // Suffix on toolsdir will be something like "arm-linux-androideabi-4.8"
   while ((entry = readdir(dir)) != nullptr) {
     std::string subdir = toolsdir + std::string("/") + std::string(entry->d_name);
-    size_t eabi = subdir.find("arm-eabi-");
+    size_t eabi = subdir.find(TOOL_PREFIX);
     if (eabi != std::string::npos) {
-      std::string suffix = subdir.substr(eabi + sizeof("arm-eabi-"));
+      std::string suffix = subdir.substr(eabi + strlen(TOOL_PREFIX));
       double version = strtod(suffix.c_str(), nullptr);
       if (version > maxversion) {
         maxversion = version;
@@ -166,22 +171,22 @@
   }
   out.close();
 
-  char cmd[256];
+  char cmd[1024];
 
   // Assemble the .S
-  snprintf(cmd, sizeof(cmd), "%sarm-eabi-as %s -o %s.o", toolsdir.c_str(), filename, filename);
+  snprintf(cmd, sizeof(cmd), "%s%sas %s -o %s.o", toolsdir.c_str(), TOOL_PREFIX, filename, filename);
   system(cmd);
 
   // Remove the $d symbols to prevent the disassembler dumping the instructions
   // as .word
-  snprintf(cmd, sizeof(cmd), "%sarm-eabi-objcopy -N '$d' %s.o %s.oo", toolsdir.c_str(),
+  snprintf(cmd, sizeof(cmd), "%s%sobjcopy -N '$d' %s.o %s.oo", toolsdir.c_str(), TOOL_PREFIX,
     filename, filename);
   system(cmd);
 
   // Disassemble.
 
-  snprintf(cmd, sizeof(cmd), "%sarm-eabi-objdump -d %s.oo | grep '^  *[0-9a-f][0-9a-f]*:'",
-    toolsdir.c_str(), filename);
+  snprintf(cmd, sizeof(cmd), "%s%sobjdump -d %s.oo | grep '^  *[0-9a-f][0-9a-f]*:'",
+    toolsdir.c_str(), TOOL_PREFIX, filename);
   if (kPrintResults) {
     // Print the results only, don't check. This is used to generate new output for inserting
     // into the .inc file.
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 3943e37..18035f3 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -48,8 +48,8 @@
 const char* DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
-  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
-  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
+  "   6:	f201 0055 	addw	r0, r1, #85	; 0x55\n",
+  "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
   "  16:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
new file mode 100644
index 0000000..63adbc2
--- /dev/null
+++ b/compiler/utils/stack_checks.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_STACK_CHECKS_H_
+#define ART_COMPILER_UTILS_STACK_CHECKS_H_
+
+#include "instruction_set.h"
+
+namespace art {
+
+// Size of a frame we always consider large. Any frame at least this size should
+// get a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame we consider small. Any leaf method with a frame smaller than this
+// should run without a stack overflow check.
+// The constant comes from experience with framework code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it is at least kLargeFrameSize, or at least a
+// quarter of the stack space reserved for overflow handling.
+static inline bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_STACK_CHECKS_H_
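A worked example of IsLargeFrame(), under the assumption (for the example only) of an 8 KB reserved region: a 1.5 KB leaf frame is small on both tests (below 2 KB and below 8 KB / 4 = 2 KB), so its entry check may be elided, while a 2 KB frame is large either way:

    #include <cstddef>

    constexpr size_t kReservedBytes = 8 * 1024;  // assumed; really per-ISA

    constexpr bool IsLargeFrameExample(size_t size) {
      return size >= 2 * 1024 || size >= kReservedBytes / 4;
    }

    static_assert(!IsLargeFrameExample(1536), "1.5 KB frame: check elidable");
    static_assert(IsLargeFrameExample(2048), "2 KB frame: always checked");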
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 56c6536..b6a5c20 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -806,6 +806,13 @@
 }
 
 
+void X86Assembler::testl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x85);
+  EmitOperand(reg, address);
+}
+
+
 void X86Assembler::testl(Register reg, const Immediate& immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2fc6049..ce20768 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -346,6 +346,7 @@
 
   void testl(Register reg1, Register reg2);
   void testl(Register reg, const Immediate& imm);
+  void testl(Register reg1, const Address& address);
 
   void andl(Register dst, const Immediate& imm);
   void andl(Register dst, Register src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 78738d8..0d14376 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -869,6 +869,22 @@
 }
 
 
+void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
+  EmitRex64(reg);
+  EmitComplex(7, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0x3B);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -934,6 +950,14 @@
 }
 
 
+void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0x85);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1063,6 +1087,14 @@
 }
 
 
+void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst);
+  EmitUint8(0x03);
+  EmitOperand(dst.LowBits(), address);
+}
+
+
 void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
@@ -1118,6 +1150,14 @@
 }
 
 
+void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0x2B);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
@@ -1201,7 +1241,7 @@
 
 
 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(4, reg, imm);
+  EmitGenericShift(false, 4, reg, imm);
 }
 
 
@@ -1211,7 +1251,12 @@
 
 
 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(5, reg, imm);
+  EmitGenericShift(false, 5, reg, imm);
+}
+
+
+void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 5, reg, imm);
 }
 
 
@@ -1221,7 +1266,7 @@
 
 
 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(7, reg, imm);
+  EmitGenericShift(false, 7, reg, imm);
 }
 
 
@@ -1537,11 +1582,15 @@
 }
 
 
-void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
-                                    CpuRegister reg,
-                                    const Immediate& imm) {
+void X86_64Assembler::EmitGenericShift(bool wide,
+                                       int reg_or_opcode,
+                                       CpuRegister reg,
+                                       const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int8());
+  if (wide) {
+    EmitRex64(reg);
+  }
   if (imm.value() == 1) {
     EmitUint8(0xD1);
     EmitOperand(reg_or_opcode, Operand(reg));
@@ -1554,8 +1603,8 @@
 
 
 void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
-                                    CpuRegister operand,
-                                    CpuRegister shifter) {
+                                       CpuRegister operand,
+                                       CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
   EmitUint8(0xD3);
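The `wide` flag threaded through EmitGenericShift() above changes exactly one byte: a REX.W prefix promoting the 0xC1 group (or 0xD1 for a shift by one) to 64-bit operands. An illustrative encoder for the register-direct shr case (simplified: low registers only, no shift-by-one short form):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> EncodeShrImm(bool wide, uint8_t reg, uint8_t imm) {
      std::vector<uint8_t> out;
      if (wide) out.push_back(0x48);    // REX.W: 64-bit operand size
      out.push_back(0xC1);              // shift r/m, imm8 group
      out.push_back(0xE8 | (reg & 7));  // ModRM 11 101 reg -> /5 = shr
      out.push_back(imm);
      return out;
    }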
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 7514854..1d6655c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -385,10 +385,14 @@
   void cmpl(const Address& address, const Immediate& imm);
 
   void cmpq(CpuRegister reg0, CpuRegister reg1);
+  void cmpq(CpuRegister reg0, const Immediate& imm);
+  void cmpq(CpuRegister reg0, const Address& address);
 
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
+  void testq(CpuRegister reg, const Address& address);
+
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
   void andq(CpuRegister dst, const Immediate& imm);
@@ -408,6 +412,7 @@
 
   void addq(CpuRegister reg, const Immediate& imm);
   void addq(CpuRegister dst, CpuRegister src);
+  void addq(CpuRegister dst, const Address& address);
 
   void subl(CpuRegister dst, CpuRegister src);
   void subl(CpuRegister reg, const Immediate& imm);
@@ -415,6 +420,7 @@
 
   void subq(CpuRegister reg, const Immediate& imm);
   void subq(CpuRegister dst, CpuRegister src);
+  void subq(CpuRegister dst, const Address& address);
 
   void cdq();
 
@@ -437,6 +443,8 @@
   void sarl(CpuRegister reg, const Immediate& imm);
   void sarl(CpuRegister operand, CpuRegister shifter);
 
+  void shrq(CpuRegister reg, const Immediate& imm);
+
   void negl(CpuRegister reg);
   void notl(CpuRegister reg);
 
@@ -622,7 +630,7 @@
   void EmitLabelLink(Label* label);
   void EmitNearLabelLink(Label* label);
 
-  void EmitGenericShift(int rm, CpuRegister reg, const Immediate& imm);
+  void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
   void EmitGenericShift(int rm, CpuRegister operand, CpuRegister shifter);
 
   // If any input is not false, output the necessary rex prefix.
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6d861d4..d6501a1 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -45,6 +45,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "elf_fixup.h"
+#include "elf_patcher.h"
 #include "elf_stripper.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
@@ -324,11 +325,28 @@
     return ReadImageClasses(image_classes_stream);
   }
 
+  bool PatchOatCode(const CompilerDriver* compiler_driver, File* oat_file,
+                    const std::string& oat_location, std::string* error_msg) {
+    // We asked to include patch information but we are not making an image. We need to fix
+    // everything up manually.
+    std::unique_ptr<ElfFile> elf_file(ElfFile::Open(oat_file, PROT_READ|PROT_WRITE,
+                                                    MAP_SHARED, error_msg));
+    if (elf_file.get() == NULL) {
+      LOG(ERROR) << error_msg;
+      return false;
+    }
+    {
+      ReaderMutexLock mu(Thread::Current(), *Locks::mutator_lock_);
+      return ElfPatcher::Patch(compiler_driver, elf_file.get(), oat_location, error_msg);
+    }
+  }
+
   const CompilerDriver* CreateOatFile(const std::string& boot_image_option,
                                       const std::string& android_root,
                                       bool is_host,
                                       const std::vector<const DexFile*>& dex_files,
                                       File* oat_file,
+                                      const std::string& oat_location,
                                       const std::string& bitcode_filename,
                                       bool image,
                                       std::unique_ptr<CompilerDriver::DescriptorSet>& image_classes,
@@ -380,6 +398,7 @@
     std::string image_file_location;
     uint32_t image_file_location_oat_checksum = 0;
     uintptr_t image_file_location_oat_data_begin = 0;
+    int32_t image_patch_delta = 0;
     if (!driver->IsImage()) {
       TimingLogger::ScopedTiming t3("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
@@ -387,6 +406,7 @@
       image_file_location_oat_data_begin =
           reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
       image_file_location = image_space->GetImageFilename();
+      image_patch_delta = image_space->GetImageHeader().GetPatchDelta();
     }
 
     if (!image_file_location.empty()) {
@@ -395,6 +415,7 @@
 
     OatWriter oat_writer(dex_files, image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
+                         image_patch_delta,
                          driver.get(),
                          &timings,
                          key_value_store);
@@ -405,6 +426,16 @@
       return nullptr;
     }
 
+    if (!driver->IsImage() && driver->GetCompilerOptions().GetIncludePatchInformation()) {
+      t2.NewTiming("Patching ELF");
+      std::string error_msg;
+      if (!PatchOatCode(driver.get(), oat_file, oat_location, &error_msg)) {
+        LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
+        LOG(ERROR) << "Error was: " << error_msg;
+        return nullptr;
+      }
+    }
+
     return driver.release();
   }
 
@@ -1361,6 +1392,7 @@
                                                                         is_host,
                                                                         dex_files,
                                                                         oat_file.get(),
+                                                                        oat_location,
                                                                         bitcode_filename,
                                                                         image,
                                                                         image_classes,
@@ -1370,7 +1402,6 @@
                                                                         compiler_phases_timings,
                                                                         profile_file,
                                                                         key_value_store.get()));
-
   if (compiler.get() == nullptr) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
     return EXIT_FAILURE;
@@ -1420,9 +1451,9 @@
   // memory mapped so we could predict where its contents were based
   // on the file size. Now that it is an ELF file, we need to inspect
   // the ELF file to understand the in memory segment layout including
-  // where the oat header is located within. ImageWriter's
-  // PatchOatCodeAndMethods uses the PatchInformation from the
-  // Compiler to touch up absolute references in the oat file.
+  // where the oat header is located within. ElfPatcher's Patch method
+  // uses the PatchInformation from the Compiler to touch up absolute
+  // references in the oat file.
   //
   // 3. We fixup the ELF program headers so that dlopen will try to
   // load the .so at the desired location at runtime by offsetting the
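
For context, PatchOatCode above depends on ElfFile::Open mapping the oat file with PROT_READ|PROT_WRITE and MAP_SHARED, so the relocations ElfPatcher::Patch writes into the mapping are carried through to the file on disk. A minimal sketch of that idiom, with an invented file name and patch offset (not the real oat layout):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int fd = open("/tmp/example.bin", O_RDWR);
      if (fd < 0) { perror("open"); return 1; }
      struct stat st;
      if (fstat(fd, &st) != 0) { perror("fstat"); return 1; }
      // MAP_SHARED + PROT_WRITE: stores into the mapping reach the file.
      void* base = mmap(nullptr, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if (base == MAP_FAILED) { perror("mmap"); return 1; }
      // Relocate a (hypothetical) absolute reference by a delta.
      int32_t delta = 0x1000;
      uint32_t* ref = reinterpret_cast<uint32_t*>(static_cast<uint8_t*>(base) + 0x40);
      *ref += delta;
      munmap(base, st.st_size);
      close(fd);
      return 0;
    }
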
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b8f20f3..068a450 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -165,6 +165,8 @@
                            GetQuickToInterpreterBridgeOffset);
 #undef DUMP_OAT_HEADER_OFFSET
 
+    os << "IMAGE PATCH DELTA:\n" << oat_header.GetImagePatchDelta();
+
     os << "IMAGE FILE LOCATION OAT CHECKSUM:\n";
     os << StringPrintf("0x%08x\n\n", oat_header.GetImageFileLocationOatChecksum());
 
@@ -771,6 +773,8 @@
 
     os << "OAT FILE END:" << reinterpret_cast<void*>(image_header_.GetOatFileEnd()) << "\n\n";
 
+    os << "PATCH DELTA:" << image_header_.GetPatchDelta() << "\n\n";
+
     {
       os << "ROOTS: " << reinterpret_cast<void*>(image_header_.GetImageRoots()) << "\n";
       Indenter indent1_filter(os.rdbuf(), kIndentChar, kIndentBy1Count);
@@ -819,10 +823,13 @@
     os << "OAT LOCATION: " << oat_location;
     os << "\n";
     std::string error_msg;
-    const OatFile* oat_file = class_linker->FindOatFileFromOatLocation(oat_location, &error_msg);
-    if (oat_file == NULL) {
-      os << "NOT FOUND: " << error_msg << "\n";
-      return;
+    const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location);
+    if (oat_file == nullptr) {
+      oat_file = OatFile::Open(oat_location, oat_location, nullptr, false, &error_msg);
+      if (oat_file == nullptr) {
+        os << "NOT FOUND: " << error_msg << "\n";
+        return;
+      }
     }
     os << "\n";
 
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 85b4e6d..4c88c9e 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -17,11 +17,14 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/file.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
 #include <string>
 #include <vector>
 
+#include "base/scoped_flock.h"
 #include "base/stringpiece.h"
 #include "base/stringprintf.h"
 #include "elf_utils.h"
@@ -125,7 +128,7 @@
              delta, timings);
   t.NewTiming("Patching files");
   if (!p.PatchImage()) {
-    LOG(INFO) << "Failed to patch image file " << input_image->GetPath();
+    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
     return false;
   }
 
@@ -216,11 +219,11 @@
              delta, timings);
   t.NewTiming("Patching files");
   if (!p.PatchElf()) {
-    LOG(INFO) << "Failed to patch oat file " << input_oat->GetPath();
+    LOG(ERROR) << "Failed to patch oat file " << input_oat->GetPath();
     return false;
   }
   if (!p.PatchImage()) {
-    LOG(INFO) << "Failed to patch image file " << input_image->GetPath();
+    LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
     return false;
   }
 
@@ -236,6 +239,12 @@
 
 bool PatchOat::WriteElf(File* out) {
   TimingLogger::ScopedTiming t("Writing Elf File", timings_);
+  std::string error_msg;
+
+  // Lock the output file.
+  ScopedFlock flock;
+  flock.Init(out, &error_msg);
+
   CHECK(oat_file_.get() != nullptr);
   CHECK(out != nullptr);
   size_t expect = oat_file_->Size();
@@ -250,6 +259,12 @@
 
 bool PatchOat::WriteImage(File* out) {
   TimingLogger::ScopedTiming t("Writing image File", timings_);
+  std::string error_msg;
+
+  // Lock the output file.
+  ScopedFlock flock;
+  flock.Init(out, &error_msg);
+
   CHECK(image_ != nullptr);
   CHECK(out != nullptr);
   size_t expect = image_->Size();
@@ -437,19 +452,50 @@
   return true;
 }
 
+bool PatchOat::PatchOatHeader() {
+  Elf32_Shdr *rodata_sec = oat_file_->FindSectionByName(".rodata");
+  if (rodata_sec == nullptr) {
+    return false;
+  }
+  OatHeader* oat_header = reinterpret_cast<OatHeader*>(oat_file_->Begin() + rodata_sec->sh_offset);
+  if (!oat_header->IsValid()) {
+    LOG(ERROR) << "Elf file " << oat_file_->GetFile().GetPath() << " has an invalid oat header";
+    return false;
+  }
+  oat_header->RelocateOat(delta_);
+  return true;
+}
+
 bool PatchOat::PatchElf() {
-  TimingLogger::ScopedTiming t("Fixup Elf Headers", timings_);
+  TimingLogger::ScopedTiming t("Fixup Elf Text Section", timings_);
+  if (!PatchTextSection()) {
+    return false;
+  }
+
+  if (!PatchOatHeader()) {
+    return false;
+  }
+
+  bool need_fixup = false;
+  t.NewTiming("Fixup Elf Headers");
   // Fixup Phdr's
   for (unsigned int i = 0; i < oat_file_->GetProgramHeaderNum(); i++) {
     Elf32_Phdr& hdr = oat_file_->GetProgramHeader(i);
-    if (hdr.p_vaddr != 0) {
+    if (hdr.p_vaddr != 0 && hdr.p_vaddr != hdr.p_offset) {
+      need_fixup = true;
       hdr.p_vaddr += delta_;
     }
-    if (hdr.p_paddr != 0) {
+    if (hdr.p_paddr != 0 && hdr.p_paddr != hdr.p_offset) {
+      need_fixup = true;
       hdr.p_paddr += delta_;
     }
   }
-  // Fixup Shdr's
+  if (!need_fixup) {
+    // This file was never passed through ElfFixup, so all headers/symbols just have their
+    // offset as their address. Therefore we do not need to update these parts.
+    return true;
+  }
+  t.NewTiming("Fixup Section Headers");
   for (unsigned int i = 0; i < oat_file_->GetSectionHeaderNum(); i++) {
     Elf32_Shdr& hdr = oat_file_->GetSectionHeader(i);
     if (hdr.sh_addr != 0) {
@@ -457,7 +503,7 @@
     }
   }
 
-  // Fixup Dynamics.
+  t.NewTiming("Fixup Dynamics");
   for (Elf32_Word i = 0; i < oat_file_->GetDynamicNum(); i++) {
     Elf32_Dyn& dyn = oat_file_->GetDynamic(i);
     if (IsDynamicSectionPointer(dyn.d_tag, oat_file_->GetHeader().e_machine)) {
@@ -481,12 +527,6 @@
     }
   }
 
-  t.NewTiming("Fixup Elf Text Section");
-  // Fixup text
-  if (!PatchTextSection()) {
-    return false;
-  }
-
   return true;
 }
 
@@ -511,7 +551,7 @@
 bool PatchOat::PatchTextSection() {
   Elf32_Shdr* patches_sec = oat_file_->FindSectionByName(".oat_patches");
   if (patches_sec == nullptr) {
-    LOG(INFO) << ".oat_patches section not found. Aborting patch";
+    LOG(ERROR) << ".oat_patches section not found. Aborting patch";
     return false;
   }
   DCHECK(CheckOatFile()) << "Oat file invalid";
@@ -614,7 +654,8 @@
   UsageError("");
   UsageError("  --patched-image-location=<file.art>: Use the same patch delta as was used to");
   UsageError("      patch the given image location. If used one must also specify the");
-  UsageError("      --instruction-set flag.");
+  UsageError("      --instruction-set flag. It will search for this image in the same way that");
+  UsageError("      is done when loading one.");
   UsageError("");
   UsageError("  --dump-timings: dump out patch timing information");
   UsageError("");
@@ -909,7 +950,25 @@
     if (!isa_set) {
       Usage("specifying a location requires specifying an instruction set");
     }
-    patched_image_filename = GetSystemImageFilename(patched_image_location.c_str(), isa);
+    std::string system_filename;
+    bool has_system = false;
+    std::string cache_filename;
+    bool has_cache = false;
+    bool has_android_data_unused = false;
+    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
+                                                  &system_filename, &has_system, &cache_filename,
+                                                  &has_android_data_unused, &has_cache)) {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
+    if (has_cache) {
+      patched_image_filename = cache_filename;
+    } else if (has_system) {
+      LOG(WARNING) << "Only image file found was in /system for image location "
+                   << patched_image_location;
+      patched_image_filename = system_filename;
+    } else {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
     if (debug) {
       LOG(INFO) << "Using patched-image-file " << patched_image_filename;
     }
@@ -965,10 +1024,21 @@
     } else {
       CHECK(!input_oat_filename.empty());
       input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
+      if (input_oat.get() == nullptr) {
+        LOG(ERROR) << "Could not open input oat file: " << strerror(errno);
+      }
     }
 
     if (output_oat_fd != -1) {
       output_oat.reset(new File(output_oat_fd, output_oat_filename));
+    } else if (output_oat_filename == input_oat_filename) {
+      // This could be a weird situation, since we'd be writing from an mmap'd copy of this file.
+      // Let's just unlink it.
+      if (0 != unlink(input_oat_filename.c_str())) {
+        PLOG(ERROR) << "Could not unlink " << input_oat_filename << " to make room for output";
+        return false;
+      }
+      output_oat.reset(OS::CreateEmptyFile(output_oat_filename.c_str()));
     } else {
       CHECK(!output_oat_filename.empty());
       output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
@@ -994,7 +1064,9 @@
   };
 
   if (debug) {
-    LOG(INFO) << "moving offset by " << base_delta << " (0x" << std::hex << base_delta << ") bytes";
+    LOG(INFO) << "moving offset by " << base_delta
+              << " (0x" << std::hex << base_delta << ") bytes or "
+              << std::dec << (base_delta/kPageSize) << " pages.";
   }
 
   bool ret;
@@ -1011,6 +1083,7 @@
     ret = PatchOat::Patch(input_image_location, base_delta, output_image.get(), isa, &timings);
   }
   cleanup(ret);
+  sync();
   return (ret) ? EXIT_SUCCESS : EXIT_FAILURE;
 }
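
The p_vaddr != p_offset comparison added to PatchElf above is worth spelling out: an oat file that never went through ElfFixup still has every address equal to its file offset, so applying the delta to its headers and symbols would corrupt it. A small sketch of the heuristic, assuming the standard <elf.h> types (the function name is invented):

    #include <elf.h>
    #include <cstddef>

    // True when at least one loadable header carries a real (fixed-up)
    // address, i.e. the file went through ElfFixup and needs the delta.
    bool NeedsAddressFixup(const Elf32_Phdr* phdrs, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        const Elf32_Phdr& hdr = phdrs[i];
        if (hdr.p_vaddr != 0 && hdr.p_vaddr != hdr.p_offset) return true;
        if (hdr.p_paddr != 0 && hdr.p_paddr != hdr.p_offset) return true;
      }
      return false;
    }
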
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index a63e6f4..6960d3b 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -79,6 +79,7 @@
   // Patches oat in place, modifying the oat_file given to the constructor.
   bool PatchElf();
   bool PatchTextSection();
+  bool PatchOatHeader();
   bool PatchSymbols(Elf32_Shdr* section);
 
   bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index d2fc229..6fa34c4 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -238,6 +238,7 @@
   arch/x86/context_x86.cc \
   arch/x86/entrypoints_init_x86.cc \
   arch/x86/jni_entrypoints_x86.S \
+  arch/x86/memcmp16_x86.S \
   arch/x86/portable_entrypoints_x86.S \
   arch/x86/quick_entrypoints_x86.S \
   arch/x86/thread_x86.cc \
@@ -292,6 +293,7 @@
   dex_file.h \
   dex_instruction.h \
   gc/collector/gc_type.h \
+  gc/collector_type.h \
   gc/space/space.h \
   gc/heap.h \
   indirect_reference_table.h \
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index e22c56e..48582f4 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -61,7 +61,7 @@
   // get the method from the top of the stack.  However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kArm));
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -192,7 +192,7 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
-  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm);
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
index 1144c8c..65d2f92 100644
--- a/runtime/arch/memcmp16.h
+++ b/runtime/arch/memcmp16.h
@@ -30,7 +30,7 @@
 //
 // In both cases, MemCmp16 is declared.
 
-#if defined(__aarch64__) || defined(__arm__) || defined(__mips)
+#if defined(__aarch64__) || defined(__arm__) || defined(__mips) || defined(__i386__)
 
 extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
 #define MemCmp16 __memcmp16
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ada1523..8786222 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1076,14 +1076,15 @@
     .cfi_startproc
     addiu    $t9, $ra, 4    # put current address into $t9 to rebuild $gp
     GENERATE_GLOBAL_POINTER
-    move     $t0, $sp       # remember bottom of caller's frame
+    move     $ra, $zero     # the link register points here, so clobber with 0 for later checks
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    move     $t0, $sp       # remember bottom of caller's frame
     addiu    $sp, $sp, -48  # save return values and set up args
     .cfi_adjust_cfa_offset 48
     sw       $v0, 32($sp)
-    .cfi_rel_offset 2, 0
+    .cfi_rel_offset 2, 32
     sw       $v1, 36($sp)
-    .cfi_rel_offset 3, 4
+    .cfi_rel_offset 3, 36
     s.s      $f0, 40($sp)
     s.s      $f1, 44($sp)
     s.s      $f0, 16($sp)   # pass fpr result
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index ae39be1..e468c2a 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -81,6 +81,8 @@
     #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
     #define CFI_RESTORE(reg) .cfi_restore reg
     #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+    #define CFI_RESTORE_STATE .cfi_restore_state
+    #define CFI_REMEMBER_STATE .cfi_remember_state
 #else
     // Mac OS' doesn't like cfi_* directives.
     #define CFI_STARTPROC
@@ -90,6 +92,8 @@
     #define CFI_DEF_CFA_REGISTER(reg)
     #define CFI_RESTORE(reg)
     #define CFI_REL_OFFSET(reg,size)
+    #define CFI_RESTORE_STATE
+    #define CFI_REMEMBER_STATE
 #endif
 
     // Symbols.
diff --git a/runtime/arch/x86/memcmp16_x86.S b/runtime/arch/x86/memcmp16_x86.S
new file mode 100644
index 0000000..17662fa
--- /dev/null
+++ b/runtime/arch/x86/memcmp16_x86.S
@@ -0,0 +1,1038 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86.S"
+
+#define MEMCMP  __memcmp16
+
+/* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#define CFI_PUSH(REG)	\
+	CFI_ADJUST_CFA_OFFSET(4);	\
+	CFI_REL_OFFSET(REG, 0)
+
+#define CFI_POP(REG)	\
+	CFI_ADJUST_CFA_OFFSET(-4);	\
+	CFI_RESTORE(REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define PARMS		4
+#define BLK1		PARMS
+#define BLK2		BLK1+4
+#define LEN		BLK2+4
+#define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
+#define RETURN		RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE
+
+DEFINE_FUNCTION MEMCMP
+	movl	LEN(%esp), %ecx
+
+	shl	$1, %ecx
+	jz	L(zero)
+
+	movl	BLK1(%esp), %eax
+	cmp	$48, %ecx
+	movl	BLK2(%esp), %edx
+	jae	L(48bytesormore)
+
+	PUSH	(%ebx)
+	add	%ecx, %edx
+	add	%ecx, %eax
+	jmp	L(less48bytes)
+
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(zero):
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(48bytesormore):
+	PUSH	(%ebx)
+	PUSH	(%esi)
+	PUSH	(%edi)
+	CFI_REMEMBER_STATE
+	movdqu	(%eax), %xmm3
+	movdqu	(%edx), %xmm0
+	movl	%eax, %edi
+	movl	%edx, %esi
+	pcmpeqb	%xmm0, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%edi), %edi
+
+	sub	$0xffff, %edx
+	lea	16(%esi), %esi
+	jnz	L(less16bytes)
+	mov	%edi, %edx
+	and	$0xf, %edx
+	xor	%edx, %edi
+	sub	%edx, %esi
+	add	%edx, %ecx
+	mov	%esi, %edx
+	and	$0xf, %edx
+	jz	L(shr_0)
+	xor	%edx, %esi
+
+	cmp	$0, %edx
+	je	L(shr_0)
+	cmp	$2, %edx
+	je	L(shr_2)
+	cmp	$4, %edx
+	je	L(shr_4)
+	cmp	$6, %edx
+	je	L(shr_6)
+	cmp	$8, %edx
+	je	L(shr_8)
+	cmp	$10, %edx
+	je	L(shr_10)
+	cmp	$12, %edx
+	je	L(shr_12)
+	jmp	L(shr_14)
+
+	.p2align 4
+L(shr_0):
+	cmp	$80, %ecx
+	jae	L(shr_0_gobble)
+	lea	-48(%ecx), %ecx
+	xor	%eax, %eax
+	movaps	(%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+	movaps	16(%esi), %xmm2
+	pcmpeqb	16(%edi), %xmm2
+	pand	%xmm1, %xmm2
+	pmovmskb %xmm2, %edx
+	add	$32, %edi
+	add	$32, %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_0_gobble):
+	lea	-48(%ecx), %ecx
+	movdqa	(%esi), %xmm0
+	xor	%eax, %eax
+	pcmpeqb	(%edi), %xmm0
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm2
+	pcmpeqb	16(%edi), %xmm2
+L(shr_0_gobble_loop):
+	pand	%xmm0, %xmm2
+	sub	$32, %ecx
+	pmovmskb %xmm2, %edx
+	movdqa	%xmm0, %xmm1
+	movdqa	32(%esi), %xmm0
+	movdqa	48(%esi), %xmm2
+	sbb	$0xffff, %edx
+	pcmpeqb	32(%edi), %xmm0
+	pcmpeqb	48(%edi), %xmm2
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	jz	L(shr_0_gobble_loop)
+
+	pand	%xmm0, %xmm2
+	cmp	$0, %ecx
+	jge	L(shr_0_gobble_loop_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_0_gobble_loop_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm2, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_2):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_2_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$2,(%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$2,%xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	2(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_2_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$2,(%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$2,16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_2_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$2,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$2,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_2_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_2_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_2_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	2(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_4):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_4_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$4,(%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$4,%xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	4(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_4_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$4,(%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$4,16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_4_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$4,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$4,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_4_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_4_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_4_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	4(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_6):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_6_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$6,(%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$6,%xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	6(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_6_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$6,(%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$6,16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_6_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$6,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$6,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_6_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_6_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_6_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	6(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_8):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_8_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$8,(%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$8,%xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	8(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_8_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$8,(%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$8,16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_8_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$8,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$8,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_8_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_8_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_8_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	8(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_10):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_10_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$10, (%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$10,%xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	10(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_10_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$10, (%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$10, 16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_10_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$10,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$10,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_10_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_10_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_10_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	10(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_12):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_12_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$12, (%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$12, %xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	12(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_12_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$12, (%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$12, 16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_12_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$12,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$12,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_12_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_12_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_12_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	12(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_14):
+	cmp	$80, %ecx
+	lea	-48(%ecx), %ecx
+	mov	%edx, %eax
+	jae	L(shr_14_gobble)
+
+	movdqa	16(%esi), %xmm1
+	movdqa	%xmm1, %xmm2
+	palignr	$14, (%esi), %xmm1
+	pcmpeqb	(%edi), %xmm1
+
+	movdqa	32(%esi), %xmm3
+	palignr	$14, %xmm2, %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+	pand	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+	lea	(%ecx, %edi,1), %eax
+	lea	14(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(shr_14_gobble):
+	sub	$32, %ecx
+	movdqa	16(%esi), %xmm0
+	palignr	$14, (%esi), %xmm0
+	pcmpeqb	(%edi), %xmm0
+
+	movdqa	32(%esi), %xmm3
+	palignr	$14, 16(%esi), %xmm3
+	pcmpeqb	16(%edi), %xmm3
+
+L(shr_14_gobble_loop):
+	pand	%xmm0, %xmm3
+	sub	$32, %ecx
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+
+	movdqa	64(%esi), %xmm3
+	palignr	$14,48(%esi), %xmm3
+	sbb	$0xffff, %edx
+	movdqa	48(%esi), %xmm0
+	palignr	$14,32(%esi), %xmm0
+	pcmpeqb	32(%edi), %xmm0
+	lea	32(%esi), %esi
+	pcmpeqb	48(%edi), %xmm3
+
+	lea	32(%edi), %edi
+	jz	L(shr_14_gobble_loop)
+	pand	%xmm0, %xmm3
+
+	cmp	$0, %ecx
+	jge	L(shr_14_gobble_next)
+	inc	%edx
+	add	$32, %ecx
+L(shr_14_gobble_next):
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pmovmskb %xmm3, %edx
+	movdqa	%xmm0, %xmm1
+	lea	32(%edi), %edi
+	lea	32(%esi), %esi
+	sub	$0xffff, %edx
+	jnz	L(exit)
+
+	lea	(%ecx, %edi,1), %eax
+	lea	14(%ecx, %esi,1), %edx
+	POP	(%edi)
+	POP	(%esi)
+	jmp	L(less48bytes)
+
+	CFI_RESTORE_STATE
+	CFI_REMEMBER_STATE
+	.p2align 4
+L(exit):
+	pmovmskb %xmm1, %ebx
+	sub	$0xffff, %ebx
+	jz	L(first16bytes)
+	lea	-16(%esi), %esi
+	lea	-16(%edi), %edi
+	mov	%ebx, %edx
+
+L(first16bytes):
+	add	%eax, %esi
+L(less16bytes):
+	test	%dl, %dl
+	jz	L(next_four_words)
+	test	$15, %dl
+	jz	L(second_two_words)
+	test	$3, %dl
+	jz	L(second_word)
+	movzwl	-16(%edi), %eax
+	movzwl	-16(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(second_word):
+	movzwl	-14(%edi), %eax
+	movzwl	-14(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(second_two_words):
+	test	$63, %dl
+	jz	L(fourth_word)
+	movzwl	-12(%edi), %eax
+	movzwl	-12(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(fourth_word):
+	movzwl	-10(%edi), %eax
+	movzwl	-10(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(next_four_words):
+	test	$15, %dh
+	jz	L(fourth_two_words)
+	test	$3, %dh
+	jz	L(sixth_word)
+	movzwl	-8(%edi), %eax
+	movzwl	-8(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(sixth_word):
+	movzwl	-6(%edi), %eax
+	movzwl	-6(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(fourth_two_words):
+	test	$63, %dh
+	jz	L(eighth_word)
+	movzwl	-4(%edi), %eax
+	movzwl	-4(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(eighth_word):
+	movzwl	-2(%edi), %eax
+	movzwl	-2(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+
+	CFI_PUSH (%ebx)
+
+	.p2align 4
+L(more8bytes):
+	cmp	$16, %ecx
+	jae	L(more16bytes)
+	cmp	$8, %ecx
+	je	L(8bytes)
+	cmp	$10, %ecx
+	je	L(10bytes)
+	cmp	$12, %ecx
+	je	L(12bytes)
+	jmp	L(14bytes)
+
+	.p2align 4
+L(more16bytes):
+	cmp	$24, %ecx
+	jae	L(more24bytes)
+	cmp	$16, %ecx
+	je	L(16bytes)
+	cmp	$18, %ecx
+	je	L(18bytes)
+	cmp	$20, %ecx
+	je	L(20bytes)
+	jmp	L(22bytes)
+
+	.p2align 4
+L(more24bytes):
+	cmp	$32, %ecx
+	jae	L(more32bytes)
+	cmp	$24, %ecx
+	je	L(24bytes)
+	cmp	$26, %ecx
+	je	L(26bytes)
+	cmp	$28, %ecx
+	je	L(28bytes)
+	jmp	L(30bytes)
+
+	.p2align 4
+L(more32bytes):
+	cmp	$40, %ecx
+	jae	L(more40bytes)
+	cmp	$32, %ecx
+	je	L(32bytes)
+	cmp	$34, %ecx
+	je	L(34bytes)
+	cmp	$36, %ecx
+	je	L(36bytes)
+	jmp	L(38bytes)
+
+	.p2align 4
+L(less48bytes):
+	cmp	$8, %ecx
+	jae	L(more8bytes)
+	cmp	$2, %ecx
+	je	L(2bytes)
+	cmp	$4, %ecx
+	je	L(4bytes)
+	jmp	L(6bytes)
+
+	.p2align 4
+L(more40bytes):
+	cmp	$40, %ecx
+	je	L(40bytes)
+	cmp	$42, %ecx
+	je	L(42bytes)
+	cmp	$44, %ecx
+	je	L(44bytes)
+	jmp	L(46bytes)
+
+	.p2align 4
+L(46bytes):
+	movzwl	-46(%eax), %ecx
+	movzwl	-46(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(44bytes):
+	movzwl	-44(%eax), %ecx
+	movzwl	-44(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(42bytes):
+	movzwl	-42(%eax), %ecx
+	movzwl	-42(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(40bytes):
+	movzwl	-40(%eax), %ecx
+	movzwl	-40(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(38bytes):
+	movzwl	-38(%eax), %ecx
+	movzwl	-38(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(36bytes):
+	movzwl	-36(%eax), %ecx
+	movzwl	-36(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(34bytes):
+	movzwl	-34(%eax), %ecx
+	movzwl	-34(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(32bytes):
+	movzwl	-32(%eax), %ecx
+	movzwl	-32(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(30bytes):
+	movzwl	-30(%eax), %ecx
+	movzwl	-30(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(28bytes):
+	movzwl	-28(%eax), %ecx
+	movzwl	-28(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(26bytes):
+	movzwl	-26(%eax), %ecx
+	movzwl	-26(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(24bytes):
+	movzwl	-24(%eax), %ecx
+	movzwl	-24(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(22bytes):
+	movzwl	-22(%eax), %ecx
+	movzwl	-22(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(20bytes):
+	movzwl	-20(%eax), %ecx
+	movzwl	-20(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(18bytes):
+	movzwl	-18(%eax), %ecx
+	movzwl	-18(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(16bytes):
+	movzwl	-16(%eax), %ecx
+	movzwl	-16(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(14bytes):
+	movzwl	-14(%eax), %ecx
+	movzwl	-14(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(12bytes):
+	movzwl	-12(%eax), %ecx
+	movzwl	-12(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(10bytes):
+	movzwl	-10(%eax), %ecx
+	movzwl	-10(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(8bytes):
+	movzwl	-8(%eax), %ecx
+	movzwl	-8(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(6bytes):
+	movzwl	-6(%eax), %ecx
+	movzwl	-6(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(4bytes):
+	movzwl	-4(%eax), %ecx
+	movzwl	-4(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(2bytes):
+	movzwl	-2(%eax), %eax
+	movzwl	-2(%edx), %ebx
+	subl	%ebx, %eax
+	POP	(%ebx)
+	ret
+	CFI_PUSH (%ebx)
+
+	.p2align 4
+L(memcmp16_exit):
+	POP	(%ebx)
+	mov	%ecx, %eax
+	ret
+END_FUNCTION MEMCMP
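
For readers of the SSE code above: every path ultimately computes the zero-extended difference of the first mismatching uint16_t pair (the movzwl/subl tails), or zero when all count elements match. A plain reference version with the same contract, offered as a sketch (the name is invented; the real entry point is __memcmp16):

    #include <cstddef>
    #include <cstdint>

    // Non-SIMD reference of what the assembly computes: compare 'count'
    // uint16_t values; return the difference of the first pair that
    // differs, or 0 when the ranges are equal.
    extern "C" int32_t memcmp16_reference(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }
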
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 68f46ad..1522129 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1277,14 +1277,12 @@
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
     CFI_ADJUST_CFA_OFFSET(8)
-    movd %xmm0, (%esp)
+    movq %xmm0, (%esp)
     PUSH edx                      // Save gpr return value.
     PUSH eax
-    subl LITERAL(8), %esp         // Align stack
-    movd %xmm0, (%esp)
-    subl LITERAL(8), %esp         // Pass float return value.
-    CFI_ADJUST_CFA_OFFSET(8)
-    movd %xmm0, (%esp)
+    subl LITERAL(16), %esp        // Align stack
+    CFI_ADJUST_CFA_OFFSET(16)
+    movq %xmm0, (%esp)            // Pass float return value.
     PUSH edx                      // Pass gpr return value.
     PUSH eax
     PUSH ecx                      // Pass SP.
@@ -1299,7 +1297,7 @@
                                   // (ebx is pretending to be our LR).
     POP eax                       // Restore gpr return value.
     POP edx
-    movd (%esp), %xmm0            // Restore fpr return value.
+    movq (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
     CFI_ADJUST_CFA_OFFSET(-8)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 50b2de4..48bc240 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1495,7 +1495,7 @@
     PUSH rax                  // Save integer result.
     subq LITERAL(8), %rsp     // Save floating-point result.
     CFI_ADJUST_CFA_OFFSET(8)
-    movd %xmm0, (%rsp)
+    movq %xmm0, (%rsp)
 
     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
     movq  %rax, %rdx                          // Pass integer result.
@@ -1506,7 +1506,7 @@
     movq  %rax, %rdi          // Store return PC
     movq  %rdx, %rsi          // Store second return PC in hidden arg.
 
-    movd (%rsp), %xmm0        // Restore floating-point result.
+    movq (%rsp), %xmm0        // Restore floating-point result.
     addq LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
     POP rax                   // Restore integer result.
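
The movd-to-movq changes in both entry-point files are the heart of this merge: movd %xmm0, (%esp) stores only the low 32 bits, so the upper half of a double (or long) return value was lost before the instrumentation exit handler, and hence the debugger, could read it. A quick illustration of that truncation in plain C++ (no assembly required):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double ret = 3.141592653589793;
      uint64_t bits;
      std::memcpy(&bits, &ret, sizeof(bits));
      uint64_t movd_saved = bits & 0xffffffffu;  // what a 32-bit store keeps
      double restored;
      std::memcpy(&restored, &movd_saved, sizeof(restored));
      printf("original %.17g, after 32-bit save/restore %.17g\n", ret, restored);
      return 0;
    }
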
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 7779547..c0a865f 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -41,6 +41,7 @@
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
+Mutex* Locks::thread_list_suspend_thread_lock_ = nullptr;
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
@@ -149,7 +150,8 @@
     for (int i = kLockLevelCount - 1; i >= 0; --i) {
       if (i != level_) {
         BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-        if (held_mutex != NULL) {
+        // We expect waits to happen while holding the thread list suspend thread lock.
+        if (held_mutex != NULL && i != kThreadListSuspendThreadLock) {
           LOG(ERROR) << "Holding \"" << held_mutex->name_ << "\" "
                      << "(level " << LockLevel(i) << ") while performing wait on "
                      << "\"" << name_ << "\" (level " << level_ << ")";
@@ -161,16 +163,10 @@
   }
 }
 
-inline void BaseMutex::ContentionLogData::AddToWaitTime(uint64_t value) {
+void BaseMutex::ContentionLogData::AddToWaitTime(uint64_t value) {
   if (kLogLockContentions) {
     // Atomically add value to wait_time.
-    uint64_t new_val, old_val;
-    volatile int64_t* addr = reinterpret_cast<volatile int64_t*>(&wait_time);
-    volatile const int64_t* caddr = const_cast<volatile const int64_t*>(addr);
-    do {
-      old_val = static_cast<uint64_t>(QuasiAtomic::Read64(caddr));
-      new_val = old_val + value;
-    } while (!QuasiAtomic::Cas64(static_cast<int64_t>(old_val), static_cast<int64_t>(new_val), addr));
+    wait_time.FetchAndAddSequentiallyConsistent(value);
   }
 }
 
@@ -204,7 +200,7 @@
   if (kLogLockContentions) {
     const ContentionLogData* data = contention_log_data_;
     const ContentionLogEntry* log = data->contention_log;
-    uint64_t wait_time = data->wait_time;
+    uint64_t wait_time = data->wait_time.LoadRelaxed();
     uint32_t contention_count = data->contention_count.LoadRelaxed();
     if (contention_count == 0) {
       os << "never contended";
@@ -841,6 +837,7 @@
     DCHECK(logging_lock_ != nullptr);
     DCHECK(mutator_lock_ != nullptr);
     DCHECK(thread_list_lock_ != nullptr);
+    DCHECK(thread_list_suspend_thread_lock_ != nullptr);
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(profiler_lock_ != nullptr);
@@ -848,13 +845,18 @@
     DCHECK(intern_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
-    LockLevel current_lock_level = kMutatorLock;
-    DCHECK(mutator_lock_ == nullptr);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
+    LockLevel current_lock_level = kThreadListSuspendThreadLock;
+    DCHECK(thread_list_suspend_thread_lock_ == nullptr);
+    thread_list_suspend_thread_lock_ =
+        new Mutex("thread list suspend thread by .. lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-        DCHECK_LT(new_level, current_lock_level); \
-        current_lock_level = new_level;
+      DCHECK_LT(new_level, current_lock_level); \
+      current_lock_level = new_level;
+
+    UPDATE_CURRENT_LOCK_LEVEL(kMutatorLock);
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock);
     DCHECK(heap_bitmap_lock_ == nullptr);
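
The AddToWaitTime rewrite above replaces a hand-rolled CAS retry loop (QuasiAtomic::Read64/Cas64) with a single atomic fetch-and-add on Atomic<uint64_t>. The equivalent before/after in portable std::atomic terms, as a sketch:

    #include <atomic>
    #include <cstdint>

    // Illustration only (std::atomic, not ART's Atomic<> wrapper).
    std::atomic<uint64_t> wait_time{0};

    void AddToWaitTimeOld(uint64_t value) {
      uint64_t old_val = wait_time.load(std::memory_order_relaxed);
      // Retry until no other thread raced our update.
      while (!wait_time.compare_exchange_weak(old_val, old_val + value)) {
      }
    }

    void AddToWaitTimeNew(uint64_t value) {
      wait_time.fetch_add(value, std::memory_order_seq_cst);  // one RMW, no loop
    }
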
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8d2cd07..818f9d9 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -93,6 +93,7 @@
   kRuntimeShutdownLock,
   kHeapBitmapLock,
   kMutatorLock,
+  kThreadListSuspendThreadLock,
   kZygoteCreationLock,
 
   kLockLevelCount  // Must come last.
@@ -160,7 +161,7 @@
     // Number of times the Mutex has been contended.
     AtomicInteger contention_count;
     // Sum of time waited by all contenders in ns.
-    volatile uint64_t wait_time;
+    Atomic<uint64_t> wait_time;
     void AddToWaitTime(uint64_t value);
     ContentionLogData() : wait_time(0) {}
   };
@@ -474,6 +475,15 @@
  public:
   static void Init();
 
+  // There's a potential race for two threads to try to suspend each other and for both of them
+  // to succeed and get blocked while becoming runnable. This lock ensures that only one thread
+  // is requesting suspension of another at any time. As the thread list suspend thread logic
+  // transitions to runnable, if the current thread were asked to be suspended then this thread
+  // would block holding this lock until it could safely request suspension of the other thread
+  // without that thread having a pending suspension request against this thread. This avoids a
+  // potential deadlock cycle.
+  static Mutex* thread_list_suspend_thread_lock_;
+
   // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
   // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
   // a share on the mutator_lock_. The garbage collector may also execute with shared access but
@@ -532,7 +542,7 @@
   // else                                          |  .. running ..
   //   Goto x                                      |  .. running ..
   //  .. running ..                                |  .. running ..
-  static ReaderWriterMutex* mutator_lock_;
+  static ReaderWriterMutex* mutator_lock_ ACQUIRED_AFTER(thread_list_suspend_thread_lock_);
 
   // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
   static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
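
The long comment on thread_list_suspend_thread_lock_ describes a classic fix for a symmetric deadlock: serialize the one operation that can make two threads block on each other. A toy sketch of the shape of the fix, using std::mutex as a stand-in for the new lock (none of these names are ART's):

    #include <mutex>

    // If thread A requests suspension of B while B requests suspension of
    // A, both can block forever. Funneling every suspend request through
    // one mutex keeps at most one request in flight at a time.
    std::mutex suspend_request_lock;  // stand-in for thread_list_suspend_thread_lock_

    void SuspendThreadByPeer(/* Thread* target */) {
      std::lock_guard<std::mutex> guard(suspend_request_lock);
      // Only the holder may ask another thread to suspend, so two threads
      // can no longer trap each other in simultaneous suspend requests.
      // ... request suspension and wait for the target to check in ...
    }
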
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 351de3d..bf091d0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -58,6 +58,22 @@
   }
 }
 
+bool ScopedFlock::Init(File* file, std::string* error_msg) {
+  file_.reset(new File(dup(file->Fd())));
+  if (file_->Fd() == -1) {
+    file_.reset();
+    *error_msg = StringPrintf("Failed to duplicate open file '%s': %s",
+                              file->GetPath().c_str(), strerror(errno));
+    return false;
+  }
+  if (0 != TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX))) {
+    file_.reset();
+    *error_msg = StringPrintf("Failed to lock file '%s': %s", file->GetPath().c_str(), strerror(errno));
+    return false;
+  }
+  return true;
+}
+
 File* ScopedFlock::GetFile() {
   CHECK(file_.get() != NULL);
   return file_.get();
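
A hypothetical call site for the new Init(File*, ...) overload, mirroring how PatchOat::WriteElf and WriteImage use it (error handling is added here for illustration; the patchoat callers currently ignore the return value):

    // Sketch only: 'File', 'ScopedFlock' and 'LOG' are the ART types used
    // elsewhere in this patch; the function name is invented.
    bool WriteLocked(File* out) {
      std::string error_msg;
      ScopedFlock flock;
      if (!flock.Init(out, &error_msg)) {  // dup(2) + flock(LOCK_EX) on out's fd
        LOG(ERROR) << "Could not lock " << out->GetPath() << ": " << error_msg;
        return false;
      }
      // ... write through 'out'; the lock is released when 'flock' is destroyed ...
      return true;
    }
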
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index f8ed805..08612e3 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -37,6 +37,10 @@
   // changed (usually due to a new file being created at the same path)
   // between attempts to lock it.
   bool Init(const char* filename, std::string* error_msg);
+  // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
+  // Returns true if the lock could be acquired or false if an error
+  // occurred.
+  bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
   File* GetFile();
@@ -45,6 +49,7 @@
   bool HasFile();
 
   ~ScopedFlock();
+
  private:
   std::unique_ptr<File> file_;
   DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 2c11f8b..7f89156 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -814,6 +814,7 @@
         return false;
       }
 
+      // TODO: The caller specifically asked for this oat_location. We should probably honor it.
       open_oat_file.reset(FindOatFileInOatLocationForDexFile(dex_location, dex_location_checksum,
                                                              oat_location, &error_msg));
 
@@ -833,8 +834,6 @@
       // There's no point in going forward and eventually trying to regenerate the
       // file if we couldn't remove the obsolete one. Most likely we will fail
       // with the same error when trying to write the new file.
-      // In case the clean up failure is due to permission issues it's *mandatory*
-      // to stop to avoid regenerating under the wrong user.
       // TODO: should we maybe do this only when we get permission issues? (i.e. EACCESS).
       if (obsolete_file_cleanup_failed) {
         return false;
@@ -940,6 +939,13 @@
                               actual_image_oat_offset);
     return nullptr;
   }
+  int32_t expected_patch_delta = image_header.GetPatchDelta();
+  int32_t actual_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
+  if (expected_patch_delta != actual_patch_delta) {
+    *error_msg = StringPrintf("Failed to find oat file at '%s' with expected patch delta %d, "
+                              " found %d", oat_location, expected_patch_delta, actual_patch_delta);
+    return nullptr;
+  }
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
                                                                     &dex_location_checksum);
   if (oat_dex_file == nullptr) {
@@ -1000,19 +1006,23 @@
   // image header from the image for the right instruction set.
   uint32_t image_oat_checksum = 0;
   uintptr_t image_oat_data_begin = 0;
-  if (instruction_set == kRuntimeISA) {
+  int32_t image_patch_delta = 0;
+  if (instruction_set == Runtime::Current()->GetInstructionSet()) {
     const ImageHeader& image_header = image_space->GetImageHeader();
     image_oat_checksum = image_header.GetOatChecksum();
     image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
+    image_patch_delta = image_header.GetPatchDelta();
   } else {
     std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
         image_space->GetImageLocation().c_str(), instruction_set));
     image_oat_checksum = image_header->GetOatChecksum();
     image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+    image_patch_delta = image_header->GetPatchDelta();
   }
   const OatHeader& oat_header = oat_file->GetOatHeader();
   bool image_check = ((oat_header.GetImageFileLocationOatChecksum() == image_oat_checksum)
-                      && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin));
+                      && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin)
+                      && (oat_header.GetImagePatchDelta() == image_patch_delta));
 
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
                                                                     &dex_location_checksum);
@@ -1051,16 +1061,11 @@
   return false;
 }
 
-const OatFile* ClassLinker::LoadOatFileAndVerifyDexFile(const std::string& oat_file_location,
-                                                        const char* dex_location,
-                                                        std::string* error_msg,
-                                                        bool* open_failed) {
-  std::unique_ptr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
-  if (oat_file.get() == nullptr) {
-    *open_failed = true;
-    return nullptr;
-  }
-  *open_failed = false;
+bool ClassLinker::VerifyOatWithDexFile(const OatFile* oat_file,
+                                       const char* dex_location,
+                                       std::string* error_msg) {
+  CHECK(oat_file != nullptr);
+  CHECK(dex_location != nullptr);
   std::unique_ptr<const DexFile> dex_file;
   uint32_t dex_location_checksum;
   if (!DexFile::GetChecksum(dex_location, &dex_location_checksum, error_msg)) {
@@ -1070,26 +1075,21 @@
     const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, NULL);
     if (oat_dex_file == nullptr) {
       *error_msg = StringPrintf("Dex checksum mismatch for location '%s' and failed to find oat "
-                                "dex file '%s': %s", oat_file_location.c_str(), dex_location,
+                                "dex file '%s': %s", oat_file->GetLocation().c_str(), dex_location,
                                 error_msg->c_str());
-      return nullptr;
+      return false;
     }
     dex_file.reset(oat_dex_file->OpenDexFile(error_msg));
   } else {
-    bool verified = VerifyOatFileChecksums(oat_file.get(), dex_location, dex_location_checksum,
+    bool verified = VerifyOatFileChecksums(oat_file, dex_location, dex_location_checksum,
                                            kRuntimeISA, error_msg);
     if (!verified) {
-      return nullptr;
+      return false;
     }
     dex_file.reset(oat_file->GetOatDexFile(dex_location,
                                            &dex_location_checksum)->OpenDexFile(error_msg));
   }
-
-  if (dex_file.get() != nullptr) {
-    return oat_file.release();
-  } else {
-    return nullptr;
-  }
+  return dex_file.get() != nullptr;
 }
 
 const OatFile* ClassLinker::FindOatFileContainingDexFileFromDexLocation(
@@ -1099,51 +1099,25 @@
     std::vector<std::string>* error_msgs,
     bool* obsolete_file_cleanup_failed) {
   *obsolete_file_cleanup_failed = false;
-  // Look for an existing file next to dex. for example, for
-  // /foo/bar/baz.jar, look for /foo/bar/<isa>/baz.odex.
-  std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
-  bool open_failed;
+  bool already_opened = false;
+  std::string dex_location_str(dex_location);
+  std::unique_ptr<const OatFile> oat_file(OpenOatFileFromDexLocation(dex_location_str, isa,
+                                                                     &already_opened,
+                                                                     obsolete_file_cleanup_failed,
+                                                                     error_msgs));
   std::string error_msg;
-  const OatFile* oat_file = LoadOatFileAndVerifyDexFile(odex_filename, dex_location, &error_msg,
-                                                        &open_failed);
-  if (oat_file != nullptr) {
-    return oat_file;
-  }
-  if (dex_location_checksum == nullptr) {
-    error_msgs->push_back(StringPrintf("Failed to open oat file from %s and no classes.dex found in"
-                                      "%s: %s", odex_filename.c_str(), dex_location,
+  if (oat_file.get() == nullptr) {
+    error_msgs->push_back(StringPrintf("Failed to open oat file from dex location '%s'",
+                                       dex_location));
+    return nullptr;
+  } else if (!VerifyOatWithDexFile(oat_file.get(), dex_location, &error_msg)) {
+    error_msgs->push_back(StringPrintf("Failed to verify oat file '%s' found for dex location "
+                                       "'%s': %s", oat_file->GetLocation().c_str(), dex_location,
                                        error_msg.c_str()));
     return nullptr;
+  } else {
+    return oat_file.release();
   }
-
-  std::string cache_error_msg;
-  const std::string dalvik_cache(GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA)));
-  std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location,
-                                                         dalvik_cache.c_str()));
-  oat_file = LoadOatFileAndVerifyDexFile(cache_location, dex_location, &cache_error_msg,
-                                         &open_failed);
-  if (oat_file != nullptr) {
-    return oat_file;
-  }
-
-  if (!open_failed && TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) {
-    std::string error_msg = StringPrintf("Failed to remove obsolete file from %s when searching"
-                                         "for dex file %s: %s",
-                                         cache_location.c_str(), dex_location, strerror(errno));
-    error_msgs->push_back(error_msg);
-    VLOG(class_linker) << error_msg;
-    // Let the caller know that we couldn't remove the obsolete file.
-    // This is a good indication that further writes may fail as well.
-    *obsolete_file_cleanup_failed = true;
-  }
-
-  std::string compound_msg = StringPrintf("Failed to open oat file from %s (error '%s') or %s "
-                                          "(error '%s').", odex_filename.c_str(), error_msg.c_str(),
-                                          cache_location.c_str(), cache_error_msg.c_str());
-  VLOG(class_linker) << compound_msg;
-  error_msgs->push_back(compound_msg);
-
-  return nullptr;
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
@@ -1158,6 +1132,277 @@
   return nullptr;
 }
 
+const OatFile* ClassLinker::OpenOatFileFromDexLocation(const std::string& dex_location,
+                                                       InstructionSet isa,
+                                                       bool *already_opened,
+                                                       bool *obsolete_file_cleanup_failed,
+                                                       std::vector<std::string>* error_msgs) {
+  // Find out if we've already opened the file.
+  const OatFile* ret = nullptr;
+  std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
+  ret = FindOpenedOatFileFromOatLocation(odex_filename);
+  if (ret != nullptr) {
+    *already_opened = true;
+    return ret;
+  }
+
+  std::string dalvik_cache;
+  bool have_android_data = false;
+  bool have_dalvik_cache = false;
+  GetDalvikCache(GetInstructionSetString(kRuntimeISA), false, &dalvik_cache,
+                 &have_android_data, &have_dalvik_cache);
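+  // Probe, without creating anything, whether ANDROID_DATA and the per-ISA dalvik-cache
+  // directory exist, so that we can fall back gracefully when running without a cache.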
+  std::string cache_filename;
+  if (have_dalvik_cache) {
+    cache_filename = GetDalvikCacheFilenameOrDie(dex_location.c_str(), dalvik_cache.c_str());
+    ret = FindOpenedOatFileFromOatLocation(cache_filename);
+    if (ret != nullptr) {
+      *already_opened = true;
+      return ret;
+    }
+  } else {
+    // If we need to relocate, we should just place the odex back where it started.
+    cache_filename = odex_filename;
+  }
+
+  ret = nullptr;
+
+  // We know that neither the odex nor the cached version is already in use, if either even exists.
+  //
+  // Now we do the following:
+  // 1) Try to open the odex version.
+  // 2) If it is present, checksum-verified and correctly relocated, return it.
+  // 3) Otherwise close the odex version to free up its address space.
+  // 4) Try to open the cache version.
+  // 5) If it is present, checksum-verified and correctly relocated, return it.
+  // 6) Otherwise close the cache version to free up its address space.
+  // 7) If we should relocate:
+  //   a) If we have opened and checksum-verified the odex version, relocate it to
+  //      'cache_filename' and return the result.
+  //   b) If we have opened and checksum-verified the cache version, relocate it in place and
+  //      return the result. This should not happen often (I think only the run-tests will hit
+  //      this case).
+  // 8) If the cache version was present, delete it, since it must be obsolete if we get to this
+  //    point.
+  // 9) Return nullptr.
+
+  *already_opened = false;
+  const Runtime* runtime = Runtime::Current();
+  CHECK(runtime != nullptr);
+  bool executable = !runtime->IsCompiler();
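+  // Inside a compiler process (dex2oat) the oat file is only inspected, never executed, so do
+  // not map its code as executable.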
+
+  std::string odex_error_msg;
+  bool should_patch_system = false;
+  bool odex_checksum_verified = false;
+  {
+    // There is a high probability that both of these oat files map similar/the same address
+    // spaces, so we must scope them like this so that each gets its turn.
+    std::unique_ptr<OatFile> odex_oat_file(OatFile::Open(odex_filename, odex_filename, nullptr,
+                                                         executable, &odex_error_msg));
+    if (odex_oat_file.get() != nullptr && CheckOatFile(odex_oat_file.get(), isa,
+                                                       &odex_checksum_verified,
+                                                       &odex_error_msg)) {
+      return odex_oat_file.release();
+    } else if (odex_checksum_verified) {
+      // We can just relocate
+      should_patch_system = true;
+      odex_error_msg = "Image Patches are incorrect";
+    }
+  }
+
+  std::string cache_error_msg;
+  bool should_patch_cache = false;
+  bool cache_checksum_verified = false;
+  if (have_dalvik_cache) {
+    std::unique_ptr<OatFile> cache_oat_file(OatFile::Open(cache_filename, cache_filename, nullptr,
+                                                          executable, &cache_error_msg));
+    if (cache_oat_file.get() != nullptr && CheckOatFile(cache_oat_file.get(), isa,
+                                                        &cache_checksum_verified,
+                                                        &cache_error_msg)) {
+      return cache_oat_file.release();
+    } else if (cache_checksum_verified) {
+      // We can just relocate
+      should_patch_cache = true;
+      cache_error_msg = "Image Patches are incorrect";
+    }
+  } else if (have_android_data) {
+    // dalvik_cache does not exist but android data does. This means we should be able to create
+    // it, so we should try.
+    GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA), true);
+  }
+
+  ret = nullptr;
+  std::string error_msg;
+  if (runtime->CanRelocate()) {
+    // Run relocation
+    const std::string& image_location =
+        Runtime::Current()->GetHeap()->GetImageSpace()->GetImageLocation();
+    if (odex_checksum_verified && should_patch_system) {
+      ret = PatchAndRetrieveOat(odex_filename, cache_filename, image_location, isa, &error_msg);
+    } else if (cache_checksum_verified && should_patch_cache) {
+      CHECK(have_dalvik_cache);
+      ret = PatchAndRetrieveOat(cache_filename, cache_filename, image_location, isa, &error_msg);
+    }
+  }
+  if (ret == nullptr && have_dalvik_cache && OS::FileExists(cache_filename.c_str())) {
+    // Implicitly: we were able to find where the cached version is, but we were unable to use
+    // it, either as a destination for relocation or to open a file. We should delete it if it
+    // is there.
+    if (TEMP_FAILURE_RETRY(unlink(cache_filename.c_str())) != 0) {
+      std::string rm_error_msg = StringPrintf("Failed to remove obsolete file from %s when "
+                                              "searching for dex file %s: %s",
+                                              cache_filename.c_str(), dex_location.c_str(),
+                                              strerror(errno));
+      error_msgs->push_back(rm_error_msg);
+      VLOG(class_linker) << rm_error_msg;
+      // Let the caller know that we couldn't remove the obsolete file.
+      // This is a good indication that further writes may fail as well.
+      *obsolete_file_cleanup_failed = true;
+    }
+  }
+  if (ret == nullptr) {
+    VLOG(class_linker) << error_msg;
+    error_msgs->push_back(error_msg);
+    std::string relocation_msg;
+    if (runtime->CanRelocate()) {
+      relocation_msg = StringPrintf(" and relocation failed");
+    }
+    if (have_dalvik_cache && cache_checksum_verified) {
+      error_msg = StringPrintf("Failed to open oat file from %s (error %s) or %s "
+                                "(error %s)%s.", odex_filename.c_str(), odex_error_msg.c_str(),
+                                cache_filename.c_str(), cache_error_msg.c_str(),
+                                relocation_msg.c_str());
+    } else {
+      error_msg = StringPrintf("Failed to open oat file from %s (error %s) (no "
+                               "dalvik_cache availible)%s.", odex_filename.c_str(),
+                               odex_error_msg.c_str(), relocation_msg.c_str());
+    }
+    VLOG(class_linker) << error_msg;
+    error_msgs->push_back(error_msg);
+  }
+  return ret;
+}
+
+const OatFile* ClassLinker::PatchAndRetrieveOat(const std::string& input_oat,
+                                                const std::string& output_oat,
+                                                const std::string& image_location,
+                                                InstructionSet isa,
+                                                std::string* error_msg) {
+  Locks::mutator_lock_->AssertNotHeld(Thread::Current());  // Avoid starving GC.
+  std::string patchoat(Runtime::Current()->GetPatchoatExecutable());
+
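+  // Build the patchoat command line:
+  //   patchoat --instruction-set=<isa> --input-oat-file=<input> --output-oat-file=<output>
+  //            --patched-image-location=<image>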
+  std::string isa_arg("--instruction-set=");
+  isa_arg += GetInstructionSetString(isa);
+  std::string input_oat_filename_arg("--input-oat-file=");
+  input_oat_filename_arg += input_oat;
+  std::string output_oat_filename_arg("--output-oat-file=");
+  output_oat_filename_arg += output_oat;
+  std::string patched_image_arg("--patched-image-location=");
+  patched_image_arg += image_location;
+
+  std::vector<std::string> argv;
+  argv.push_back(patchoat);
+  argv.push_back(isa_arg);
+  argv.push_back(input_oat_filename_arg);
+  argv.push_back(output_oat_filename_arg);
+  argv.push_back(patched_image_arg);
+
+  std::string command_line(Join(argv, ' '));
+  LOG(INFO) << "Relocate Oat File: " << command_line;
+  bool success = Exec(argv, error_msg);
+  if (success) {
+    std::unique_ptr<OatFile> output(OatFile::Open(output_oat, output_oat, nullptr,
+                                                  !Runtime::Current()->IsCompiler(), error_msg));
+    bool checksum_verified = false;
+    if (output.get() != nullptr && CheckOatFile(output.get(), isa, &checksum_verified, error_msg)) {
+      return output.release();
+    } else if (output.get() != nullptr) {
+      *error_msg = StringPrintf("Patching of oat file '%s' succeeded "
+                                "but output file '%s' failed verifcation: %s",
+                                input_oat.c_str(), output_oat.c_str(), error_msg->c_str());
+    } else {
+      *error_msg = StringPrintf("Patching of oat file '%s' succeeded "
+                                "but was unable to open output file '%s': %s",
+                                input_oat.c_str(), output_oat.c_str(), error_msg->c_str());
+    }
+  } else {
+    *error_msg = StringPrintf("Patching of oat file '%s to '%s' "
+                              "failed: %s", input_oat.c_str(), output_oat.c_str(),
+                              error_msg->c_str());
+  }
+  return nullptr;
+}
+
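+// Returns the delta (in bytes) by which the given oat file would still need to be patched to
+// match the image currently in use for |isa|: the image's actual patch delta minus the delta
+// recorded in the oat header. Zero means no further relocation is required.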
+int32_t ClassLinker::GetRequiredDelta(const OatFile* oat_file, InstructionSet isa) {
+  Runtime* runtime = Runtime::Current();
+  int32_t real_patch_delta;
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+  if (isa == runtime->GetInstructionSet()) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    real_patch_delta = image_header.GetPatchDelta();
+  } else {
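+    // For a foreign ISA we cannot use the in-memory image header; read the header of that ISA's
+    // image file from disk instead.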
+    std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
+        image_space->GetImageLocation().c_str(), isa));
+    real_patch_delta = image_header->GetPatchDelta();
+  }
+  const OatHeader& oat_header = oat_file->GetOatHeader();
+  return real_patch_delta - oat_header.GetImagePatchDelta();
+}
+
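+// Checks that the image checksum, image oat data address and patch delta recorded in the oat
+// header all match the actual image for |isa|. |checksum_verified| is reported separately so
+// that callers can tell an unrelocated but otherwise valid file (which can still be patched)
+// from a genuinely mismatched one.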
+bool ClassLinker::CheckOatFile(const OatFile* oat_file, InstructionSet isa,
+                               bool* checksum_verified,
+                               std::string* error_msg) {
+  std::string compound_msg("Oat file failed to verify: ");
+  Runtime* runtime = Runtime::Current();
+  uint32_t real_image_checksum;
+  void* real_image_oat_offset;
+  int32_t real_patch_delta;
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+  if (isa == runtime->GetInstructionSet()) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    real_image_checksum = image_header.GetOatChecksum();
+    real_image_oat_offset = image_header.GetOatDataBegin();
+    real_patch_delta = image_header.GetPatchDelta();
+  } else {
+    std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
+        image_space->GetImageLocation().c_str(), isa));
+    real_image_checksum = image_header->GetOatChecksum();
+    real_image_oat_offset = image_header->GetOatDataBegin();
+    real_patch_delta = image_header->GetPatchDelta();
+  }
+
+  const OatHeader& oat_header = oat_file->GetOatHeader();
+
+  uint32_t oat_image_checksum = oat_header.GetImageFileLocationOatChecksum();
+  *checksum_verified = oat_image_checksum == real_image_checksum;
+  if (!*checksum_verified) {
+    compound_msg += StringPrintf(" Oat Image Checksum Incorrect (expected 0x%x, recieved 0x%x)",
+                                 real_image_checksum, oat_image_checksum);
+  }
+
+  void* oat_image_oat_offset =
+      reinterpret_cast<void*>(oat_header.GetImageFileLocationOatDataBegin());
+  bool offset_verified = oat_image_oat_offset == real_image_oat_offset;
+  if (!offset_verified) {
+    compound_msg += StringPrintf(" Oat Image oat offset incorrect (expected 0x%p, recieved 0x%p)",
+                                 real_image_oat_offset, oat_image_oat_offset);
+  }
+
+  int32_t oat_patch_delta = oat_header.GetImagePatchDelta();
+  bool patch_delta_verified = oat_patch_delta == real_patch_delta;
+  if (!patch_delta_verified) {
+    compound_msg += StringPrintf(" Oat image patch delta incorrect (expected 0x%x, recieved 0x%x)",
+                                 real_patch_delta, oat_patch_delta);
+  }
+
+  bool ret = (*checksum_verified && offset_verified && patch_delta_verified);
+  if (!ret) {
+    // Only report the accumulated mismatch message when verification actually failed.
+    *error_msg = compound_msg;
+  }
+  return ret;
+}
+
 const OatFile* ClassLinker::FindOatFileFromOatLocation(const std::string& oat_location,
                                                        std::string* error_msg) {
   const OatFile* oat_file = FindOpenedOatFileFromOatLocation(oat_location);
@@ -1660,23 +1905,26 @@
     // size when the class becomes resolved.
     klass.Assign(AllocClass(self, SizeOfClassWithoutEmbeddedTables(dex_file, dex_class_def)));
   }
-  if (UNLIKELY(klass.Get() == NULL)) {
+  if (UNLIKELY(klass.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());  // Expect an OOME.
-    return NULL;
+    return nullptr;
   }
   klass->SetDexCache(FindDexCache(dex_file));
   LoadClass(dex_file, dex_class_def, klass, class_loader.Get());
-  // Check for a pending exception during load
-  if (self->IsExceptionPending()) {
-    klass->SetStatus(mirror::Class::kStatusError, self);
-    return NULL;
-  }
   ObjectLock<mirror::Class> lock(self, klass);
+  if (self->IsExceptionPending()) {
+    // An exception occurred during load, set status to erroneous while holding klass' lock in case
+    // notification is necessary.
+    if (!klass->IsErroneous()) {
+      klass->SetStatus(mirror::Class::kStatusError, self);
+    }
+    return nullptr;
+  }
   klass->SetClinitThreadId(self->GetTid());
 
   // Add the newly loaded class to the loaded classes table.
   mirror::Class* existing = InsertClass(descriptor, klass.Get(), Hash(descriptor));
-  if (existing != NULL) {
+  if (existing != nullptr) {
     // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
     // this thread to block.
     return EnsureResolved(self, descriptor, existing);
@@ -1686,8 +1934,10 @@
   CHECK(!klass->IsLoaded());
   if (!LoadSuperAndInterfaces(klass, dex_file)) {
     // Loading failed.
-    klass->SetStatus(mirror::Class::kStatusError, self);
-    return NULL;
+    if (!klass->IsErroneous()) {
+      klass->SetStatus(mirror::Class::kStatusError, self);
+    }
+    return nullptr;
   }
   CHECK(klass->IsLoaded());
   // Link the class (if necessary)
@@ -1698,8 +1948,10 @@
   mirror::Class* new_class = nullptr;
   if (!LinkClass(self, descriptor, klass, interfaces, &new_class)) {
     // Linking failed.
-    klass->SetStatus(mirror::Class::kStatusError, self);
-    return NULL;
+    if (!klass->IsErroneous()) {
+      klass->SetStatus(mirror::Class::kStatusError, self);
+    }
+    return nullptr;
   }
   CHECK(new_class != nullptr) << descriptor;
   CHECK(new_class->IsResolved()) << descriptor;
@@ -2603,8 +2855,9 @@
 
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash;
        ++it) {
-    mirror::Class* entry = it->second;
-    if (entry == existing) {
+    mirror::Class** root = &it->second;
+    mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
+    if (klass == existing) {
       class_table_.erase(it);
       break;
     }
@@ -3570,9 +3823,9 @@
   MethodHelper super_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
-    for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
-      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
+    for (int i = klass->GetSuperClass()->GetVTableLength() - 1; i >= 0; --i) {
+      mh.ChangeMethod(klass->GetVTableEntry(i));
+      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTableEntry(i));
       if (mh.GetMethod() != super_mh.GetMethod() &&
           !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
         ThrowLinkageError(klass.Get(),
@@ -3730,10 +3983,6 @@
     // This will notify waiters on new_class that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     new_class_h->SetStatus(mirror::Class::kStatusResolved, self);
-
-    // Only embedded imt should be used from this point.
-    new_class_h->SetImTable(NULL);
-    // TODO: remove vtable and only use embedded vtable.
   }
   return true;
 }
@@ -3866,17 +4115,31 @@
 bool ClassLinker::LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass) {
   if (klass->HasSuperClass()) {
     uint32_t max_count = klass->NumVirtualMethods() +
-        klass->GetSuperClass()->GetVTable()->GetLength();
-    size_t actual_count = klass->GetSuperClass()->GetVTable()->GetLength();
+        klass->GetSuperClass()->GetVTableLength();
+    size_t actual_count = klass->GetSuperClass()->GetVTableLength();
     CHECK_LE(actual_count, max_count);
-    // TODO: do not assign to the vtable field until it is fully constructed.
     StackHandleScope<3> hs(self);
-    Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
-        hs.NewHandle(klass->GetSuperClass()->GetVTable()->CopyOf(self, max_count)));
-    if (UNLIKELY(vtable.Get() == NULL)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return false;
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable;
+    mirror::Class* super_class = klass->GetSuperClass();
+    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+      vtable = hs.NewHandle(AllocArtMethodArray(self, max_count));
+      if (UNLIKELY(vtable.Get() == nullptr)) {
+        CHECK(self->IsExceptionPending());  // OOME.
+        return false;
+      }
+      int len = super_class->GetVTableLength();
+      for (int i = 0; i < len; i++) {
+        vtable->Set<false>(i, super_class->GetVTableEntry(i));
+      }
+    } else {
+      CHECK(super_class->GetVTable() != nullptr) << PrettyClass(super_class);
+      vtable = hs.NewHandle(super_class->GetVTable()->CopyOf(self, max_count));
+      if (UNLIKELY(vtable.Get() == nullptr)) {
+        CHECK(self->IsExceptionPending());  // OOME.
+        return false;
+      }
     }
+
     // See if any of our virtual methods override the superclass.
     MethodHelper local_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
     MethodHelper super_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index c17f88d..b108f61 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -265,10 +265,6 @@
                        std::string* error_msg)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  const OatFile* FindOatFileFromOatLocation(const std::string& location,
-                                            std::string* error_msg)
-      LOCKS_EXCLUDED(dex_lock_);
-
   // Find or create the oat file holding dex_location. Then load all corresponding dex files
   // (if multidex) into the given vector.
   bool OpenDexFilesFromOat(const char* dex_location, const char* oat_location,
@@ -546,9 +542,31 @@
   const OatFile* FindOpenedOatFile(const char* oat_location, const char* dex_location,
                                    const uint32_t* const dex_location_checksum)
       LOCKS_EXCLUDED(dex_lock_);
+
+  // Will open the oat file directly without relocating, even if we could/should do relocation.
+  const OatFile* FindOatFileFromOatLocation(const std::string& oat_location,
+                                            std::string* error_msg)
+      LOCKS_EXCLUDED(dex_lock_);
+
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
       LOCKS_EXCLUDED(dex_lock_);
 
+  const OatFile* OpenOatFileFromDexLocation(const std::string& dex_location,
+                                            InstructionSet isa,
+                                            bool* already_opened,
+                                            bool* obsolete_file_cleanup_failed,
+                                            std::vector<std::string>* error_msgs)
+      LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
+
+  const OatFile* PatchAndRetrieveOat(const std::string& input, const std::string& output,
+                                     const std::string& image_location, InstructionSet isa,
+                                     std::string* error_msg)
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
+
+  bool CheckOatFile(const OatFile* oat_file, InstructionSet isa,
+                    bool* checksum_verified, std::string* error_msg);
+  int32_t GetRequiredDelta(const OatFile* oat_file, InstructionSet isa);
+
   // Note: will not register the oat file.
   const OatFile* FindOatFileInOatLocationForDexFile(const char* dex_location,
                                                     uint32_t dex_location_checksum,
@@ -575,14 +593,10 @@
                                                              bool* obsolete_file_cleanup_failed)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
-  // Find a verify an oat file with the given dex file. Will return nullptr when the oat file
-  // was not found or the dex file could not be verified.
-  // Note: Does not register the oat file.
-  const OatFile* LoadOatFileAndVerifyDexFile(const std::string& oat_file_location,
-                                             const char* dex_location,
-                                             std::string* error_msg,
-                                             bool* open_failed)
-      LOCKS_EXCLUDED(dex_lock_);
+  // Verify an oat file with the given dex file. Returns false if the dex file could not be
+  // verified, true otherwise.
+  bool VerifyOatWithDexFile(const OatFile* oat_file, const char* dex_location,
+                            std::string* error_msg);
 
   mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass,
                                             mirror::Class* proxy_class)
@@ -720,6 +734,8 @@
   const void* quick_to_interpreter_bridge_trampoline_;
 
   friend class ImageWriter;  // for GetClassRoots
+  friend class ImageDumper;  // for FindOpenedOatFileFromOatLocation
+  friend class ElfPatcher;  // for FindOpenedOatFileForDexFile & FindOpenedOatFileFromOatLocation
   FRIEND_TEST(ClassLinkerTest, ClassRootDescriptors);
   FRIEND_TEST(mirror::DexCacheTest, Open);
   FRIEND_TEST(ExceptionTest, FindExceptionHandler);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 21fe006..8e16d9b 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -91,7 +91,7 @@
     EXPECT_EQ(0U, primitive->NumInstanceFields());
     EXPECT_EQ(0U, primitive->NumStaticFields());
     EXPECT_EQ(0U, primitive->NumDirectInterfaces());
-    EXPECT_TRUE(primitive->GetVTable() == NULL);
+    EXPECT_FALSE(primitive->HasVTable());
     EXPECT_EQ(0, primitive->GetIfTableCount());
     EXPECT_TRUE(primitive->GetIfTable() == NULL);
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
@@ -143,7 +143,7 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_TRUE(array->GetVTable() != NULL);
+    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != NULL);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
@@ -216,7 +216,7 @@
         EXPECT_NE(0U, klass->NumDirectMethods());
       }
     }
-    EXPECT_EQ(klass->IsInterface(), klass->GetVTable() == NULL);
+    EXPECT_EQ(klass->IsInterface(), !klass->HasVTable());
     mirror::IfTable* iftable = klass->GetIfTable();
     for (int i = 0; i < klass->GetIfTableCount(); i++) {
       mirror::Class* interface = iftable->GetInterface(i);
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index f47f13d..2826f89 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -199,12 +199,13 @@
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
 }
 
-void CommonRuntimeTest::TearDown() {
-  const char* android_data = getenv("ANDROID_DATA");
-  ASSERT_TRUE(android_data != nullptr);
-  DIR* dir = opendir(dalvik_cache_.c_str());
+void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
+  ASSERT_TRUE(dirpath != nullptr);
+  DIR* dir = opendir(dirpath);
   ASSERT_TRUE(dir != nullptr);
   dirent* e;
+  struct stat s;
   while ((e = readdir(dir)) != nullptr) {
     if ((strcmp(e->d_name, ".") == 0) || (strcmp(e->d_name, "..") == 0)) {
       continue;
@@ -212,10 +213,24 @@
-    std::string filename(dalvik_cache_);
+    std::string filename(dirpath);
     filename.push_back('/');
     filename.append(e->d_name);
-    int unlink_result = unlink(filename.c_str());
-    ASSERT_EQ(0, unlink_result);
+    int stat_result = lstat(filename.c_str(), &s);
+    ASSERT_EQ(0, stat_result) << "unable to stat " << filename;
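+    // Recurse into subdirectories (e.g. the per-ISA directories created in the dalvik cache) so
+    // that they are emptied before being removed.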
+    if (S_ISDIR(s.st_mode)) {
+      ClearDirectory(filename.c_str());
+      int rmdir_result = rmdir(filename.c_str());
+      ASSERT_EQ(0, rmdir_result) << filename;
+    } else {
+      int unlink_result = unlink(filename.c_str());
+      ASSERT_EQ(0, unlink_result) << filename;
+    }
   }
   closedir(dir);
+}
+
+void CommonRuntimeTest::TearDown() {
+  const char* android_data = getenv("ANDROID_DATA");
+  ASSERT_TRUE(android_data != nullptr);
+  ClearDirectory(dalvik_cache_.c_str());
   int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
   ASSERT_EQ(0, rmdir_cache_result);
   int rmdir_data_result = rmdir(android_data_.c_str());
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index d045031..eb96352 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -81,6 +81,8 @@
   // Allow subclases such as CommonCompilerTest to add extra options.
   virtual void SetUpRuntimeOptions(RuntimeOptions* options) {}
 
+  void ClearDirectory(const char* dirpath);
+
   virtual void TearDown();
 
   std::string GetLibCoreDexFileName();
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index b07043f..d1a6861 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -36,6 +36,10 @@
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
     virtual void ClassRejected(ClassReference ref) = 0;
 
+    // Return true if we should attempt to relocate to a random base address if we have not already
+    // done so. Return false if relocating in this way would be problematic.
+    virtual bool IsRelocationPossible() = 0;
+
   protected:
     CompilerCallbacks() { }
 };
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e4ab9f5..5e784b1 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2250,15 +2250,18 @@
 }
 
 JDWP::JdwpError Dbg::SuspendThread(JDWP::ObjectId thread_id, bool request_suspension) {
-  ScopedLocalRef<jobject> peer(Thread::Current()->GetJniEnv(), NULL);
+  Thread* self = Thread::Current();
+  ScopedLocalRef<jobject> peer(self->GetJniEnv(), nullptr);
   {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
     peer.reset(soa.AddLocalReference<jobject>(gRegistry->Get<mirror::Object*>(thread_id)));
   }
   if (peer.get() == NULL) {
     return JDWP::ERR_THREAD_NOT_ALIVE;
   }
-  // Suspend thread to build stack trace.
+  // Suspend thread to build stack trace. Take suspend thread lock to avoid races with threads
+  // trying to suspend this one.
+  MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
   bool timed_out;
   Thread* thread = ThreadList::SuspendThreadByPeer(peer.get(), request_suspension, true,
                                                    &timed_out);
@@ -3032,7 +3035,7 @@
 
 // Sanity checks all existing breakpoints on the same method.
 static void SanityCheckExistingBreakpoints(mirror::ArtMethod* m, bool need_full_deoptimization)
-    EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_)  {
+    EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (kIsDebugBuild) {
     for (const Breakpoint& breakpoint : gBreakpoints) {
       CHECK_EQ(need_full_deoptimization, breakpoint.NeedFullDeoptimization());
@@ -3128,7 +3131,7 @@
   ScopedThreadSuspension(Thread* self, JDWP::ObjectId thread_id)
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
-      thread_(NULL),
+      thread_(nullptr),
       error_(JDWP::ERR_NONE),
       self_suspend_(false),
       other_suspend_(false) {
@@ -3144,10 +3147,15 @@
         soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
         jobject thread_peer = gRegistry->GetJObject(thread_id);
         bool timed_out;
-        Thread* suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
-                                                                   &timed_out);
+        Thread* suspended_thread;
+        {
+          // Take suspend thread lock to avoid races with threads trying to suspend this one.
+          MutexLock mu(soa.Self(), *Locks::thread_list_suspend_thread_lock_);
+          suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
+                                                             &timed_out);
+        }
         CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kWaitingForDebuggerSuspension);
-        if (suspended_thread == NULL) {
+        if (suspended_thread == nullptr) {
           // Thread terminated from under us while suspending.
           error_ = JDWP::ERR_INVALID_THREAD;
         } else {
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 291e2d0..48dcdca 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -170,13 +170,29 @@
   return true;
 }
 
-bool DexFileVerifier::CheckPointerRange(const void* start, const void* end, const char* label) {
+bool DexFileVerifier::CheckListSize(const void* start, size_t count, size_t elem_size,
+                                    const char* label) {
+  // Check that size is not 0.
+  CHECK_NE(elem_size, 0U);
+
   const byte* range_start = reinterpret_cast<const byte*>(start);
-  const byte* range_end = reinterpret_cast<const byte*>(end);
   const byte* file_start = reinterpret_cast<const byte*>(begin_);
+
+  // Check for overflow.
+  uintptr_t max = 0 - 1;
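+  // |max - start| is the number of bytes from |start| to the end of the address space; if fewer
+  // than |count| elements of |elem_size| bytes fit in that range, count * elem_size would
+  // overflow the pointer range.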
+  size_t available_bytes_till_end_of_mem = max - reinterpret_cast<uintptr_t>(start);
+  size_t max_count = available_bytes_till_end_of_mem / elem_size;
+  if (max_count < count) {
+    ErrorStringPrintf("Overflow in range for %s: %zx for %zu@%zu", label,
+                      static_cast<size_t>(range_start - file_start),
+                      count, elem_size);
+    return false;
+  }
+
+  const byte* range_end = range_start + count * elem_size;
   const byte* file_end = file_start + size_;
-  if (UNLIKELY((range_start < file_start) || (range_start > file_end) ||
-               (range_end < file_start) || (range_end > file_end))) {
+  if (UNLIKELY((range_start < file_start) || (range_end > file_end))) {
+    // Note: these two tests are enough as we make sure above that there's no overflow.
     ErrorStringPrintf("Bad range for %s: %zx to %zx", label,
                       static_cast<size_t>(range_start - file_start),
                       static_cast<size_t>(range_end - file_start));
@@ -185,12 +201,6 @@
   return true;
 }
 
-bool DexFileVerifier::CheckListSize(const void* start, uint32_t count,
-                                    uint32_t element_size, const char* label) {
-  const byte* list_start = reinterpret_cast<const byte*>(start);
-  return CheckPointerRange(list_start, list_start + (count * element_size), label);
-}
-
 bool DexFileVerifier::CheckIndex(uint32_t field, uint32_t limit, const char* label) {
   if (UNLIKELY(field >= limit)) {
     ErrorStringPrintf("Bad index for %s: %x >= %x", label, field, limit);
@@ -329,7 +339,7 @@
 
 uint32_t DexFileVerifier::ReadUnsignedLittleEndian(uint32_t size) {
   uint32_t result = 0;
-  if (LIKELY(CheckPointerRange(ptr_, ptr_ + size, "encoded_value"))) {
+  if (LIKELY(CheckListSize(ptr_, size, sizeof(byte), "encoded_value"))) {
     for (uint32_t i = 0; i < size; i++) {
       result |= ((uint32_t) *(ptr_++)) << (i * 8);
     }
@@ -447,7 +457,7 @@
 
 bool DexFileVerifier::CheckPadding(size_t offset, uint32_t aligned_offset) {
   if (offset < aligned_offset) {
-    if (!CheckPointerRange(begin_ + offset, begin_ + aligned_offset, "section")) {
+    if (!CheckListSize(begin_ + offset, aligned_offset - offset, sizeof(byte), "section")) {
       return false;
     }
     while (offset < aligned_offset) {
@@ -463,7 +473,7 @@
 }
 
 bool DexFileVerifier::CheckEncodedValue() {
-  if (!CheckPointerRange(ptr_, ptr_ + 1, "encoded_value header")) {
+  if (!CheckListSize(ptr_, 1, sizeof(byte), "encoded_value header")) {
     return false;
   }
 
@@ -656,7 +666,7 @@
 
 bool DexFileVerifier::CheckIntraCodeItem() {
   const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(ptr_);
-  if (!CheckPointerRange(code_item, code_item + 1, "code")) {
+  if (!CheckListSize(code_item, 1, sizeof(DexFile::CodeItem), "code")) {
     return false;
   }
 
@@ -945,7 +955,7 @@
 }
 
 bool DexFileVerifier::CheckIntraAnnotationItem() {
-  if (!CheckPointerRange(ptr_, ptr_ + 1, "annotation visibility")) {
+  if (!CheckListSize(ptr_, 1, sizeof(byte), "annotation visibility")) {
     return false;
   }
 
@@ -970,7 +980,7 @@
 bool DexFileVerifier::CheckIntraAnnotationsDirectoryItem() {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr_);
-  if (!CheckPointerRange(item, item + 1, "annotations_directory")) {
+  if (!CheckListSize(item, 1, sizeof(DexFile::AnnotationsDirectoryItem), "annotations_directory")) {
     return false;
   }
 
@@ -1064,42 +1074,42 @@
     // Check depending on the section type.
     switch (type) {
       case DexFile::kDexTypeStringIdItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::StringId), "string_ids")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::StringId), "string_ids")) {
           return false;
         }
         ptr_ += sizeof(DexFile::StringId);
         break;
       }
       case DexFile::kDexTypeTypeIdItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::TypeId), "type_ids")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::TypeId), "type_ids")) {
           return false;
         }
         ptr_ += sizeof(DexFile::TypeId);
         break;
       }
       case DexFile::kDexTypeProtoIdItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::ProtoId), "proto_ids")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::ProtoId), "proto_ids")) {
           return false;
         }
         ptr_ += sizeof(DexFile::ProtoId);
         break;
       }
       case DexFile::kDexTypeFieldIdItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::FieldId), "field_ids")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::FieldId), "field_ids")) {
           return false;
         }
         ptr_ += sizeof(DexFile::FieldId);
         break;
       }
       case DexFile::kDexTypeMethodIdItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::MethodId), "method_ids")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::MethodId), "method_ids")) {
           return false;
         }
         ptr_ += sizeof(DexFile::MethodId);
         break;
       }
       case DexFile::kDexTypeClassDefItem: {
-        if (!CheckPointerRange(ptr_, ptr_ + sizeof(DexFile::ClassDef), "class_defs")) {
+        if (!CheckListSize(ptr_, 1, sizeof(DexFile::ClassDef), "class_defs")) {
           return false;
         }
         ptr_ += sizeof(DexFile::ClassDef);
@@ -1110,7 +1120,7 @@
         const DexFile::TypeItem* item = &list->GetTypeItem(0);
         uint32_t count = list->Size();
 
-        if (!CheckPointerRange(list, list + 1, "type_list") ||
+        if (!CheckListSize(list, 1, sizeof(DexFile::TypeList), "type_list") ||
             !CheckListSize(item, count, sizeof(DexFile::TypeItem), "type_list size")) {
           return false;
         }
@@ -1123,7 +1133,8 @@
         const DexFile::AnnotationSetRefItem* item = list->list_;
         uint32_t count = list->size_;
 
-        if (!CheckPointerRange(list, list + 1, "annotation_set_ref_list") ||
+        if (!CheckListSize(list, 1, sizeof(DexFile::AnnotationSetRefList),
+                               "annotation_set_ref_list") ||
             !CheckListSize(item, count, sizeof(DexFile::AnnotationSetRefItem),
                            "annotation_set_ref_list size")) {
           return false;
@@ -1137,7 +1148,7 @@
         const uint32_t* item = set->entries_;
         uint32_t count = set->size_;
 
-        if (!CheckPointerRange(set, set + 1, "annotation_set_item") ||
+        if (!CheckListSize(set, 1, sizeof(DexFile::AnnotationSetItem), "annotation_set_item") ||
             !CheckListSize(item, count, sizeof(uint32_t), "annotation_set_item size")) {
           return false;
         }
@@ -1650,6 +1661,12 @@
     return false;
   }
 
+  // Only allow non-runtime modifiers.
+  if ((item->access_flags_ & ~kAccJavaFlagsMask) != 0) {
+    ErrorStringPrintf("Invalid class flags: '%d'", item->access_flags_);
+    return false;
+  }
+
   if (item->interfaces_off_ != 0 &&
       !CheckOffsetToTypeMap(item->interfaces_off_, DexFile::kDexTypeTypeList)) {
     return false;
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index f845993..cae1063 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -40,8 +40,7 @@
   bool Verify();
 
   bool CheckShortyDescriptorMatch(char shorty_char, const char* descriptor, bool is_return_type);
-  bool CheckPointerRange(const void* start, const void* end, const char* label);
-  bool CheckListSize(const void* start, uint32_t count, uint32_t element_size, const char* label);
+  bool CheckListSize(const void* start, size_t count, size_t element_size, const char* label);
   bool CheckIndex(uint32_t field, uint32_t limit, const char* label);
 
   bool CheckHeader();
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 90c8fcf..cb0be04 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -390,26 +390,26 @@
     case kDirect:
       return resolved_method;
     case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = (*this_object)->GetClass()->GetVTable();
+      mirror::Class* klass = (*this_object)->GetClass();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       if (access_check &&
-          (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
+          (!klass->HasVTable() ||
+           vtable_index >= static_cast<uint32_t>(klass->GetVTableLength()))) {
         // Behavior to agree with that of the verifier.
         ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
                                resolved_method->GetName(), resolved_method->GetSignature());
         return nullptr;  // Failure.
       }
-      DCHECK(vtable != nullptr);
-      return vtable->GetWithoutChecks(vtable_index);
+      DCHECK(klass->HasVTable()) << PrettyClass(klass);
+      return klass->GetVTableEntry(vtable_index);
     }
     case kSuper: {
       mirror::Class* super_class = (*referrer)->GetDeclaringClass()->GetSuperClass();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
-      mirror::ObjectArray<mirror::ArtMethod>* vtable;
       if (access_check) {
         // Check existence of super class.
-        vtable = (super_class != nullptr) ? super_class->GetVTable() : nullptr;
-        if (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength())) {
+        if (super_class == nullptr || !super_class->HasVTable() ||
+            vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
           // Behavior to agree with that of the verifier.
           ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
                                  resolved_method->GetName(), resolved_method->GetSignature());
@@ -418,10 +418,9 @@
       } else {
         // Super class must exist.
         DCHECK(super_class != nullptr);
-        vtable = super_class->GetVTable();
       }
-      DCHECK(vtable != nullptr);
-      return vtable->GetWithoutChecks(vtable_index);
+      DCHECK(super_class->HasVTable());
+      return super_class->GetVTableEntry(vtable_index);
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
@@ -555,11 +554,11 @@
   } else if (is_direct) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTable()->
-        Get(resolved_method->GetMethodIndex());
+    return referrer->GetDeclaringClass()->GetSuperClass()
+                   ->GetVTableEntry(resolved_method->GetMethodIndex());
   } else {
     DCHECK(type == kVirtual);
-    return this_object->GetClass()->GetVTable()->Get(resolved_method->GetMethodIndex());
+    return this_object->GetClass()->GetVTableEntry(resolved_method->GetMethodIndex());
   }
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 0fa0e41..c1c7631 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -110,8 +110,8 @@
 
 void ThrowStackOverflowError(Thread* self) {
   if (self->IsHandlingStackOverflow()) {
-      LOG(ERROR) << "Recursive stack overflow.";
-      // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
+    LOG(ERROR) << "Recursive stack overflow.";
+    // We don't fail here because SetStackEndForStackOverflow will print better diagnostics.
   }
 
   if (Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()) {
@@ -123,15 +123,90 @@
   JNIEnvExt* env = self->GetJniEnv();
   std::string msg("stack size ");
   msg += PrettySize(self->GetStackSize());
-  // Use low-level JNI routine and pre-baked error class to avoid class linking operations that
-  // would consume more stack.
-  int rc = ::art::ThrowNewException(env, WellKnownClasses::java_lang_StackOverflowError,
-                                    msg.c_str(), NULL);
-  if (rc != JNI_OK) {
-    // TODO: ThrowNewException failed presumably because of an OOME, we continue to throw the OOME
-    //       or die in the CHECK below. We may want to throw a pre-baked StackOverflowError
-    //       instead.
-    LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
+
+  // Avoid running Java code for exception initialization.
+  // TODO: Checks to make this a bit less brittle.
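+  // Running the StackOverflowError constructor could itself overflow the already-exhausted
+  // stack, so the fields that Throwable's constructor would normally set are filled in
+  // manually below.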
+
+  std::string error_msg;
+
+  // Allocate an uninitialized object.
+  ScopedLocalRef<jobject> exc(env,
+                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
+  if (exc.get() != nullptr) {
+    // "Initialize".
+    // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
+    // Only Throwable has "custom" fields:
+    //   String detailMessage.
+    //   Throwable cause (= this).
+    //   List<Throwable> suppressedExceptions (= Collections.emptyList()).
+    //   Object stackState;
+    //   StackTraceElement[] stackTrace;
+    // Only Throwable has a non-empty constructor:
+    //   this.stackTrace = EmptyArray.STACK_TRACE_ELEMENT;
+    //   fillInStackTrace();
+
+    // detailMessage.
+    // TODO: Use String::FromModifiedUTF...?
+    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
+    if (s.get() != nullptr) {
+      jfieldID detail_message_id = env->GetFieldID(WellKnownClasses::java_lang_Throwable,
+                                                   "detailMessage", "Ljava/lang/String;");
+      env->SetObjectField(exc.get(), detail_message_id, s.get());
+
+      // cause.
+      jfieldID cause_id = env->GetFieldID(WellKnownClasses::java_lang_Throwable,
+                                          "cause", "Ljava/lang/Throwable;");
+      env->SetObjectField(exc.get(), cause_id, exc.get());
+
+      // suppressedExceptions.
+      jfieldID emptylist_id = env->GetStaticFieldID(WellKnownClasses::java_util_Collections,
+                                                    "EMPTY_LIST", "Ljava/util/List;");
+      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
+              WellKnownClasses::java_util_Collections, emptylist_id));
+      CHECK(emptylist.get() != nullptr);
+      jfieldID suppressed_id = env->GetFieldID(WellKnownClasses::java_lang_Throwable,
+                                               "suppressedExceptions", "Ljava/util/List;");
+      env->SetObjectField(exc.get(), suppressed_id, emptylist.get());
+
+      // stackState is set as result of fillInStackTrace. fillInStackTrace calls
+      // nativeFillInStackTrace.
+      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
+      {
+        ScopedObjectAccessUnchecked soa(env);
+        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
+      }
+      if (stack_state_val.get() != nullptr) {
+        jfieldID stackstateID = env->GetFieldID(WellKnownClasses::java_lang_Throwable,
+            "stackState", "Ljava/lang/Object;");
+        env->SetObjectField(exc.get(), stackstateID, stack_state_val.get());
+
+        // stackTrace.
+        jfieldID stack_trace_elem_id = env->GetStaticFieldID(
+            WellKnownClasses::libcore_util_EmptyArray, "STACK_TRACE_ELEMENT",
+            "[Ljava/lang/StackTraceElement;");
+        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
+                WellKnownClasses::libcore_util_EmptyArray, stack_trace_elem_id));
+        jfieldID stacktrace_id = env->GetFieldID(
+            WellKnownClasses::java_lang_Throwable, "stackTrace", "[Ljava/lang/StackTraceElement;");
+        env->SetObjectField(exc.get(), stacktrace_id, stack_trace_elem.get());
+
+        // Throw the exception.
+        ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+        self->SetException(throw_location,
+            reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
+      } else {
+        error_msg = "Could not create stack trace.";
+      }
+    } else {
+      // Could not allocate a string object.
+      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
+    }
+  } else {
+    error_msg = "Could not allocate StackOverflowError object.";
+  }
+
+  if (!error_msg.empty()) {
+    LOG(ERROR) << error_msg;
     CHECK(self->IsExceptionPending());
   }
 
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index c5d67aa..44c89ad 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -40,7 +40,6 @@
 class ScopedObjectAccessAlreadyRunnable;
 class Thread;
 
-// TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <const bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                                             mirror::ArtMethod* method,
@@ -56,7 +55,6 @@
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
-// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
                                                                 mirror::ArtMethod* method,
@@ -65,7 +63,6 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and a resolved class, create an instance.
-// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
 ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
                                                                         mirror::ArtMethod* method,
@@ -74,7 +71,6 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Given the context of a calling Method and an initialized class, create an instance.
-// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
 ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
                                                                            mirror::ArtMethod* method,
@@ -83,7 +79,6 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
-// TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
                                                            mirror::ArtMethod* method,
@@ -95,7 +90,6 @@
 // it cannot be resolved, throw an error. If it can, use it to create an array.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
-// TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
                                                               mirror::ArtMethod* method,
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index c7c567f..8fb33ce 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -69,9 +69,9 @@
     } else if (space->GetLiveBitmap() != nullptr) {
       if (space == to_space_ || collect_from_space_only_) {
         if (collect_from_space_only_) {
-          // Bind the main free list space and the non-moving space to the immune space if a bump
-          // pointer space only collection.
-          CHECK(space == to_space_ || space == GetHeap()->GetPrimaryFreeListSpace() ||
+          // Bind the bitmaps of the main free list space and the non-moving space if we are
+          // doing a bump pointer space only collection.
+          CHECK(space == GetHeap()->GetPrimaryFreeListSpace() ||
                 space == GetHeap()->GetNonMovingSpace());
         }
         CHECK(space->IsContinuousMemMapAllocSpace());
@@ -222,7 +222,6 @@
   heap_->GetCardTable()->ClearCardTable();
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
-  t.NewTiming("SwapStacks");
   if (kUseThreadLocalAllocationStack) {
     TimingLogger::ScopedTiming t("RevokeAllThreadLocalAllocationStacks", GetTimings());
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
@@ -492,7 +491,7 @@
       // If out of space, fall back to the to-space.
       forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
       // No logic for marking the bitmap, so it must be null.
-      DCHECK(to_space_->GetLiveBitmap() == nullptr);
+      DCHECK(to_space_live_bitmap_ == nullptr);
     } else {
       bytes_promoted_ += bytes_allocated;
       // Dirty the card at the destionation as it may contain
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4e38335..48ae84d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -96,6 +96,7 @@
 static const char* kDlMallocSpaceName[2] = {"main dlmalloc space", "main dlmalloc space 1"};
 static const char* kRosAllocSpaceName[2] = {"main rosalloc space", "main rosalloc space 1"};
 static const char* kMemMapSpaceName[2] = {"main space", "main space 1"};
+static constexpr size_t kGSSBumpPointerSpaceCapacity = 32 * MB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
@@ -179,16 +180,16 @@
       running_on_valgrind_(Runtime::Current()->RunningOnValgrind()),
       use_tlab_(use_tlab),
       main_space_backup_(nullptr),
-      min_interval_homogeneous_space_compaction_by_oom_(min_interval_homogeneous_space_compaction_by_oom),
+      min_interval_homogeneous_space_compaction_by_oom_(
+          min_interval_homogeneous_space_compaction_by_oom),
       last_time_homogeneous_space_compaction_by_oom_(NanoTime()),
       use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
-  const bool is_zygote = Runtime::Current()->IsZygote();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
-  if (!is_zygote) {
+  if (!Runtime::Current()->IsZygote()) {
     large_object_threshold_ = kDefaultLargeObjectThreshold;
     // Background compaction is currently not supported for command line runs.
     if (background_collector_type_ != foreground_collector_type_) {
@@ -197,7 +198,6 @@
     }
   }
   ChangeCollector(desired_collector_type_);
-
   live_bitmap_.reset(new accounting::HeapBitmap(this));
   mark_bitmap_.reset(new accounting::HeapBitmap(this));
   // Requested begin for the alloc space, to follow the mapped image and oat files
@@ -213,130 +213,117 @@
     CHECK_GT(oat_file_end_addr, image_space->End());
     requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
   }
-
   /*
   requested_alloc_space_begin ->     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
                                      +-  nonmoving space (kNonMovingSpaceCapacity) +-
                                      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
-                                     +-        main alloc space (capacity_)        +-
+                                     +-main alloc space / bump space 1 (capacity_) +-
                                      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
-                                     +-       main alloc space 1 (capacity_)       +-
+                                     +-????????????????????????????????????????????+-
+                                     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+                                     +-main alloc space2 / bump space 2 (capacity_)+-
                                      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
   */
-  bool create_backup_main_space =
+  bool support_homogeneous_space_compaction =
       background_collector_type == gc::kCollectorTypeHomogeneousSpaceCompact ||
       use_homogeneous_space_compaction_for_oom;
-  if (is_zygote) {
-    // Reserve the address range before we create the non moving space to make sure bitmaps don't
-    // take it.
-    std::string error_str;
-    MemMap* main_space_map = MemMap::MapAnonymous(
-        kMemMapSpaceName[0], requested_alloc_space_begin + kNonMovingSpaceCapacity, capacity_,
-        PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(main_space_map != nullptr) << error_str;
-    MemMap* main_space_1_map = nullptr;
-    // Attempt to reserve an extra mem_map for homogeneous space compaction right after the main space map.
-    if (create_backup_main_space) {
-      main_space_1_map = MemMap::MapAnonymous(kMemMapSpaceName[1], main_space_map->End(), capacity_,
-                                               PROT_READ | PROT_WRITE, true, &error_str);
-      if (main_space_1_map == nullptr) {
-        LOG(WARNING) << "Failed to create map " <<  kMemMapSpaceName[1] << " with error "
-                     << error_str;
-      }
-    }
+  // We may use the same space as the main space for the non moving space if we don't need to
+  // compact from the main space.
+  // This is not the case if we support homogeneous compaction or have a moving background
+  // collector type.
+  const bool is_zygote = Runtime::Current()->IsZygote();
+  bool separate_non_moving_space = is_zygote ||
+      support_homogeneous_space_compaction || IsMovingGc(foreground_collector_type_) ||
+      IsMovingGc(background_collector_type_);
+  if (foreground_collector_type == kCollectorTypeGSS) {
+    separate_non_moving_space = false;
+  }
+  std::unique_ptr<MemMap> main_mem_map_1;
+  std::unique_ptr<MemMap> main_mem_map_2;
+  byte* request_begin = requested_alloc_space_begin;
+  if (request_begin != nullptr && separate_non_moving_space) {
+    request_begin += kNonMovingSpaceCapacity;
+  }
+  std::string error_str;
+  std::unique_ptr<MemMap> non_moving_space_mem_map;
+  if (separate_non_moving_space) {
+    // Reserve the non moving mem map before the other two since it needs to be at a specific
+    // address.
+    non_moving_space_mem_map.reset(
+        MemMap::MapAnonymous("non moving space", requested_alloc_space_begin,
+                             kNonMovingSpaceCapacity, PROT_READ | PROT_WRITE, true, &error_str));
+    CHECK(non_moving_space_mem_map != nullptr) << error_str;
+  }
+  // Attempt to create 2 mem maps at or after the requested begin.
+  main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin, capacity_,
+                                                    PROT_READ | PROT_WRITE, &error_str));
+  CHECK(main_mem_map_1.get() != nullptr) << error_str;
+  if (support_homogeneous_space_compaction ||
+      background_collector_type_ == kCollectorTypeSS ||
+      foreground_collector_type_ == kCollectorTypeSS) {
+    main_mem_map_2.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[1], main_mem_map_1->End(),
+                                                      capacity_, PROT_READ | PROT_WRITE,
+                                                      &error_str));
+    CHECK(main_mem_map_2.get() != nullptr) << error_str;
+  }
+  // Create the non moving space first so that bitmaps don't take up the address range.
+  if (separate_non_moving_space) {
     // Non moving space is always dlmalloc since we currently don't have support for multiple
     // active rosalloc spaces.
-    non_moving_space_ = space::DlMallocSpace::Create(
-        "zygote / non moving space", initial_size, kNonMovingSpaceCapacity,
-        kNonMovingSpaceCapacity, requested_alloc_space_begin, false);
+    const size_t size = non_moving_space_mem_map->Size();
+    non_moving_space_ = space::DlMallocSpace::CreateFromMemMap(
+        non_moving_space_mem_map.release(), "zygote / non moving space", initial_size,
+        initial_size, size, size, false);
+    CHECK(non_moving_space_ != nullptr) << "Failed creating non moving space "
+        << static_cast<void*>(requested_alloc_space_begin);
     non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
-    CreateMainMallocSpace(main_space_map, initial_size, growth_limit_, capacity_);
-    if (main_space_1_map != nullptr) {
-      const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
-      main_space_backup_ = CreateMallocSpaceFromMemMap(main_space_1_map, initial_size,
-                                                       growth_limit_, capacity_, name, true);
-    }
+    AddSpace(non_moving_space_);
+  }
+  // Create other spaces based on whether or not we have a moving GC.
+  if (IsMovingGc(foreground_collector_type_) && foreground_collector_type_ != kCollectorTypeGSS) {
+    // Create bump pointer spaces.
+    // We only create the bump pointer spaces if the foreground collector is a compacting GC.
+    // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
+    bump_pointer_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 1",
+                                                                    main_mem_map_1.release());
+    CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
+    AddSpace(bump_pointer_space_);
+    temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2",
+                                                            main_mem_map_2.release());
+    CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
+    AddSpace(temp_space_);
+    CHECK(separate_non_moving_space);
   } else {
-    std::string error_str;
-    byte* request_begin = requested_alloc_space_begin;
-    if (request_begin == nullptr) {
-      // Disable homogeneous space compaction since we don't have an image.
-      create_backup_main_space = false;
-    }
-    MemMap* main_space_1_map = nullptr;
-    if (create_backup_main_space) {
-      request_begin += kNonMovingSpaceCapacity;
-      // Attempt to reserve an extra mem_map for homogeneous space compaction right after the main space map.
-      main_space_1_map = MemMap::MapAnonymous(kMemMapSpaceName[1], request_begin + capacity_,
-                                               capacity_, PROT_READ | PROT_WRITE, true, &error_str);
-      if (main_space_1_map == nullptr) {
-        LOG(WARNING) << "Failed to create map " <<  kMemMapSpaceName[1] << " with error "
-                     << error_str;
-        request_begin = requested_alloc_space_begin;
-      }
-    }
-    MemMap* main_space_map = MemMap::MapAnonymous(kMemMapSpaceName[0], request_begin, capacity_,
-                                                  PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(main_space_map != nullptr) << error_str;
-    // Introduce a seperate non moving space.
-    if (main_space_1_map != nullptr) {
-      // Do this before creating the main malloc space to prevent bitmaps from being placed here.
-      non_moving_space_ = space::DlMallocSpace::Create(
-          "non moving space", kDefaultInitialSize, kNonMovingSpaceCapacity, kNonMovingSpaceCapacity,
-          requested_alloc_space_begin, false);
-      non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
-    }
-    // Create the main free list space, which doubles as the non moving space. We can do this since
-    // non zygote means that we won't have any background compaction.
-    CreateMainMallocSpace(main_space_map, initial_size, growth_limit_, capacity_);
-    if (main_space_1_map != nullptr) {
-      const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
-      main_space_backup_ = CreateMallocSpaceFromMemMap(main_space_1_map, initial_size,
-                                                       growth_limit_, capacity_, name, true);
-      CHECK(main_space_backup_ != nullptr);
-    } else {
+    CreateMainMallocSpace(main_mem_map_1.release(), initial_size, growth_limit_, capacity_);
+    CHECK(main_space_ != nullptr);
+    AddSpace(main_space_);
+    if (!separate_non_moving_space) {
       non_moving_space_ = main_space_;
+      CHECK(!non_moving_space_->CanMoveObjects());
+    }
+    if (foreground_collector_type_ == kCollectorTypeGSS) {
+      CHECK_EQ(foreground_collector_type_, background_collector_type_);
+      // Create bump pointer spaces instead of a backup space.
+      main_mem_map_2.release();
+      bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space 1",
+                                                            kGSSBumpPointerSpaceCapacity, nullptr);
+      CHECK(bump_pointer_space_ != nullptr);
+      AddSpace(bump_pointer_space_);
+      temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2",
+                                                    kGSSBumpPointerSpaceCapacity, nullptr);
+      CHECK(temp_space_ != nullptr);
+      AddSpace(temp_space_);
+    } else if (main_mem_map_2.get() != nullptr) {
+      const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+      main_space_backup_.reset(CreateMallocSpaceFromMemMap(main_mem_map_2.release(), initial_size,
+                                                           growth_limit_, capacity_, name, true));
+      CHECK(main_space_backup_.get() != nullptr);
+      // Add the space so it's accounted for in the heap_begin and heap_end.
+      AddSpace(main_space_backup_.get());
     }
   }
   CHECK(non_moving_space_ != nullptr);
-
-  // We need to create the bump pointer if the foreground collector is a compacting GC. We only
-  // create the bump pointer space if we are not a moving foreground collector but have a moving
-  // background collector since the heap transition code will create the temp space by recycling
-  // the bitmap from the main space.
-  if (kMovingCollector &&
-      (IsMovingGc(foreground_collector_type_) || IsMovingGc(background_collector_type_))) {
-    // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
-    // Divide by 2 for a temporary fix for reducing virtual memory usage.
-    const size_t bump_pointer_space_capacity = capacity_ / 2;
-    bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
-                                                          bump_pointer_space_capacity, nullptr);
-    CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
-    AddSpace(bump_pointer_space_);
-    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2",
-                                                  bump_pointer_space_capacity, nullptr);
-    CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
-    AddSpace(temp_space_);
-  }
-  if (non_moving_space_ != main_space_) {
-    AddSpace(non_moving_space_);
-  }
-  if (main_space_backup_ != nullptr) {
-    AddSpace(main_space_backup_);
-  } else {
-    const char* disable_msg = "Disabling homogenous space compact due to no backup main space";
-    if (background_collector_type_ == gc::kCollectorTypeHomogeneousSpaceCompact) {
-      background_collector_type_ = collector_type_;
-      LOG(WARNING) << disable_msg;
-    } else if (use_homogeneous_space_compaction_for_oom_) {
-      LOG(WARNING) << disable_msg;
-    }
-    use_homogeneous_space_compaction_for_oom_ = false;
-  }
-  if (main_space_ != nullptr) {
-    AddSpace(main_space_);
-  }
-
+  CHECK(!non_moving_space_->CanMoveObjects());
   // Allocate the large object space.
   if (kUseFreeListSpaceForLOS) {
     large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity_);
@@ -345,19 +332,19 @@
   }
   CHECK(large_object_space_ != nullptr) << "Failed to create large object space";
   AddSpace(large_object_space_);
-
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
   CHECK(!continuous_spaces_.empty());
-
   // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
   byte* heap_end = continuous_spaces_.back()->Limit();
   size_t heap_capacity = heap_end - heap_begin;
-
+  // Remove the main backup space since it slows down the GC to have unused extra spaces.
+  if (main_space_backup_.get() != nullptr) {
+    RemoveSpace(main_space_backup_.get());
+  }
   // Allocate the card table.
   card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
   CHECK(card_table_.get() != NULL) << "Failed to create card table";
-
   // Card cache for now since it makes it easier for us to update the references to the copying
   // spaces.
   accounting::ModUnionTable* mod_union_table =
@@ -365,17 +352,14 @@
                                                       GetImageSpace());
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
-
   if (collector::SemiSpace::kUseRememberedSet && non_moving_space_ != main_space_) {
     accounting::RememberedSet* non_moving_space_rem_set =
         new accounting::RememberedSet("Non-moving space remembered set", this, non_moving_space_);
     CHECK(non_moving_space_rem_set != nullptr) << "Failed to create non-moving space remembered set";
     AddRememberedSet(non_moving_space_rem_set);
   }
-
-  // TODO: Count objects in the image space here.
+  // TODO: Count objects in the image space here?
   num_bytes_allocated_.StoreRelaxed(0);
-
   mark_stack_.reset(accounting::ObjectStack::Create("mark stack", kDefaultMarkStackSize,
                                                     kDefaultMarkStackSize));
   const size_t alloc_stack_capacity = max_allocation_stack_size_ + kAllocationStackReserveSize;
@@ -383,7 +367,6 @@
       "allocation stack", max_allocation_stack_size_, alloc_stack_capacity));
   live_stack_.reset(accounting::ObjectStack::Create(
       "live stack", max_allocation_stack_size_, alloc_stack_capacity));
-
   // It's still too early to take a lock because there are no threads yet, but we can create locks
   // now. We don't create it earlier to make it clear that you can't use locks during heap
   // initialization.
@@ -392,13 +375,11 @@
                                                 *gc_complete_lock_));
   heap_trim_request_lock_ = new Mutex("Heap trim request lock");
   last_gc_size_ = GetBytesAllocated();
-
   if (ignore_max_footprint_) {
     SetIdealFootprint(std::numeric_limits<size_t>::max());
     concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
   CHECK_NE(max_allowed_footprint_, 0U);
-
   // Create our garbage collectors.
   for (size_t i = 0; i < 2; ++i) {
     const bool concurrent = i != 0;
@@ -417,26 +398,38 @@
     mark_compact_collector_ = new collector::MarkCompact(this);
     garbage_collectors_.push_back(mark_compact_collector_);
   }
-
-  if (GetImageSpace() != nullptr && main_space_ != nullptr) {
-    // Check that there's no gap between the image space and the main space so that the immune
-    // region won't break (eg. due to a large object allocated in the gap).
-    bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(), main_space_->GetMemMap());
+  if (GetImageSpace() != nullptr && non_moving_space_ != nullptr) {
+    // Check that there's no gap between the image space and the non moving space so that the
+    // immune region won't break (eg. due to a large object allocated in the gap).
+    bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(),
+                                      non_moving_space_->GetMemMap());
     if (!no_gap) {
       MemMap::DumpMaps(LOG(ERROR));
       LOG(FATAL) << "There's a gap between the image space and the main space";
     }
   }
-
   if (running_on_valgrind_) {
     Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   }
-
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
   }
 }
 
+MemMap* Heap::MapAnonymousPreferredAddress(const char* name, byte* request_begin, size_t capacity,
+                                           int prot_flags, std::string* out_error_str) {
+  while (true) {
+    MemMap* map = MemMap::MapAnonymous(name, request_begin, capacity, prot_flags, true,
+                                       out_error_str);
+    if (map != nullptr || request_begin == nullptr) {
+      return map;
+    }
+    // Retry a second time with no specified request begin.
+    request_begin = nullptr;
+  }
+  return nullptr;
+}
+
 space::MallocSpace* Heap::CreateMallocSpaceFromMemMap(MemMap* mem_map, size_t initial_size,
                                                       size_t growth_limit, size_t capacity,
                                                       const char* name, bool can_move_objects) {
@@ -474,7 +467,8 @@
   if (kCompactZygote && Runtime::Current()->IsZygote() && !can_move_objects) {
     // After the zygote we want this to be false if we don't have background compaction enabled so
     // that getting primitive array elements is faster.
-    can_move_objects = !have_zygote_space_;
+    // We never have homogeneous compaction with GSS and don't need a space with movable objects.
+    can_move_objects = !have_zygote_space_ && foreground_collector_type_ != kCollectorTypeGSS;
   }
   if (collector::SemiSpace::kUseRememberedSet && main_space_ != nullptr) {
     RemoveRememberedSet(main_space_);
@@ -899,12 +893,15 @@
       << " free bytes";
   // If the allocation failed due to fragmentation, print out the largest continuous allocation.
   if (total_bytes_free >= byte_count) {
-    space::MallocSpace* space = nullptr;
+    space::AllocSpace* space = nullptr;
     if (allocator_type == kAllocatorTypeNonMoving) {
       space = non_moving_space_;
     } else if (allocator_type == kAllocatorTypeRosAlloc ||
                allocator_type == kAllocatorTypeDlMalloc) {
       space = main_space_;
+    } else if (allocator_type == kAllocatorTypeBumpPointer ||
+               allocator_type == kAllocatorTypeTLAB) {
+      space = bump_pointer_space_;
     }
     if (space != nullptr) {
       space->LogFragmentationAllocFailure(oss, byte_count);
@@ -1512,15 +1509,18 @@
   tl->SuspendAll();
   uint64_t start_time = NanoTime();
   // Launch compaction.
-  space::MallocSpace* to_space = main_space_backup_;
+  space::MallocSpace* to_space = main_space_backup_.release();
   space::MallocSpace* from_space = main_space_;
   to_space->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
   const uint64_t space_size_before_compaction = from_space->Size();
+  AddSpace(to_space);
   Compact(to_space, from_space, kGcCauseHomogeneousSpaceCompact);
   // Leave as prot read so that we can still run ROSAlloc verification on this space.
   from_space->GetMemMap()->Protect(PROT_READ);
   const uint64_t space_size_after_compaction = to_space->Size();
-  std::swap(main_space_, main_space_backup_);
+  main_space_ = to_space;
+  main_space_backup_.reset(from_space);
+  RemoveSpace(from_space);
   SetSpaceAsDefault(main_space_);  // Set as default to reset the proper dlmalloc space.
   // Update performed homogeneous space compaction count.
   count_performed_homogeneous_space_compaction_++;
@@ -1587,17 +1587,27 @@
   }
   tl->SuspendAll();
   switch (collector_type) {
-    case kCollectorTypeSS:
-      // Fall-through.
-    case kCollectorTypeGSS: {
+    case kCollectorTypeSS: {
       if (!IsMovingGc(collector_type_)) {
+        // Create the bump pointer space from the backup space.
+        CHECK(main_space_backup_ != nullptr);
+        std::unique_ptr<MemMap> mem_map(main_space_backup_->ReleaseMemMap());
         // We are transitioning from non moving GC -> moving GC, since we copied from the bump
         // pointer space last transition it will be protected.
-        bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+        CHECK(mem_map != nullptr);
+        mem_map->Protect(PROT_READ | PROT_WRITE);
+        bump_pointer_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space",
+                                                                        mem_map.release());
+        AddSpace(bump_pointer_space_);
         Compact(bump_pointer_space_, main_space_, kGcCauseCollectorTransition);
+        // Use the now empty main space mem map for the bump pointer temp space.
+        mem_map.reset(main_space_->ReleaseMemMap());
        // Remove the main space so that we don't try to trim it; this doesn't work for debug
         // builds since RosAlloc attempts to read the magic number from a protected page.
         RemoveSpace(main_space_);
+        temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2",
+                                                                mem_map.release());
+        AddSpace(temp_space_);
       }
       break;
     }
@@ -1605,10 +1615,25 @@
       // Fall through.
     case kCollectorTypeCMS: {
       if (IsMovingGc(collector_type_)) {
+        CHECK(temp_space_ != nullptr);
+        std::unique_ptr<MemMap> mem_map(temp_space_->ReleaseMemMap());
+        RemoveSpace(temp_space_);
+        temp_space_ = nullptr;
+        CreateMainMallocSpace(mem_map.get(), kDefaultInitialSize, mem_map->Size(),
+                              mem_map->Size());
+        mem_map.release();
         // Compact to the main space from the bump pointer space, don't need to swap semispaces.
         AddSpace(main_space_);
         main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(main_space_, bump_pointer_space_, kGcCauseCollectorTransition);
+        mem_map.reset(bump_pointer_space_->ReleaseMemMap());
+        RemoveSpace(bump_pointer_space_);
+        bump_pointer_space_ = nullptr;
+        const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+        main_space_backup_.reset(CreateMallocSpaceFromMemMap(mem_map.get(), kDefaultInitialSize,
+                                                             mem_map->Size(), mem_map->Size(),
+                                                             name, true));
+        mem_map.release();
       }
       break;
     }
@@ -1811,6 +1836,7 @@
   // there.
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
   // Change the collector to the post zygote one.
+  bool same_space = non_moving_space_ == main_space_;
   if (kCompactZygote) {
     DCHECK(semi_space_collector_ != nullptr);
     // Temporarily disable rosalloc verification because the zygote
@@ -1877,6 +1903,11 @@
   space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
                                                                         low_memory_mode_,
                                                                         &non_moving_space_);
+  CHECK(!non_moving_space_->CanMoveObjects());
+  if (same_space) {
+    main_space_ = non_moving_space_;
+    SetSpaceAsDefault(main_space_);
+  }
   delete old_alloc_space;
   CHECK(zygote_space != nullptr) << "Failed creating zygote space";
   AddSpace(zygote_space);
@@ -2178,7 +2209,7 @@
         LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
       }
 
-      // Attmept to find the class inside of the recently freed objects.
+      // Attempt to find the class inside of the recently freed objects.
       space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
       if (ref_space != nullptr && ref_space->IsMallocSpace()) {
         space::MallocSpace* space = ref_space->AsMallocSpace();
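
For reference, a minimal standalone sketch of the preferred-address mapping pattern that
MapAnonymousPreferredAddress implements above. Plain mmap stands in for MemMap::MapAnonymous,
and the helper name and fallback policy are illustrative assumptions, not ART API:

    #include <sys/mman.h>
    #include <cstddef>

    // Hedged sketch: request an anonymous mapping at a preferred base; if the
    // kernel places it elsewhere, release it and retry with no address hint.
    static void* MapAnonymousPreferred(void* preferred, size_t capacity) {
      void* map = mmap(preferred, capacity, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (map == MAP_FAILED) {
        return nullptr;
      }
      if (preferred != nullptr && map != preferred) {
        // The hint was not honored; fall back to a kernel-chosen address,
        // mirroring the "retry with request_begin = nullptr" loop above.
        munmap(map, capacity);
        map = mmap(nullptr, capacity, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      }
      return map == MAP_FAILED ? nullptr : map;
    }
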
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0da113f..56c6d6d 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -444,8 +444,7 @@
                                                               bool fail_ok) const;
   space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const;
 
-  void DumpForSigQuit(std::ostream& os);
-
+  void DumpForSigQuit(std::ostream& os) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Do a pending heap transition or trim.
   void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_);
@@ -594,8 +593,13 @@
 
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
+  // Create a mem map with a preferred base address.
+  static MemMap* MapAnonymousPreferredAddress(const char* name, byte* request_begin,
+                                              size_t capacity, int prot_flags,
+                                              std::string* out_error_str);
+
   bool SupportHSpaceCompaction() const {
-    // Returns true if we can do hspace compaction.
+    // Returns true if we can do hspace compaction.
     return main_space_backup_ != nullptr;
   }
 
@@ -1007,7 +1011,8 @@
   const bool use_tlab_;
 
   // Pointer to the space which becomes the new main space when we do homogeneous space compaction.
-  space::MallocSpace* main_space_backup_;
+  // Use unique_ptr since the space is only added during the homogeneous compaction phase.
+  std::unique_ptr<space::MallocSpace> main_space_backup_;
 
   // Minimal interval allowed between two homogeneous space compactions caused by OOM.
   uint64_t min_interval_homogeneous_space_compaction_by_oom_;
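
The switch of main_space_backup_ to std::unique_ptr supports the ownership hand-off performed by
PerformHomogeneousSpaceCompact in heap.cc above: the backup space is owned privately while
dormant and is only registered with the heap for the duration of the copy. A toy model of that
exchange (Space and Heap here are stand-ins, not the real classes):

    #include <memory>

    struct Space {};  // stand-in for space::MallocSpace

    struct Heap {
      Space* main_space_ = nullptr;               // registered with the heap
      std::unique_ptr<Space> main_space_backup_;  // dormant, privately owned

      void CompactToBackup() {
        Space* to_space = main_space_backup_.release();  // hand to the heap
        Space* from_space = main_space_;
        // ... AddSpace(to_space), copy live objects, RemoveSpace(from_space) ...
        main_space_ = to_space;
        main_space_backup_.reset(from_space);  // old main space goes dormant
      }
    };
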
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 8b35692..fb6bbac 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -258,6 +258,14 @@
   return true;
 }
 
+void BumpPointerSpace::LogFragmentationAllocFailure(std::ostream& os,
+                                                    size_t /* failed_alloc_bytes */) {
+  size_t max_contiguous_allocation = Limit() - End();
+  os << "; failed due to fragmentation (largest possible contiguous allocation "
+     << max_contiguous_allocation << " bytes)";
+  // Caller's job to print failed_alloc_bytes.
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
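
LogFragmentationAllocFailure above relies on the defining property of a bump pointer space: free
memory is always a single run between the allocation cursor (End()) and Limit(), so the largest
possible contiguous allocation is simply their difference. A toy illustration under that
assumption (not the real BumpPointerSpace class):

    #include <cstddef>
    #include <cstdint>

    // Toy bump pointer region: one cursor advances through one contiguous
    // range, so free space is always a single run at the end.
    struct BumpRegion {
      uint8_t* end;    // current allocation cursor
      uint8_t* limit;  // end of the backing region

      size_t MaxContiguousAllocation() const {
        return static_cast<size_t>(limit - end);  // the value logged above
      }

      void* Alloc(size_t bytes) {
        bytes = (bytes + 7) & ~static_cast<size_t>(7);  // kAlignment == 8
        if (MaxContiguousAllocation() < bytes) {
          return nullptr;  // caller may then log MaxContiguousAllocation()
        }
        void* result = end;
        end += bytes;
        return result;
      }
    };
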
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index feee34f..71b15ba 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -151,6 +151,9 @@
     bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes);
   }
 
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index a87aa89..7a3cbf3 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -16,6 +16,8 @@
 
 #include "image_space.h"
 
+#include <random>
+
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "base/scoped_flock.h"
@@ -94,86 +96,304 @@
 
 bool ImageSpace::FindImageFilename(const char* image_location,
                                    const InstructionSet image_isa,
-                                   std::string* image_filename,
-                                   bool *is_system) {
+                                   std::string* system_filename,
+                                   bool* has_system,
+                                   std::string* cache_filename,
+                                   bool* dalvik_cache_exists,
+                                   bool* has_cache) {
+  *has_system = false;
+  *has_cache = false;
   // image_location = /system/framework/boot.art
   // system_image_location = /system/framework/<image_isa>/boot.art
   std::string system_image_filename(GetSystemImageFilename(image_location, image_isa));
   if (OS::FileExists(system_image_filename.c_str())) {
-    *image_filename = system_image_filename;
-    *is_system = true;
-    return true;
+    *system_filename = system_image_filename;
+    *has_system = true;
   }
 
-  const std::string dalvik_cache = GetDalvikCacheOrDie(GetInstructionSetString(image_isa));
+  bool have_android_data = false;
+  *dalvik_cache_exists = false;
+  std::string dalvik_cache;
+  GetDalvikCache(GetInstructionSetString(image_isa), true, &dalvik_cache,
+                 &have_android_data, dalvik_cache_exists);
 
-  // Always set output location even if it does not exist,
-  // so that the caller knows where to create the image.
-  //
-  // image_location = /system/framework/boot.art
-  // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
-  *image_filename = GetDalvikCacheFilenameOrDie(image_location, dalvik_cache.c_str());
-  *is_system = false;
-  return OS::FileExists(image_filename->c_str());
+  if (have_android_data && *dalvik_cache_exists) {
+    // Always set output location even if it does not exist,
+    // so that the caller knows where to create the image.
+    //
+    // image_location = /system/framework/boot.art
+    // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
+    std::string error_msg;
+    if (!GetDalvikCacheFilename(image_location, dalvik_cache.c_str(), cache_filename, &error_msg)) {
+      LOG(WARNING) << error_msg;
+      return *has_system;
+    }
+    *has_cache = OS::FileExists(cache_filename->c_str());
+  }
+  return *has_system || *has_cache;
+}
+
+static bool ReadSpecificImageHeader(const char* filename, ImageHeader* image_header) {
+  std::unique_ptr<File> image_file(OS::OpenFileForReading(filename));
+  if (image_file.get() == nullptr) {
+    return false;
+  }
+  const bool success = image_file->ReadFully(image_header, sizeof(ImageHeader));
+  if (!success || !image_header->IsValid()) {
+    return false;
+  }
+  return true;
+}
+
+static int32_t ChooseRelocationOffsetDelta(int32_t min_delta, int32_t max_delta) {
+  CHECK_ALIGNED(min_delta, kPageSize);
+  CHECK_ALIGNED(max_delta, kPageSize);
+  CHECK_LT(min_delta, max_delta);
+
+  std::default_random_engine generator;
+  generator.seed(NanoTime() * getpid());
+  std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
+  int32_t r = distribution(generator);
+  if (r % 2 == 0) {
+    r = RoundUp(r, kPageSize);
+  } else {
+    r = RoundDown(r, kPageSize);
+  }
+  CHECK_LE(min_delta, r);
+  CHECK_GE(max_delta, r);
+  CHECK_ALIGNED(r, kPageSize);
+  return r;
+}
+
+bool ImageSpace::RelocateImage(const char* image_location, const char* dest_filename,
+                               InstructionSet isa, std::string* error_msg) {
+  std::string patchoat(Runtime::Current()->GetPatchoatExecutable());
+
+  std::string input_image_location_arg("--input-image-location=");
+  input_image_location_arg += image_location;
+
+  std::string output_image_filename_arg("--output-image-file=");
+  output_image_filename_arg += dest_filename;
+
+  std::string input_oat_location_arg("--input-oat-location=");
+  input_oat_location_arg += ImageHeader::GetOatLocationFromImageLocation(image_location);
+
+  std::string output_oat_filename_arg("--output-oat-file=");
+  output_oat_filename_arg += ImageHeader::GetOatLocationFromImageLocation(dest_filename);
+
+  std::string instruction_set_arg("--instruction-set=");
+  instruction_set_arg += GetInstructionSetString(isa);
+
+  std::string base_offset_arg("--base-offset-delta=");
+  StringAppendF(&base_offset_arg, "%d", ChooseRelocationOffsetDelta(ART_BASE_ADDRESS_MIN_DELTA,
+                                                                    ART_BASE_ADDRESS_MAX_DELTA));
+
+  std::vector<std::string> argv;
+  argv.push_back(patchoat);
+
+  argv.push_back(input_image_location_arg);
+  argv.push_back(output_image_filename_arg);
+
+  argv.push_back(input_oat_location_arg);
+  argv.push_back(output_oat_filename_arg);
+
+  argv.push_back(instruction_set_arg);
+  argv.push_back(base_offset_arg);
+
+  std::string command_line(Join(argv, ' '));
+  LOG(INFO) << "RelocateImage: " << command_line;
+  return Exec(argv, error_msg);
+}
+
+static ImageHeader* ReadSpecificImageHeaderOrDie(const char* filename) {
+  std::unique_ptr<ImageHeader> hdr(new ImageHeader);
+  if (!ReadSpecificImageHeader(filename, hdr.get())) {
+    LOG(FATAL) << "Unable to read image header for " << filename;
+    return nullptr;
+  }
+  return hdr.release();
 }
 
 ImageHeader* ImageSpace::ReadImageHeaderOrDie(const char* image_location,
                                               const InstructionSet image_isa) {
-  std::string image_filename;
-  bool is_system = false;
-  if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
-    std::unique_ptr<File> image_file(OS::OpenFileForReading(image_filename.c_str()));
-    std::unique_ptr<ImageHeader> image_header(new ImageHeader);
-    const bool success = image_file->ReadFully(image_header.get(), sizeof(ImageHeader));
-    if (!success || !image_header->IsValid()) {
-      LOG(FATAL) << "Invalid Image header for: " << image_filename;
-      return nullptr;
+  std::string system_filename;
+  bool has_system = false;
+  std::string cache_filename;
+  bool has_cache = false;
+  bool dalvik_cache_exists = false;
+  if (FindImageFilename(image_location, image_isa, &system_filename, &has_system,
+                        &cache_filename, &dalvik_cache_exists, &has_cache)) {
+    if (Runtime::Current()->ShouldRelocate()) {
+      if (has_system && has_cache) {
+        std::unique_ptr<ImageHeader> sys_hdr(new ImageHeader);
+        std::unique_ptr<ImageHeader> cache_hdr(new ImageHeader);
+        if (!ReadSpecificImageHeader(system_filename.c_str(), sys_hdr.get())) {
+          LOG(FATAL) << "Unable to read image header for " << image_location << " at "
+                     << system_filename;
+          return nullptr;
+        }
+        if (!ReadSpecificImageHeader(cache_filename.c_str(), cache_hdr.get())) {
+          LOG(FATAL) << "Unable to read image header for " << image_location << " at "
+                     << cache_filename;
+          return nullptr;
+        }
+        if (sys_hdr->GetOatChecksum() != cache_hdr->GetOatChecksum()) {
+          LOG(FATAL) << "Unable to find a relocated version of image file " << image_location;
+          return nullptr;
+        }
+        return cache_hdr.release();
+      } else if (!has_cache) {
+        LOG(FATAL) << "Unable to find a relocated version of image file " << image_location;
+        return nullptr;
+      } else if (!has_system && has_cache) {
+        // This can probably just use the cache one.
+        return ReadSpecificImageHeaderOrDie(cache_filename.c_str());
+      }
+    } else {
+      // We don't want to relocate, just pick the appropriate one if we have it and return.
+      if (has_system && has_cache) {
+        // We want the cache if the checksum matches, otherwise the system.
+        std::unique_ptr<ImageHeader> system(ReadSpecificImageHeaderOrDie(system_filename.c_str()));
+        std::unique_ptr<ImageHeader> cache(ReadSpecificImageHeaderOrDie(cache_filename.c_str()));
+        if (system.get() == nullptr ||
+            (cache.get() != nullptr && cache->GetOatChecksum() == system->GetOatChecksum())) {
+          return cache.release();
+        } else {
+          return system.release();
+        }
+      } else if (has_system) {
+        return ReadSpecificImageHeaderOrDie(system_filename.c_str());
+      } else if (has_cache) {
+        return ReadSpecificImageHeaderOrDie(cache_filename.c_str());
+      }
     }
-
-    return image_header.release();
   }
 
   LOG(FATAL) << "Unable to find image file for: " << image_location;
   return nullptr;
 }
 
+static bool ChecksumsMatch(const char* image_a, const char* image_b) {
+  ImageHeader hdr_a;
+  ImageHeader hdr_b;
+  return ReadSpecificImageHeader(image_a, &hdr_a) && ReadSpecificImageHeader(image_b, &hdr_b)
+      && hdr_a.GetOatChecksum() == hdr_b.GetOatChecksum();
+}
+
 ImageSpace* ImageSpace::Create(const char* image_location,
                                const InstructionSet image_isa) {
-  std::string image_filename;
   std::string error_msg;
-  bool is_system = false;
-  const bool found_image = FindImageFilename(image_location, image_isa, &image_filename,
-                                             &is_system);
+  std::string system_filename;
+  bool has_system = false;
+  std::string cache_filename;
+  bool has_cache = false;
+  bool dalvik_cache_exists = false;
+  const bool found_image = FindImageFilename(image_location, image_isa, &system_filename,
+                                             &has_system, &cache_filename, &dalvik_cache_exists,
+                                             &has_cache);
 
-  // Note that we must not use the file descriptor associated with
-  // ScopedFlock::GetFile to Init the image file. We want the file
-  // descriptor (and the associated exclusive lock) to be released when
-  // we leave Create.
-  ScopedFlock image_lock;
-  image_lock.Init(image_filename.c_str(), &error_msg);
-
+  ImageSpace* space;
+  bool relocate = Runtime::Current()->ShouldRelocate();
   if (found_image) {
-    ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system,
-                                         &error_msg);
+    const std::string* image_filename;
+    bool is_system = false;
+    bool relocated_version_used = false;
+    if (relocate) {
+      CHECK(dalvik_cache_exists) << "Requiring relocation for image " << image_location << " "
+                                 << "at " << system_filename << " but we do not have any "
+                                 << "dalvik_cache to find/place it in.";
+      if (has_system) {
+        if (has_cache && ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
+          // We already have a relocated version.
+          image_filename = &cache_filename;
+          relocated_version_used = true;
+        } else {
+          // We do not have a usable relocated version; relocate the system one and use it.
+          if (RelocateImage(image_location, cache_filename.c_str(), image_isa,
+                            &error_msg)) {
+            relocated_version_used = true;
+            image_filename = &cache_filename;
+          } else {
+            LOG(FATAL) << "Unable to relocate image " << image_location << " "
+                       << "from " << system_filename << " to " << cache_filename << ": "
+                       << error_msg;
+            return nullptr;
+          }
+        }
+      } else {
+        CHECK(has_cache);
+        // We can just use the cache copy since it should be fine. It might or might not be relocated.
+        image_filename = &cache_filename;
+      }
+    } else {
+      if (has_system && has_cache) {
+        // Check that they have the same checksum. If they do, use the cache; otherwise the system.
+        if (ChecksumsMatch(system_filename.c_str(), cache_filename.c_str())) {
+          image_filename = &cache_filename;
+          relocated_version_used = true;
+        } else {
+          image_filename = &system_filename;
+          is_system = true;
+        }
+      } else if (has_system) {
+        image_filename = &system_filename;
+        is_system = true;
+      } else {
+        CHECK(has_cache);
+        image_filename = &cache_filename;
+      }
+    }
+    {
+      // Note that we must not use the file descriptor associated with
+      // ScopedFlock::GetFile to Init the image file. We want the file
+      // descriptor (and the associated exclusive lock) to be released when
+      // we leave Create.
+      ScopedFlock image_lock;
+      image_lock.Init(image_filename->c_str(), &error_msg);
+      LOG(INFO) << "Using image file " << image_filename->c_str() << " for image location "
+                << image_location;
+      space = ImageSpace::Init(image_filename->c_str(), image_location,
+                               false, &error_msg);
+    }
     if (space != nullptr) {
       return space;
     }
 
-    // If the /system file exists, it should be up-to-date, don't try to generate it.
-    // If it's not the /system file, log a warning and fall through to GenerateImage.
-    if (is_system) {
-      LOG(FATAL) << "Failed to load image '" << image_filename << "': " << error_msg;
+    // If the /system file exists, it should be up-to-date, don't try to generate it. Same if it is
+    // a relocated copy from something in /system (i.e. checksum's match).
+    // Otherwise, log a warning and fall through to GenerateImage.
+    if (relocated_version_used) {
+      LOG(FATAL) << "Attempted to use relocated version of " << image_location << " "
+                 << "at " << cache_filename << " generated from " << system_filename << " "
+                 << "but image failed to load: " << error_msg;
+      return nullptr;
+    } else if (is_system) {
+      LOG(FATAL) << "Failed to load /system image '" << *image_filename << "': " << error_msg;
       return nullptr;
     } else {
       LOG(WARNING) << error_msg;
     }
   }
 
-  CHECK(GenerateImage(image_filename, &error_msg))
-      << "Failed to generate image '" << image_filename << "': " << error_msg;
-  ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, true, &error_msg);
+  CHECK(dalvik_cache_exists) << "No place to put generated image.";
+  CHECK(GenerateImage(cache_filename, &error_msg))
+      << "Failed to generate image '" << cache_filename << "': " << error_msg;
+  // Relocate the freshly generated image if relocation is enabled.
+  if (relocate) {
+    if (!RelocateImage(cache_filename.c_str(), cache_filename.c_str(), image_isa, &error_msg)) {
+      LOG(FATAL) << "Failed to relocate newly created image " << cache_filename.c_str();
+      return nullptr;
+    }
+  }
+  {
+    // Note that we must not use the file descriptor associated with
+    // ScopedFlock::GetFile to Init the image file. We want the file
+    // descriptor (and the associated exclusive lock) to be released when
+    // we leave Create.
+    ScopedFlock image_lock;
+    image_lock.Init(cache_filename.c_str(), &error_msg);
+    space = ImageSpace::Init(cache_filename.c_str(), image_location, true, &error_msg);
+  }
   if (space == nullptr) {
-    LOG(FATAL) << "Failed to load image '" << image_filename << "': " << error_msg;
+    LOG(FATAL) << "Failed to load generated image '" << cache_filename << "': " << error_msg;
   }
   return space;
 }
@@ -316,6 +536,15 @@
                               " in image %s", oat_checksum, image_oat_checksum, GetName());
     return nullptr;
   }
+  int32_t image_patch_delta = image_header.GetPatchDelta();
+  int32_t oat_patch_delta = oat_file->GetOatHeader().GetImagePatchDelta();
+  if (oat_patch_delta != image_patch_delta) {
+    // We should have already relocated by this point. Bail out.
+    *error_msg = StringPrintf("Failed to match oat file patch delta %d to expected patch delta %d "
+                              "in image %s", oat_patch_delta, image_patch_delta, GetName());
+    return nullptr;
+  }
+
   return oat_file;
 }
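
ChooseRelocationOffsetDelta above snaps a uniformly drawn delta to page granularity while
staying inside [min_delta, max_delta]. A self-contained sketch of the same idea, assuming 4 KiB
pages and power-of-two masking in place of ART's RoundUp/RoundDown helpers:

    #include <cstdint>
    #include <random>

    static int32_t ChooseDelta(int32_t min_delta, int32_t max_delta) {
      constexpr int32_t kPageSize = 4096;  // assumed page size
      // min_delta and max_delta are assumed page aligned (as the CHECKs above
      // enforce), so rounding inward keeps the result in range.
      std::default_random_engine generator(std::random_device{}());
      std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
      int32_t r = distribution(generator);
      if (r % 2 == 0) {
        r = (r + kPageSize - 1) & ~(kPageSize - 1);  // round up to a page
      } else {
        r = r & ~(kPageSize - 1);                    // round down to a page
      }
      return r;
    }
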
 
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index dd9b580..debca52 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -98,6 +98,20 @@
     return false;
   }
 
+  // Returns the filename of the image corresponding to
+  // requested image_location, or the filename where a new image
+  // should be written if one doesn't exist. Looks for a generated
+  // image in the specified location and then in the dalvik-cache.
+  //
+  // Returns true if an image was found, false otherwise.
+  static bool FindImageFilename(const char* image_location,
+                                InstructionSet image_isa,
+                                std::string* system_location,
+                                bool* has_system,
+                                std::string* data_location,
+                                bool* dalvik_cache_exists,
+                                bool* has_data);
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
@@ -110,16 +124,8 @@
                           bool validate_oat_file, std::string* error_msg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Returns the filename of the image corresponding to
-  // requested image_location, or the filename where a new image
-  // should be written if one doesn't exist. Looks for a generated
-  // image in the specified location and then in the dalvik-cache.
-  //
-  // Returns true if an image was found, false otherwise.
-  static bool FindImageFilename(const char* image_location,
-                                InstructionSet image_isa,
-                                std::string* location,
-                                bool* is_system);
+  static bool RelocateImage(const char* image_location, const char* dest_filename,
+                            InstructionSet isa, std::string* error_msg);
 
   OatFile* OpenOatFile(const char* image, std::string* error_msg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index abae8ff..d5a03c6 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -431,6 +431,11 @@
   return scc.freed;
 }
 
+void LargeObjectSpace::LogFragmentationAllocFailure(std::ostream& /*os*/,
+                                                    size_t /*failed_alloc_bytes*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 01982d0..b1c20ca 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -89,6 +89,9 @@
     return end_;
   }
 
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  protected:
   explicit LargeObjectSpace(const std::string& name, byte* begin, byte* end);
 
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 6f49fbf..a52b92b 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -133,8 +133,6 @@
     return can_move_objects_;
   }
 
-  virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0;
-
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
               byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects,
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 71c8eb5..523d4fe 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -223,6 +223,8 @@
   // threads, if the alloc space implementation uses any.
   virtual void RevokeAllThreadLocalBuffers() = 0;
 
+  virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0;
+
  protected:
   struct SweepCallbackContext {
     SweepCallbackContext(bool swap_bitmaps, space::Space* space);
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index fb3a12e..51d84f5 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -77,25 +77,30 @@
 
 mirror::Object* ZygoteSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                                    size_t* usable_size) {
-  LOG(FATAL) << "Unimplemented";
+  UNIMPLEMENTED(FATAL);
   return nullptr;
 }
 
 size_t ZygoteSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) {
-  LOG(FATAL) << "Unimplemented";
+  UNIMPLEMENTED(FATAL);
   return 0;
 }
 
 size_t ZygoteSpace::Free(Thread* self, mirror::Object* ptr) {
-  LOG(FATAL) << "Unimplemented";
+  UNIMPLEMENTED(FATAL);
   return 0;
 }
 
 size_t ZygoteSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
-  LOG(FATAL) << "Unimplemented";
+  UNIMPLEMENTED(FATAL);
   return 0;
 }
 
+void ZygoteSpace::LogFragmentationAllocFailure(std::ostream& /*os*/,
+                                               size_t /*failed_alloc_bytes*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
 void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
   DCHECK(context->space->IsZygoteSpace());
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 5d5fe76..0cf4bb1 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -74,6 +74,9 @@
     return false;
   }
 
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  protected:
   virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 7e3b6ba..fd67197 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -151,7 +151,8 @@
 enum HprofHeapId {
   HPROF_HEAP_DEFAULT = 0,
   HPROF_HEAP_ZYGOTE = 'Z',
-  HPROF_HEAP_APP = 'A'
+  HPROF_HEAP_APP = 'A',
+  HPROF_HEAP_IMAGE = 'I',
 };
 
 enum HprofBasicType {
@@ -633,8 +634,12 @@
     // U1: NUL-terminated magic string.
     fwrite(magic, 1, sizeof(magic), header_fp_);
 
-    // U4: size of identifiers.  We're using addresses as IDs, so make sure a pointer fits.
-    U4_TO_BUF_BE(buf, 0, sizeof(void*));
+    // U4: size of identifiers.  We're using addresses as IDs and our heap references are stored
+    // as uint32_t.
+    // Note of warning: hprof-conv hard-codes the size of identifiers to 4.
+    COMPILE_ASSERT(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(uint32_t),
+                   UnexpectedHeapReferenceSize);
+    U4_TO_BUF_BE(buf, 0, sizeof(uint32_t));
     fwrite(buf, 1, sizeof(uint32_t), header_fp_);
 
     // The current time, in milliseconds since 0:00 GMT, 1/1/70.
@@ -842,26 +847,37 @@
 
 int Hprof::DumpHeapObject(mirror::Object* obj) {
   HprofRecord* rec = &current_record_;
-  HprofHeapId desiredHeap = false ? HPROF_HEAP_ZYGOTE : HPROF_HEAP_APP;  // TODO: zygote objects?
-
+  gc::space::ContinuousSpace* space =
+      Runtime::Current()->GetHeap()->FindContinuousSpaceFromObject(obj, true);
+  HprofHeapId heap_type = HPROF_HEAP_APP;
+  if (space != nullptr) {
+    if (space->IsZygoteSpace()) {
+      heap_type = HPROF_HEAP_ZYGOTE;
+    } else if (space->IsImageSpace()) {
+      heap_type = HPROF_HEAP_IMAGE;
+    }
+  }
   if (objects_in_segment_ >= OBJECTS_PER_SEGMENT || rec->Size() >= BYTES_PER_SEGMENT) {
     StartNewHeapDumpSegment();
   }
 
-  if (desiredHeap != current_heap_) {
+  if (heap_type != current_heap_) {
     HprofStringId nameId;
 
     // This object is in a different heap than the current one.
     // Emit a HEAP_DUMP_INFO tag to change heaps.
     rec->AddU1(HPROF_HEAP_DUMP_INFO);
-    rec->AddU4((uint32_t)desiredHeap);   // uint32_t: heap id
-    switch (desiredHeap) {
+    rec->AddU4(static_cast<uint32_t>(heap_type));   // uint32_t: heap type
+    switch (heap_type) {
     case HPROF_HEAP_APP:
       nameId = LookupStringId("app");
       break;
     case HPROF_HEAP_ZYGOTE:
       nameId = LookupStringId("zygote");
       break;
+    case HPROF_HEAP_IMAGE:
+      nameId = LookupStringId("image");
+      break;
     default:
       // Internal error
       LOG(ERROR) << "Unexpected desiredHeap";
@@ -869,7 +885,7 @@
       break;
     }
     rec->AddStringId(nameId);
-    current_heap_ = desiredHeap;
+    current_heap_ = heap_type;
   }
 
   mirror::Class* c = obj->GetClass();
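
The hprof changes above derive a heap id from the space that contains each object and emit a
HEAP_DUMP_INFO record only when that id changes between consecutive objects. A toy model of that
segmentation logic (record writing is stubbed with printf; the enum values match the hunk above):

    #include <cstdint>
    #include <cstdio>

    enum HprofHeapId : uint32_t {
      HPROF_HEAP_DEFAULT = 0,
      HPROF_HEAP_ZYGOTE = 'Z',
      HPROF_HEAP_APP = 'A',
      HPROF_HEAP_IMAGE = 'I',
    };

    struct HeapTagger {
      HprofHeapId current_heap = HPROF_HEAP_DEFAULT;

      // Emit a heap marker only on transitions, as DumpHeapObject does.
      void TagObject(HprofHeapId heap_type) {
        if (heap_type != current_heap) {
          std::printf("HEAP_DUMP_INFO -> heap '%c'\n",
                      static_cast<char>(heap_type));
          current_heap = heap_type;
        }
      }
    };
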
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index ad798ed..26ddba2 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -74,6 +74,7 @@
   table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
                                             PROT_READ | PROT_WRITE, false, &error_str));
   CHECK(table_mem_map_.get() != nullptr) << error_str;
+  CHECK_EQ(table_mem_map_->Size(), table_bytes);
 
   table_ = reinterpret_cast<mirror::Object**>(table_mem_map_->Begin());
   CHECK(table_ != nullptr);
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index 5b60396..d7e358c 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -83,6 +83,44 @@
   }
 }
 
+
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: Lower once implicit stack-overflow checks can work with less than 16K.
+static constexpr size_t kArmStackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kArm64StackOverflowReservedBytes =  (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kX86StackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = (kIsDebugBuild ? 16 : 16) * KB;
+
+size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  switch (isa) {
+    case kArm:      // Intentional fall-through.
+    case kThumb2:
+      return kArmStackOverflowReservedBytes;
+
+    case kArm64:
+      return kArm64StackOverflowReservedBytes;
+
+    case kMips:
+      return kMipsStackOverflowReservedBytes;
+
+    case kX86:
+      return kX86StackOverflowReservedBytes;
+
+    case kX86_64:
+      return kX86_64StackOverflowReservedBytes;
+
+    case kNone:
+      LOG(FATAL) << "kNone has no stack overflow size";
+      return 0;
+
+    default:
+      LOG(FATAL) << "Unknown instruction set" << isa;
+      return 0;
+  }
+}
+
 std::string InstructionSetFeatures::GetFeatureString() const {
   std::string result;
   if ((mask_ & kHwDiv) != 0) {
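
Moving GetStackOverflowReservedBytes out of line replaces the constexpr ternary chain (which
abused the comma operator to LOG from a constant expression) with an ordinary switch. Callers
use the value to mark the low end of the thread stack as off limits; a hedged sketch of such a
caller (names are illustrative, and the helper is assumed to return the 16 KiB constants above):

    #include <cstddef>
    #include <cstdint>

    enum InstructionSet { kNone, kArm, kThumb2, kArm64, kX86, kX86_64, kMips };

    // Assumption: every ISA currently reserves 16 KiB, per the constants above.
    static size_t GetStackOverflowReservedBytes(InstructionSet) {
      return 16 * 1024;
    }

    // The overflow gap occupies [stack_begin, stack_begin + reserved), so the
    // runtime treats stack_begin + reserved as the usable stack end.
    static uint8_t* ComputeUsableStackEnd(uint8_t* stack_begin, InstructionSet isa) {
      return stack_begin + GetStackOverflowReservedBytes(isa);
    }
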
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index dce1c15..f212811 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -169,33 +169,7 @@
   }
 }
 
-static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
-static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
-static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
-
-// TODO: shrink reserved space, in particular for 64bit.
-
-// Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-// But this one works rather well.
-static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
-// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-// test-art-host-run-test-interpreter-018-stack-overflow
-// test-art-host-run-test-interpreter-107-int-math2
-static constexpr size_t kX86StackOverflowReservedBytes = (kIsDebugBuild ? 32 : 24) * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
-
-static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
-  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
-           isa == kArm64 ? kArm64StackOverflowReservedBytes :
-           isa == kMips ? kMipsStackOverflowReservedBytes :
-           isa == kX86 ? kX86StackOverflowReservedBytes :
-           isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
-           isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
-           (LOG(FATAL) << "Unknown instruction set" << isa, 0);
-}
-
-static constexpr size_t kRuntimeStackOverflowReservedBytes =
-    GetStackOverflowReservedBytes(kRuntimeISA);
+size_t GetStackOverflowReservedBytes(InstructionSet isa);
 
 enum InstructionFeatures {
   kHwDiv  = 0x1,              // Supports hardware divide.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index f4eaa61..8e375cf 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -519,7 +519,7 @@
     bool empty;
     {
       ReaderMutexLock mu(self, deoptimized_methods_lock_);
-      empty = deoptimized_methods_.empty();  // Avoid lock violation.
+      empty = IsDeoptimizedMethodsEmpty();  // Avoid lock violation.
     }
     if (empty) {
       instrumentation_stubs_installed_ = false;
@@ -580,7 +580,7 @@
 }
 
 void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
-                                        const void* portable_code, bool have_portable_code) const {
+                                        const void* portable_code, bool have_portable_code) {
   const void* new_portable_code;
   const void* new_quick_code;
   bool new_have_portable_code;
@@ -617,20 +617,77 @@
   UpdateEntrypoints(method, new_quick_code, new_portable_code, new_have_portable_code);
 }
 
+bool Instrumentation::AddDeoptimizedMethod(mirror::ArtMethod* method) {
+  // Note that the insert() below isn't read barrier-aware. So, this
+  // FindDeoptimizedMethod() call is necessary or else we would end up
+  // storing the same method twice in the map (the from-space and the
+  // to-space ones).
+  if (FindDeoptimizedMethod(method)) {
+    // Already in the map. Return.
+    return false;
+  }
+  // Not found. Add it.
+  int32_t hash_code = method->IdentityHashCode();
+  deoptimized_methods_.insert(std::make_pair(hash_code, method));
+  return true;
+}
+
+bool Instrumentation::FindDeoptimizedMethod(mirror::ArtMethod* method) {
+  int32_t hash_code = method->IdentityHashCode();
+  auto range = deoptimized_methods_.equal_range(hash_code);
+  for (auto it = range.first; it != range.second; ++it) {
+    mirror::ArtMethod** root = &it->second;
+    mirror::ArtMethod* m = ReadBarrier::BarrierForRoot<mirror::ArtMethod>(root);
+    if (m == method) {
+      // Found.
+      return true;
+    }
+  }
+  // Not found.
+  return false;
+}
+
+mirror::ArtMethod* Instrumentation::BeginDeoptimizedMethod() {
+  auto it = deoptimized_methods_.begin();
+  if (it == deoptimized_methods_.end()) {
+    // Empty.
+    return nullptr;
+  }
+  mirror::ArtMethod** root = &it->second;
+  return ReadBarrier::BarrierForRoot<mirror::ArtMethod>(root);
+}
+
+bool Instrumentation::RemoveDeoptimizedMethod(mirror::ArtMethod* method) {
+  int32_t hash_code = method->IdentityHashCode();
+  auto range = deoptimized_methods_.equal_range(hash_code);
+  for (auto it = range.first; it != range.second; ++it) {
+    mirror::ArtMethod** root = &it->second;
+    mirror::ArtMethod* m = ReadBarrier::BarrierForRoot<mirror::ArtMethod>(root);
+    if (m == method) {
+      // Found. Erase and return.
+      deoptimized_methods_.erase(it);
+      return true;
+    }
+  }
+  // Not found.
+  return false;
+}
+
+bool Instrumentation::IsDeoptimizedMethodsEmpty() const {
+  return deoptimized_methods_.empty();
+}
+
 void Instrumentation::Deoptimize(mirror::ArtMethod* method) {
   CHECK(!method->IsNative());
   CHECK(!method->IsProxyMethod());
   CHECK(!method->IsAbstract());
 
   Thread* self = Thread::Current();
-  std::pair<std::set<mirror::ArtMethod*>::iterator, bool> pair;
   {
     WriterMutexLock mu(self, deoptimized_methods_lock_);
-    pair = deoptimized_methods_.insert(method);
+    bool has_not_been_deoptimized = AddDeoptimizedMethod(method);
+    CHECK(has_not_been_deoptimized) << "Method " << PrettyMethod(method)
+        << " is already deoptimized";
   }
-  bool already_deoptimized = !pair.second;
-  CHECK(!already_deoptimized) << "Method " << PrettyMethod(method) << " is already deoptimized";
-
   if (!interpreter_stubs_installed_) {
     UpdateEntrypoints(method, GetQuickInstrumentationEntryPoint(), GetPortableToInterpreterBridge(),
                       false);
@@ -652,11 +709,10 @@
   bool empty;
   {
     WriterMutexLock mu(self, deoptimized_methods_lock_);
-    auto it = deoptimized_methods_.find(method);
-    CHECK(it != deoptimized_methods_.end()) << "Method " << PrettyMethod(method)
+    bool found_and_erased = RemoveDeoptimizedMethod(method);
+    CHECK(found_and_erased) << "Method " << PrettyMethod(method)
         << " is not deoptimized";
-    deoptimized_methods_.erase(it);
-    empty = deoptimized_methods_.empty();
+    empty = IsDeoptimizedMethodsEmpty();
   }
 
   // Restore code and possibly stack only if we did not deoptimize everything.
@@ -684,15 +740,15 @@
   }
 }
 
-bool Instrumentation::IsDeoptimized(mirror::ArtMethod* method) const {
-  ReaderMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
+bool Instrumentation::IsDeoptimized(mirror::ArtMethod* method) {
   DCHECK(method != nullptr);
-  return deoptimized_methods_.find(method) != deoptimized_methods_.end();
+  ReaderMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
+  return FindDeoptimizedMethod(method);
 }
 
 void Instrumentation::EnableDeoptimization() {
   ReaderMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
-  CHECK(deoptimized_methods_.empty());
+  CHECK(IsDeoptimizedMethodsEmpty());
   CHECK_EQ(deoptimization_enabled_, false);
   deoptimization_enabled_ = true;
 }
@@ -708,10 +764,11 @@
     mirror::ArtMethod* method;
     {
       ReaderMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
-      if (deoptimized_methods_.empty()) {
+      if (IsDeoptimizedMethodsEmpty()) {
         break;
       }
-      method = *deoptimized_methods_.begin();
+      method = BeginDeoptimizedMethod();
+      CHECK(method != nullptr);
     }
     Undeoptimize(method);
   }
@@ -963,16 +1020,13 @@
 
 void Instrumentation::VisitRoots(RootCallback* callback, void* arg) {
   WriterMutexLock mu(Thread::Current(), deoptimized_methods_lock_);
-  if (deoptimized_methods_.empty()) {
+  if (IsDeoptimizedMethodsEmpty()) {
     return;
   }
-  std::set<mirror::ArtMethod*> new_deoptimized_methods;
-  for (mirror::ArtMethod* method : deoptimized_methods_) {
-    DCHECK(method != nullptr);
-    callback(reinterpret_cast<mirror::Object**>(&method), arg, 0, kRootVMInternal);
-    new_deoptimized_methods.insert(method);
+  for (auto pair : deoptimized_methods_) {
+    mirror::ArtMethod** root = &pair.second;
+    callback(reinterpret_cast<mirror::Object**>(root), arg, 0, kRootVMInternal);
   }
-  deoptimized_methods_ = new_deoptimized_methods;
 }
 
 std::string InstrumentationStackFrame::Dump() const {
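The rework above is motivated by moving collectors: a mirror::ArtMethod* can momentarily have a from-space and a to-space address, so a std::set keyed on raw pointers can double-insert or miss entries. Keying the container on the identity hash code, which is stable across moves, and comparing candidates through a read barrier keeps lookups correct. A reduced sketch with stand-in types (here BarrierForRoot is the identity function, where ART's ReadBarrier::BarrierForRoot may return the to-space copy):

```cpp
#include <cstdint>
#include <map>

struct ArtMethod { int32_t hash; };  // stand-in for mirror::ArtMethod

// Stand-in for ReadBarrier::BarrierForRoot; the real one may return the
// to-space copy of a moved method, here it is the identity function.
ArtMethod* BarrierForRoot(ArtMethod** root) { return *root; }

class DeoptimizedMethods {
 public:
  bool Add(ArtMethod* m) {
    if (Find(m)) return false;  // avoid from-space/to-space duplicates
    methods_.emplace(m->hash, m);
    return true;
  }
  bool Find(ArtMethod* m) {
    auto range = methods_.equal_range(m->hash);
    for (auto it = range.first; it != range.second; ++it) {
      if (BarrierForRoot(&it->second) == m) return true;  // compare through the barrier
    }
    return false;
  }

 private:
  // Identity hash codes can collide, hence a multimap rather than a map.
  std::multimap<int32_t, ArtMethod*> methods_;
};

int main() {
  ArtMethod a{42}, b{42};  // two methods with colliding identity hashes
  DeoptimizedMethods deopt;
  deopt.Add(&a);
  deopt.Add(&b);
  return deopt.Find(&a) && deopt.Find(&b) ? 0 : 1;
}
```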
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index d0cb4de..cabb0e9 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -18,8 +18,8 @@
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
 #include <stdint.h>
-#include <set>
 #include <list>
+#include <map>
 
 #include "atomic.h"
 #include "instruction_set.h"
@@ -162,7 +162,9 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_, deoptimized_methods_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsDeoptimized(mirror::ArtMethod* method) const LOCKS_EXCLUDED(deoptimized_methods_lock_);
+  bool IsDeoptimized(mirror::ArtMethod* method)
+      LOCKS_EXCLUDED(deoptimized_methods_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Enable method tracing by installing instrumentation entry/exit stubs.
   void EnableMethodTracing()
@@ -186,7 +188,7 @@
 
   // Update the code of a method respecting any installed stubs.
   void UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
-                         const void* portable_code, bool have_portable_code) const
+                         const void* portable_code, bool have_portable_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the quick code for the given method. More efficient than asking the class linker as it
@@ -367,6 +369,23 @@
                            mirror::ArtField* field, const JValue& field_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Read barrier-aware utility functions for accessing deoptimized_methods_
+  bool AddDeoptimizedMethod(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(deoptimized_methods_lock_);
+  bool FindDeoptimizedMethod(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(deoptimized_methods_lock_);
+  bool RemoveDeoptimizedMethod(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(deoptimized_methods_lock_);
+  mirror::ArtMethod* BeginDeoptimizedMethod()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(deoptimized_methods_lock_);
+  bool IsDeoptimizedMethodsEmpty() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(deoptimized_methods_lock_);
+
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
   bool instrumentation_stubs_installed_;
 
@@ -421,7 +440,7 @@
   // The set of methods being deoptimized (by the debugger) which must be executed with interpreter
   // only.
   mutable ReaderWriterMutex deoptimized_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::set<mirror::ArtMethod*> deoptimized_methods_ GUARDED_BY(deoptimized_methods_lock_);
+  std::multimap<int32_t, mirror::ArtMethod*> deoptimized_methods_ GUARDED_BY(deoptimized_methods_lock_);
   bool deoptimization_enabled_;
 
   // Current interpreter handler table. This is updated each time the thread state flags are
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index e3068b3..47a7f0d 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,8 @@
 void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receiver,
                                 uint32_t* args, JValue* result) {
   DCHECK_EQ(self, Thread::Current());
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
+  bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
     return;
   }
@@ -509,7 +510,8 @@
 JValue EnterInterpreterFromStub(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
                                 ShadowFrame& shadow_frame) {
   DCHECK_EQ(self, Thread::Current());
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
+  bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
     return JValue();
   }
@@ -520,7 +522,8 @@
 extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper& mh,
                                                   const DexFile::CodeItem* code_item,
                                                   ShadowFrame* shadow_frame, JValue* result) {
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
+  bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
     return;
   }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 1bcd27e..5a1d01e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -140,7 +140,8 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  ArtMethod* const method = receiver->GetClass()->GetVTable()->GetWithoutChecks(vtable_idx);
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
+  ArtMethod* const method = receiver->GetClass()->GetEmbeddedVTableEntry(vtable_idx);
   if (UNLIKELY(method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index 325b089..8fd07cc 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -339,7 +339,7 @@
   ConditionVariable attach_cond_ GUARDED_BY(attach_lock_);
 
   // Time of last debugger activity, in milliseconds.
-  int64_t last_activity_time_ms_;
+  Atomic<int64_t> last_activity_time_ms_;
 
   // Global counters and a mutex to protect them.
   AtomicInteger request_serial_;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 05bfe0d..b9379f5 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -1671,7 +1671,7 @@
      * so waitForDebugger() doesn't return if we stall for a bit here.
      */
     Dbg::GoActive();
-    QuasiAtomic::Write64(&last_activity_time_ms_, 0);
+    last_activity_time_ms_.StoreSequentiallyConsistent(0);
   }
 
   /*
@@ -1751,7 +1751,7 @@
    * the initial setup.  Only update if this is a non-DDMS packet.
    */
   if (request.GetCommandSet() != kJDWPDdmCmdSet) {
-    QuasiAtomic::Write64(&last_activity_time_ms_, MilliTime());
+    last_activity_time_ms_.StoreSequentiallyConsistent(MilliTime());
   }
 
   /* tell the VM that GC is okay again */
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 64e9f37..7795b7c 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -577,7 +577,7 @@
     return -1;
   }
 
-  int64_t last = QuasiAtomic::Read64(&last_activity_time_ms_);
+  int64_t last = last_activity_time_ms_.LoadSequentiallyConsistent();
 
   /* initializing or in the middle of something? */
   if (last == 0) {
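The JDWP hunks above retire the hand-rolled QuasiAtomic::Read64/Write64 calls, which existed because plain 64-bit loads and stores are not atomic on 32-bit targets, in favor of a typed Atomic<int64_t> field. In standard C++ the same accessors reduce to a std::atomic<int64_t> with sequentially consistent ordering, roughly:

```cpp
#include <atomic>
#include <cstdint>

// Sequentially consistent accessors, mirroring StoreSequentiallyConsistent and
// LoadSequentiallyConsistent on the new Atomic<int64_t> member.
std::atomic<int64_t> last_activity_time_ms{0};

void TouchActivity(int64_t now_ms) {
  last_activity_time_ms.store(now_ms, std::memory_order_seq_cst);
}

int64_t LastActivityMs() {
  return last_activity_time_ms.load(std::memory_order_seq_cst);
}

int main() {
  TouchActivity(12345);
  return LastActivityMs() == 12345 ? 0 : 1;
}
```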
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 167f848..211ba1d 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -130,12 +130,11 @@
   Class* declaring_class = GetDeclaringClass();
   Class* super_class = declaring_class->GetSuperClass();
   uint16_t method_index = GetMethodIndex();
-  ObjectArray<ArtMethod>* super_class_vtable = super_class->GetVTable();
   ArtMethod* result = NULL;
   // Did this method override a super class method? If so load the result from the super class'
   // vtable
-  if (super_class_vtable != NULL && method_index < super_class_vtable->GetLength()) {
-    result = super_class_vtable->Get(method_index);
+  if (super_class->HasVTable() && method_index < super_class->GetVTableLength()) {
+    result = super_class->GetVTableEntry(method_index);
   } else {
     // Method didn't override superclass method so search interfaces
     if (IsProxyMethod()) {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 329a984..c3754d7 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -161,6 +161,37 @@
   CHECK(method == GetImTable()->Get(i));
 }
 
+inline bool Class::HasVTable() {
+  return (GetVTable() != nullptr) || ShouldHaveEmbeddedImtAndVTable();
+}
+
+inline int32_t Class::GetVTableLength() {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
+    return GetEmbeddedVTableLength();
+  }
+  return (GetVTable() != nullptr) ? GetVTable()->GetLength() : 0;
+}
+
+inline ArtMethod* Class::GetVTableEntry(uint32_t i) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
+    return GetEmbeddedVTableEntry(i);
+  }
+  return (GetVTable() != nullptr) ? GetVTable()->Get(i) : nullptr;
+}
+
+inline int32_t Class::GetEmbeddedVTableLength() {
+  return GetField32(EmbeddedVTableLengthOffset());
+}
+
+inline void Class::SetEmbeddedVTableLength(int32_t len) {
+  SetField32<false>(EmbeddedVTableLengthOffset(), len);
+}
+
+inline ArtMethod* Class::GetEmbeddedVTableEntry(uint32_t i) {
+  uint32_t offset = EmbeddedVTableOffset().Uint32Value() + i * sizeof(VTableEntry);
+  return GetFieldObject<mirror::ArtMethod>(MemberOffset(offset));
+}
+
 inline void Class::SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method) {
   uint32_t offset = EmbeddedVTableOffset().Uint32Value() + i * sizeof(VTableEntry);
   SetFieldObject<false>(MemberOffset(offset), method);
@@ -340,12 +371,12 @@
   DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsMiranda());
   // The argument method may from a super class.
   // Use the index to a potentially overridden one for this instance's class.
-  return GetVTable()->Get(method->GetMethodIndex());
+  return GetVTableEntry(method->GetMethodIndex());
 }
 
 inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
-  return GetSuperClass()->GetVTable()->Get(method->GetMethodIndex());
+  return GetSuperClass()->GetVTableEntry(method->GetMethodIndex());
 }
 
 inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method) {
@@ -534,13 +565,19 @@
   if (has_embedded_tables) {
     uint32_t embedded_imt_size = kImtSize * sizeof(ImTableEntry);
     uint32_t embedded_vtable_size = num_vtable_entries * sizeof(VTableEntry);
-    size += embedded_imt_size + embedded_vtable_size;
+    size += embedded_imt_size +
+            sizeof(int32_t) /* vtable len */ +
+            embedded_vtable_size;
   }
   // Space used by reference statics.
   size +=  num_ref_static_fields * sizeof(HeapReference<Object>);
   // Possible pad for alignment.
-  if (((size & 7) != 0) && (num_64bit_static_fields > 0) && (num_32bit_static_fields == 0)) {
+  if (((size & 7) != 0) && (num_64bit_static_fields > 0)) {
     size += sizeof(uint32_t);
+    if (num_32bit_static_fields != 0) {
+      // Shuffle one 32 bit static field forward.
+      num_32bit_static_fields--;
+    }
   }
   // Space used for primitive static fields.
   size += (num_32bit_static_fields * sizeof(uint32_t)) +
@@ -574,7 +611,10 @@
     pos += sizeof(ImTableEntry);
   }
 
-  count = ((GetVTable() != NULL) ? GetVTable()->GetLength() : 0);
+  // Skip vtable length.
+  pos += sizeof(int32_t);
+
+  count = GetEmbeddedVTableLength();
   for (size_t i = 0; i < count; ++i) {
     MemberOffset offset = MemberOffset(pos);
     visitor(this, offset, true);
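The class-inl.h hunk above pins down the layout of an instantiable class object: the Class fields, then kImtSize IMT entries, then a 32-bit vtable length, then the embedded vtable entries, which is why EmbeddedVTableOffset gains a sizeof(int32_t) term and ComputeClassSize adds one. The offset arithmetic, with placeholder sizes rather than ART's real ones:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Placeholder sizes; ART derives these from sizeof(mirror::Class) and the
// size of a 32-bit HeapReference.
constexpr std::size_t kClassFieldsSize = 512;
constexpr std::size_t kImtSize = 64;   // IMT entry count
constexpr std::size_t kEntrySize = 4;  // one 32-bit heap reference per entry

constexpr std::size_t ImtOffset() { return kClassFieldsSize; }
constexpr std::size_t VTableLengthOffset() { return ImtOffset() + kImtSize * kEntrySize; }
constexpr std::size_t VTableOffset() { return VTableLengthOffset() + sizeof(int32_t); }
constexpr std::size_t VTableEntryOffset(uint32_t i) { return VTableOffset() + i * kEntrySize; }

int main() {
  std::cout << "embedded vtable[3] at byte offset " << VTableEntryOffset(3) << '\n';
}
```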
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index fadf80e..a218b1c 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -827,19 +827,56 @@
   }
 
   table = GetVTableDuringLinking();
-  CHECK(table != nullptr);
+  CHECK(table != nullptr) << PrettyClass(this);
+  SetEmbeddedVTableLength(table->GetLength());
   for (int32_t i = 0; i < table->GetLength(); i++) {
     SetEmbeddedVTableEntry(i, table->Get(i));
   }
+
+  SetImTable(nullptr);
+  // Keep java.lang.Object's vtable around, since it is easily reused
+  // by array classes during their linking.
+  if (!IsObjectClass()) {
+    SetVTable(nullptr);
+  }
 }
 
+// The pre-fence visitor for Class::CopyOf().
+class CopyClassVisitor {
+ public:
+  explicit CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig,
+                            size_t new_length, size_t copy_bytes)
+      : self_(self), orig_(orig), new_length_(new_length),
+        copy_bytes_(copy_bytes) {
+  }
+
+  void operator()(Object* obj, size_t usable_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    UNUSED(usable_size);
+    mirror::Class* new_class_obj = obj->AsClass();
+    mirror::Object::CopyObject(self_, new_class_obj, orig_->Get(), copy_bytes_);
+    new_class_obj->SetStatus(Class::kStatusResolving, self_);
+    new_class_obj->PopulateEmbeddedImtAndVTable();
+    new_class_obj->SetClassSize(new_length_);
+  }
+
+ private:
+  Thread* const self_;
+  Handle<mirror::Class>* const orig_;
+  const size_t new_length_;
+  const size_t copy_bytes_;
+  DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
+};
+
 Class* Class::CopyOf(Thread* self, int32_t new_length) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> h_this(hs.NewHandle(this));
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  InitializeClassVisitor visitor(new_length);
+  // The num_bytes (3rd param) is sizeof(Class) as opposed to SizeOf()
+  // to skip copying the tail part that we will overwrite here.
+  CopyClassVisitor visitor(self, &h_this, new_length, sizeof(Class));
 
   mirror::Object* new_class =
       kMovingClasses ? heap->AllocObject<true>(self, java_lang_Class_, new_length, visitor)
@@ -849,17 +886,7 @@
     return NULL;
   }
 
-  mirror::Class* new_class_obj = new_class->AsClass();
-  memcpy(new_class_obj, h_this.Get(), sizeof(Class));
-
-  new_class_obj->SetStatus(kStatusResolving, self);
-  new_class_obj->PopulateEmbeddedImtAndVTable();
-  // Correct some fields.
-  new_class_obj->SetLockWord(LockWord(), false);
-  new_class_obj->SetClassSize(new_length);
-
-  Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(new_class_obj);
-  return new_class_obj;
+  return new_class->AsClass();
 }
 
 }  // namespace mirror
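Class::CopyOf above moves the copy and fix-ups into the allocation's pre-fence visitor, so the new class object is fully initialized before it is published, instead of being memcpy'd after AllocObject returns and patched up with a manual write barrier. The pattern reduces to the following sketch (a hypothetical allocator, not the Heap API):

```cpp
#include <atomic>
#include <cstddef>
#include <cstring>
#include <new>

// Hypothetical allocator in the shape of Heap::AllocObject: the visitor runs
// on the raw memory before a release fence publishes the object, so a copying
// visitor can memcpy and fix up fields without racing concurrent readers.
template <typename Visitor>
void* AllocWithPreFenceVisitor(std::size_t bytes, const Visitor& visitor) {
  void* raw = ::operator new(bytes);
  visitor(raw, bytes);
  std::atomic_thread_fence(std::memory_order_release);
  return raw;
}

struct Blob { char data[64]; };

int main() {
  Blob src{};
  void* copy = AllocWithPreFenceVisitor(sizeof(Blob), [&](void* obj, std::size_t n) {
    std::memcpy(obj, &src, n);  // analogous to CopyClassVisitor's CopyObject call
  });
  ::operator delete(copy);
}
```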
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 648bdde..0525abf 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -692,18 +692,34 @@
     return MemberOffset(sizeof(Class));
   }
 
-  static MemberOffset EmbeddedVTableOffset() {
+  static MemberOffset EmbeddedVTableLengthOffset() {
     return MemberOffset(sizeof(Class) + kImtSize * sizeof(mirror::Class::ImTableEntry));
   }
 
+  static MemberOffset EmbeddedVTableOffset() {
+    return MemberOffset(sizeof(Class) + kImtSize * sizeof(mirror::Class::ImTableEntry) +
+                        sizeof(int32_t));
+  }
+
   bool ShouldHaveEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsInstantiable();
   }
 
+  bool HasVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ArtMethod* GetEmbeddedImTableEntry(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  int32_t GetVTableLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* GetVTableEntry(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  int32_t GetEmbeddedVTableLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetEmbeddedVTableLength(int32_t len) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* GetEmbeddedVTableEntry(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void SetEmbeddedVTableEntry(uint32_t i, ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PopulateEmbeddedImtAndVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 961bc64..3543654 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -65,8 +65,8 @@
   Object* const dest_obj_;
 };
 
-static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src, size_t num_bytes)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+Object* Object::CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src,
+                           size_t num_bytes) {
   // Copy instance data.  We assume memcpy copies by words.
   // TODO: expose and use move32.
   byte* src_bytes = reinterpret_cast<byte*>(src);
@@ -107,7 +107,7 @@
   void operator()(Object* obj, size_t usable_size) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     UNUSED(usable_size);
-    CopyObject(self_, obj, orig_->Get(), num_bytes_);
+    Object::CopyObject(self_, obj, orig_->Get(), num_bytes_);
   }
 
  private:
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 4fae470..a6b6227 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -370,6 +370,13 @@
   // Generate an identity hash code.
   static int32_t GenerateIdentityHashCode();
 
+  // A utility function that copies an object in a read barrier and
+  // write barrier-aware way. This is internally used by Clone() and
+  // Class::CopyOf().
+  static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src,
+                            size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // The Class representing the type of the object.
   HeapReference<Class> klass_;
   // Monitor and hash code information.
@@ -386,6 +393,8 @@
   friend class art::ImageWriter;
   friend class art::Monitor;
   friend struct art::ObjectOffsets;  // for verifying offset information
+  friend class CopyObjectVisitor;  // for CopyObject().
+  friend class CopyClassVisitor;   // for CopyObject().
   DISALLOW_IMPLICIT_CONSTRUCTORS(Object);
 };
 
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 4b26eda..aee3086 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -680,6 +680,8 @@
     Thread* owner;
     {
       ScopedThreadStateChange tsc(self, kBlocked);
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
       owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
     }
     if (owner != nullptr) {
@@ -745,10 +747,10 @@
           contention_count++;
           Runtime* runtime = Runtime::Current();
           if (contention_count <= runtime->GetMaxSpinsBeforeThinkLockInflation()) {
-            // TODO: Consider switch thread state to kBlocked when we are yielding.
+            // TODO: Consider switching the thread state to kBlocked when we are yielding.
             // Use sched_yield instead of NanoSleep since NanoSleep can wait much longer than the
             // parameter you pass in. This can cause thread suspension to take excessively long
-            // make long pauses. See b/16307460.
+            // and make long pauses. See b/16307460.
             sched_yield();
           } else {
             contention_count = 0;
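The MutexLock added to Monitor above reappears in the dalvik_system_VMStack, java_lang_Thread, and DdmVmInternal hunks below: every caller of SuspendThreadByPeer or SuspendThreadByThreadId must now hold thread_list_suspend_thread_lock_, so at most one suspension request is in flight and two threads can never be suspending each other at once. The shape of the fix, with stand-in types:

```cpp
#include <mutex>

struct Thread {};
using jobject = void*;

// Stand-in for ThreadList::SuspendThreadByPeer.
Thread* SuspendThreadByPeer(jobject, bool, bool, bool*) { return nullptr; }

// Stand-in for Locks::thread_list_suspend_thread_lock_.
std::mutex suspend_thread_lock;

Thread* SuspendPeer(jobject peer, bool* timed_out) {
  // Holding the lock for the whole call means at most one suspension request
  // is in flight, breaking the A-suspends-B-while-B-suspends-A deadlock.
  std::lock_guard<std::mutex> lock(suspend_thread_lock);
  return SuspendThreadByPeer(peer, /*request_suspension=*/true,
                             /*debug_suspension=*/false, timed_out);
}

int main() {
  bool timed_out = false;
  return SuspendPeer(nullptr, &timed_out) == nullptr ? 0 : 1;
}
```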
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index cf31064..5f718ba 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -35,7 +35,12 @@
     // Suspend thread to build stack trace.
     soa.Self()->TransitionFromRunnableToSuspended(kNative);
     bool timed_out;
-    Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+    Thread* thread;
+    {
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(soa.Self(), *Locks::thread_list_suspend_thread_lock_);
+      thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+    }
     if (thread != nullptr) {
       // Must be runnable to create returned array.
       CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kNative);
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index e577c2c..124bdf5 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -71,7 +71,10 @@
     jthrowable cnfe = reinterpret_cast<jthrowable>(env->NewObject(WellKnownClasses::java_lang_ClassNotFoundException,
                                                                   WellKnownClasses::java_lang_ClassNotFoundException_init,
                                                                   javaName, cause.get()));
-    env->Throw(cnfe);
+    if (cnfe != nullptr) {
+      // Make sure allocation didn't fail with an OOME.
+      env->Throw(cnfe);
+    }
     return nullptr;
   }
   if (initialize) {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index bae67f2..8f83f96 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -116,18 +116,25 @@
 
 static void Thread_nativeSetName(JNIEnv* env, jobject peer, jstring java_name) {
   ScopedUtfChars name(env, java_name);
+  Thread* self;
   {
     ScopedObjectAccess soa(env);
     if (soa.Decode<mirror::Object*>(peer) == soa.Self()->GetPeer()) {
       soa.Self()->SetThreadName(name.c_str());
       return;
     }
+    self = soa.Self();
   }
   // Suspend thread to avoid it from killing itself while we set its name. We don't just hold the
   // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
   // in the DDMS send code.
   bool timed_out;
-  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+  // Take suspend thread lock to avoid races with threads trying to suspend this one.
+  Thread* thread;
+  {
+    MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
+    thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+  }
   if (thread != NULL) {
     {
       ScopedObjectAccess soa(env);
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index e17e60a..45ef9ae 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -61,7 +61,12 @@
     }
 
     // Suspend thread to build stack trace.
-    Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
+    Thread* thread;
+    {
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
+      thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
+    }
     if (thread != nullptr) {
       {
         ScopedObjectAccess soa(env);
diff --git a/runtime/noop_compiler_callbacks.h b/runtime/noop_compiler_callbacks.h
index 65498de..e9ad353 100644
--- a/runtime/noop_compiler_callbacks.h
+++ b/runtime/noop_compiler_callbacks.h
@@ -32,6 +32,11 @@
 
   void ClassRejected(ClassReference ref) OVERRIDE {}
 
+  // This is only used by compilers which need to be able to run without relocation even when it
+  // would normally be enabled. For example, the patchoat executable and dex2oat --image both need
+  // to disable relocation, since they write out the images directly.
+  bool IsRelocationPossible() OVERRIDE { return false; }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(NoopCompilerCallbacks);
 };
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 1421baf..0a8c35b 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '7', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '8', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
@@ -67,6 +67,8 @@
                      const SafeMap<std::string, std::string>* variable_data) {
   memcpy(magic_, kOatMagic, sizeof(kOatMagic));
   memcpy(version_, kOatVersion, sizeof(kOatVersion));
+  executable_offset_ = 0;
+  image_patch_delta_ = 0;
 
   adler32_checksum_ = adler32(0L, Z_NULL, 0);
 
@@ -98,7 +100,6 @@
     UpdateChecksum(&key_value_store_, key_value_store_size_);
   }
 
-  executable_offset_ = 0;
   interpreter_to_interpreter_bridge_offset_ = 0;
   interpreter_to_compiled_code_bridge_offset_ = 0;
   jni_dlsym_lookup_offset_ = 0;
@@ -118,6 +119,12 @@
   if (memcmp(version_, kOatVersion, sizeof(kOatVersion)) != 0) {
     return false;
   }
+  if (!IsAligned<kPageSize>(executable_offset_)) {
+    return false;
+  }
+  if (!IsAligned<kPageSize>(image_patch_delta_)) {
+    return false;
+  }
   return true;
 }
 
@@ -355,6 +362,26 @@
   UpdateChecksum(&quick_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
+int32_t OatHeader::GetImagePatchDelta() const {
+  CHECK(IsValid());
+  return image_patch_delta_;
+}
+
+void OatHeader::RelocateOat(off_t delta) {
+  CHECK(IsValid());
+  CHECK_ALIGNED(delta, kPageSize);
+  image_patch_delta_ += delta;
+  if (image_file_location_oat_data_begin_ != 0) {
+    image_file_location_oat_data_begin_ += delta;
+  }
+}
+
+void OatHeader::SetImagePatchDelta(int32_t off) {
+  CHECK(IsValid());
+  CHECK_ALIGNED(off, kPageSize);
+  image_patch_delta_ = off;
+}
+
 uint32_t OatHeader::GetImageFileLocationOatChecksum() const {
   CHECK(IsValid());
   return image_file_location_oat_checksum_;
diff --git a/runtime/oat.h b/runtime/oat.h
index fbed596..6d5fefe 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -88,6 +88,10 @@
   uint32_t GetQuickToInterpreterBridgeOffset() const;
   void SetQuickToInterpreterBridgeOffset(uint32_t offset);
 
+  int32_t GetImagePatchDelta() const;
+  void RelocateOat(off_t delta);
+  void SetImagePatchDelta(int32_t off);
+
   InstructionSet GetInstructionSet() const;
   const InstructionSetFeatures& GetInstructionSetFeatures() const;
   uint32_t GetImageFileLocationOatChecksum() const;
@@ -129,6 +133,9 @@
   uint32_t quick_resolution_trampoline_offset_;
   uint32_t quick_to_interpreter_bridge_offset_;
 
+  // The amount by which the image associated with this oat file has been patched.
+  int32_t image_patch_delta_;
+
   uint32_t image_file_location_oat_checksum_;
   uint32_t image_file_location_oat_data_begin_;
 
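The new image_patch_delta_ field above records how far the associated image has been slid, and both the stored value and any applied shift must stay page-aligned so mapped oat code keeps its page offsets (hence the new IsValid() checks). A sketch of the bookkeeping, with assert() standing in for CHECK_ALIGNED:

```cpp
#include <cassert>
#include <cstdint>

constexpr int32_t kPageSize = 4096;

struct OatHeaderSketch {
  int32_t image_patch_delta = 0;
  uint32_t image_file_location_oat_data_begin = 0;

  void RelocateOat(int32_t delta) {
    assert(delta % kPageSize == 0);  // CHECK_ALIGNED(delta, kPageSize)
    image_patch_delta += delta;
    if (image_file_location_oat_data_begin != 0) {
      image_file_location_oat_data_begin += delta;
    }
  }

  bool IsValid() const {
    return image_patch_delta % kPageSize == 0;  // mirrors the new header check
  }
};

int main() {
  OatHeaderSketch header;
  header.RelocateOat(8 * kPageSize);
  return header.IsValid() && header.image_patch_delta == 8 * kPageSize ? 0 : 1;
}
```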
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 86c1bae..f9cc36a 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -87,6 +87,11 @@
   return OpenElfFile(file, location, NULL, true, false, error_msg);
 }
 
+OatFile* OatFile::OpenReadable(File* file, const std::string& location, std::string* error_msg) {
+  CheckLocation(location);
+  return OpenElfFile(file, location, NULL, false, false, error_msg);
+}
+
 OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
                              const std::string& location,
                              byte* requested_base,
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 44f4466..3ec2e84 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -52,6 +52,8 @@
   // ImageWriter which wants to open a writable version from an existing
   // file descriptor for patching.
   static OatFile* OpenWritable(File* file, const std::string& location, std::string* error_msg);
+  // Opens an oat file from an already opened File. Maps it PROT_READ, MAP_PRIVATE.
+  static OatFile* OpenReadable(File* file, const std::string& location, std::string* error_msg);
 
   // Open an oat file backed by a std::vector with the given location.
   static OatFile* OpenMemory(std::vector<uint8_t>& oat_contents,
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9a1d0f7..8d0aff8 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -221,6 +221,7 @@
 
   compiler_callbacks_ = nullptr;
   is_zygote_ = false;
+  must_relocate_ = kDefaultMustRelocate;
   if (kPoisonHeapReferences) {
     // kPoisonHeapReferences currently works only with the interpreter only.
     // TODO: make it work with the compiler.
@@ -390,6 +391,7 @@
       ignore_max_footprint_ = true;
     } else if (option == "-XX:LowMemoryMode") {
       low_memory_mode_ = true;
+      // TODO Might want to turn off must_relocate here.
     } else if (option == "-XX:UseTLAB") {
       use_tlab_ = true;
     } else if (option == "-XX:EnableHSpaceCompactForOOM") {
@@ -408,6 +410,14 @@
           reinterpret_cast<const char*>(options[i].second));
     } else if (option == "-Xzygote") {
       is_zygote_ = true;
+    } else if (StartsWith(option, "-Xpatchoat:")) {
+      if (!ParseStringAfterChar(option, ':', &patchoat_executable_)) {
+        return false;
+      }
+    } else if (option == "-Xrelocate") {
+      must_relocate_ = true;
+    } else if (option == "-Xnorelocate") {
+      must_relocate_ = false;
     } else if (option == "-Xint") {
       interpreter_only_ = true;
     } else if (StartsWith(option, "-Xgc:")) {
@@ -758,6 +768,8 @@
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
+  UsageMessage(stream, "  -Xpatchoat:filename\n");
+  UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 23f2bcf..29d5494 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -47,6 +47,10 @@
   std::string jni_trace_;
   CompilerCallbacks* compiler_callbacks_;
   bool is_zygote_;
+  // TODO Change this to true when we want it on by default.
+  static constexpr bool kDefaultMustRelocate = false;
+  bool must_relocate_;
+  std::string patchoat_executable_;
   bool interpreter_only_;
   bool is_explicit_gc_disabled_;
   bool use_tlab_;
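-Xrelocate and -Xnorelocate above form a last-one-wins boolean pair over kDefaultMustRelocate, while -Xpatchoat: carries a path in the style of -Xcompiler:. A reduced parsing sketch (the real ParseStringAfterChar also reports malformed options):

```cpp
#include <string>

struct ParsedOptionsSketch {
  // Flips to true once relocation becomes the default (per the TODO above).
  static constexpr bool kDefaultMustRelocate = false;
  bool must_relocate = kDefaultMustRelocate;
  std::string patchoat_executable;

  bool ParseOne(const std::string& option) {
    if (option == "-Xrelocate") {
      must_relocate = true;
    } else if (option == "-Xnorelocate") {
      must_relocate = false;
    } else if (option.compare(0, 11, "-Xpatchoat:") == 0) {
      patchoat_executable = option.substr(11);
    } else {
      return false;  // not one of the options handled here
    }
    return true;
  }
};

int main() {
  ParsedOptionsSketch opts;
  opts.ParseOne("-Xrelocate");
  opts.ParseOne("-Xpatchoat:/system/bin/patchoat");
  return opts.must_relocate && !opts.patchoat_executable.empty() ? 0 : 1;
}
```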
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index d2877f9..db98e1f 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -17,7 +17,7 @@
 #include "reference_table.h"
 
 #include "common_runtime_test.h"
-#include "mirror/array.h"
+#include "mirror/array-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index aca2607..b7eae85 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -101,6 +101,7 @@
       instruction_set_(kNone),
       compiler_callbacks_(nullptr),
       is_zygote_(false),
+      must_relocate_(false),
       is_concurrent_gc_enabled_(true),
       is_explicit_gc_disabled_(false),
       default_stack_size_(0),
@@ -385,6 +386,15 @@
   return env->NewGlobalRef(system_class_loader.get());
 }
 
+std::string Runtime::GetPatchoatExecutable() const {
+  if (!patchoat_executable_.empty()) {
+    return patchoat_executable_;
+  }
+  std::string patchoat_executable_(GetAndroidRoot());
+  patchoat_executable_ += (kIsDebugBuild ? "/bin/patchoatd" : "/bin/patchoat");
+  return patchoat_executable_;
+}
+
 std::string Runtime::GetCompilerExecutable() const {
   if (!compiler_executable_.empty()) {
     return compiler_executable_;
@@ -557,6 +567,8 @@
   properties_ = options->properties_;
 
   compiler_callbacks_ = options->compiler_callbacks_;
+  patchoat_executable_ = options->patchoat_executable_;
+  must_relocate_ = options->must_relocate_;
   is_zygote_ = options->is_zygote_;
   is_explicit_gc_disabled_ = options->is_explicit_gc_disabled_;
 
@@ -629,8 +641,7 @@
       break;
   }
 
-  if (!options->interpreter_only_ &&
-    (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_)) {
+  if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
     fault_manager.Init();
 
     // These need to be in a specific order.  The null point check handler must be
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 284e4ff..c8e462e 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -21,10 +21,12 @@
 #include <stdio.h>
 
 #include <iosfwd>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
 
+#include "compiler_callbacks.h"
 #include "instrumentation.h"
 #include "instruction_set.h"
 #include "jobject_comparator.h"
@@ -54,7 +56,6 @@
 class MethodVerifier;
 }
 class ClassLinker;
-class CompilerCallbacks;
 class DexFile;
 class InternTable;
 class JavaVMExt;
@@ -91,6 +92,18 @@
     return compiler_callbacks_ != nullptr;
   }
 
+  bool CanRelocate() const {
+    return !IsCompiler() || compiler_callbacks_->IsRelocationPossible();
+  }
+
+  bool ShouldRelocate() const {
+    return must_relocate_ && CanRelocate();
+  }
+
+  bool MustRelocateIfPossible() const {
+    return must_relocate_;
+  }
+
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
   }
@@ -104,6 +117,7 @@
   }
 
   std::string GetCompilerExecutable() const;
+  std::string GetPatchoatExecutable() const;
 
   const std::vector<std::string>& GetCompilerOptions() const {
     return compiler_options_;
@@ -173,7 +187,7 @@
   void DetachCurrentThread() LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DumpLockHolders(std::ostream& os);
 
   ~Runtime();
@@ -485,10 +499,12 @@
 
   CompilerCallbacks* compiler_callbacks_;
   bool is_zygote_;
+  bool must_relocate_;
   bool is_concurrent_gc_enabled_;
   bool is_explicit_gc_disabled_;
 
   std::string compiler_executable_;
+  std::string patchoat_executable_;
   std::vector<std::string> compiler_options_;
   std::vector<std::string> image_compiler_options_;
 
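The three predicates added to Runtime above keep policy and capability separate: MustRelocateIfPossible reports what was requested, CanRelocate asks the compiler callbacks whether relocation is even possible in this configuration (NoopCompilerCallbacks vetoes it for patchoat and dex2oat --image, per the earlier hunk), and ShouldRelocate is their conjunction. Illustrated with stand-in state:

```cpp
#include <iostream>

// Stand-ins for Runtime state; in ART, relocation_possible comes from
// CompilerCallbacks::IsRelocationPossible().
struct RuntimeSketch {
  bool is_compiler = false;
  bool relocation_possible = true;
  bool must_relocate = false;

  bool CanRelocate() const { return !is_compiler || relocation_possible; }
  bool ShouldRelocate() const { return must_relocate && CanRelocate(); }
};

int main() {
  RuntimeSketch patchoat{/*is_compiler=*/true, /*relocation_possible=*/false,
                         /*must_relocate=*/true};
  std::cout << std::boolalpha << patchoat.ShouldRelocate() << '\n';  // false
}
```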
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 38f1307..a5caa07 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -57,26 +57,24 @@
 }
 
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
-#ifdef NDEBUG
-  UNUSED(check_locks);  // Keep GCC happy about unused parameters.
-#else
-  CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
-  if (check_locks) {
-    bool bad_mutexes_held = false;
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      // We expect no locks except the mutator_lock_.
-      if (i != kMutatorLock) {
-        BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
-        if (held_mutex != NULL) {
-          LOG(ERROR) << "holding \"" << held_mutex->GetName()
-                  << "\" at point where thread suspension is expected";
-          bad_mutexes_held = true;
+  if (kIsDebugBuild) {
+    CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
+    if (check_locks) {
+      bool bad_mutexes_held = false;
+      for (int i = kLockLevelCount - 1; i >= 0; --i) {
+        // We expect no locks except the mutator_lock_ or thread list suspend thread lock.
+        if (i != kMutatorLock && i != kThreadListSuspendThreadLock) {
+          BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
+          if (held_mutex != NULL) {
+            LOG(ERROR) << "holding \"" << held_mutex->GetName()
+                      << "\" at point where thread suspension is expected";
+            bad_mutexes_held = true;
+          }
         }
       }
+      CHECK(!bad_mutexes_held);
     }
-    CHECK(!bad_mutexes_held);
   }
-#endif
 }
 
 inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f888029..8151464 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,6 +76,8 @@
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
+const size_t Thread::kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -219,7 +221,7 @@
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += kRuntimeStackOverflowReservedBytes;
+    stack_size += GetStackOverflowReservedBytes(kRuntimeISA);
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -308,7 +310,7 @@
 
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
-        << strerror(errno);
+        << strerror(errno) << " Protected region size: " << kStackOverflowProtectedSize;
   }
 
   // Tell the kernel that we won't be needing these pages any more.
@@ -536,7 +538,7 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
+  if (read_stack_size <= GetStackOverflowReservedBytes(kRuntimeISA)) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
@@ -1138,7 +1140,7 @@
   if (UNLIKELY(IsExceptionPending())) {
     ScopedObjectAccess soa(Thread::Current());
     mirror::Throwable* exception = GetException(nullptr);
-    LOG(FATAL) << "Throwing new exception " << msg << " with unexpected pending exception: "
+    LOG(FATAL) << "Throwing new exception '" << msg << "' with unexpected pending exception: "
         << exception->Dump();
   }
 }
@@ -2247,7 +2249,7 @@
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kRuntimeStackOverflowReservedBytes << ")?";
+               << GetStackOverflowReservedBytes(kRuntimeISA) << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index d08c2fc..998f7db 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -104,8 +104,7 @@
   // is protected against reads and the lower is available for use while
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
-  static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-      kRuntimeStackOverflowReservedBytes;
+  static const size_t kStackOverflowImplicitCheckSize;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -551,6 +550,16 @@
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
   }
 
+  byte* GetStackEndForInterpreter(bool implicit_overflow_check) const {
+    if (implicit_overflow_check) {
+      // The interpreter needs the extra overflow bytes that stack_end does
+      // not include.
+      return tlsPtr_.stack_end + GetStackOverflowReservedBytes(kRuntimeISA);
+    } else {
+      return tlsPtr_.stack_end;
+    }
+  }
+
   byte* GetStackEnd() const {
     return tlsPtr_.stack_end;
   }
@@ -567,7 +576,7 @@
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + GetStackOverflowReservedBytes(kRuntimeISA);
     }
   }
 
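The thread.h changes tie the stack pieces together: with implicit checks enabled, stack_end sits kStackOverflowProtectedSize plus the reserved bytes above stack_begin, and GetStackEndForInterpreter raises the interpreter's explicit limit by a further reserved-bytes band so that, when it does throw, there is still headroom to construct the StackOverflowError. The arithmetic, with stand-in constants:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

using byte = uint8_t;
constexpr std::size_t KB = 1024;
constexpr std::size_t kStackOverflowProtectedSize = 16 * KB;
// Stand-in for GetStackOverflowReservedBytes(kRuntimeISA).
constexpr std::size_t kReservedBytes = 32 * KB;

struct ThreadStackSketch {
  byte* stack_begin;
  byte* stack_end;

  void ResetDefaultStackEnd(bool implicit_overflow_check) {
    stack_end = stack_begin + (implicit_overflow_check
                                   ? kStackOverflowProtectedSize + kReservedBytes
                                   : kReservedBytes);
  }

  // A higher limit makes the explicitly-checking interpreter throw earlier,
  // leaving a reserved band below its large shadow frames for the throw path.
  byte* GetStackEndForInterpreter(bool implicit_overflow_check) const {
    return implicit_overflow_check ? stack_end + kReservedBytes : stack_end;
  }
};

int main() {
  static byte stack[256 * KB];
  ThreadStackSketch t{stack, nullptr};
  t.ResetDefaultStackEnd(/*implicit_overflow_check=*/true);
  std::cout << (t.GetStackEndForInterpreter(true) - stack) << " bytes kept free\n";
}
```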
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b649b62..ff1a079 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -170,16 +170,7 @@
 // individual thread requires polling. delay_us is the requested sleep and total_delay_us
 // accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
 // subsequently sleeps increase delay_us from 1ms to 500ms by doubling.
-static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us,
-                               bool holding_locks) {
-  if (!holding_locks) {
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-      if (held_mutex != NULL) {
-        LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
-      }
-    }
-  }
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
   useconds_t new_delay_us = (*delay_us) * 2;
   CHECK_GE(new_delay_us, *delay_us);
   if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
@@ -244,7 +235,7 @@
       useconds_t total_delay_us = 0;
       do {
         useconds_t delay_us = 100;
-        ThreadSuspendSleep(self, &delay_us, &total_delay_us, true);
+        ThreadSuspendSleep(self, &delay_us, &total_delay_us);
       } while (!thread->IsSuspended());
       // Shouldn't need to wait for longer than 1000 microseconds.
       constexpr useconds_t kLongWaitThresholdUS = 1000;
@@ -444,6 +435,11 @@
   while (true) {
     Thread* thread;
     {
+      // Note: this will transition to runnable and potentially suspend. We ensure that only one
+      // thread is requesting another's suspension, to avoid deadlock, by requiring this function
+      // to be called while holding Locks::thread_list_suspend_thread_lock_. It's important that
+      // this thread suspend rather than request suspension of another, to avoid potential cycles
+      // of threads requesting each other's suspension.
       ScopedObjectAccess soa(self);
       MutexLock mu(self, *Locks::thread_list_lock_);
       thread = Thread::FromManagedThread(soa, peer);
@@ -483,7 +479,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
   }
 }
 
@@ -502,9 +498,14 @@
   CHECK_NE(thread_id, kInvalidThreadId);
   while (true) {
     {
-      Thread* thread = NULL;
+      // Note: this will transition to runnable and potentially suspend. We ensure that only one
+      // thread is requesting another's suspension, to avoid deadlock, by requiring this function
+      // to be called while holding Locks::thread_list_suspend_thread_lock_. It's important that
+      // this thread suspend rather than request suspension of another, to avoid potential cycles
+      // of threads requesting each other's suspension.
       ScopedObjectAccess soa(self);
       MutexLock mu(self, *Locks::thread_list_lock_);
+      Thread* thread = nullptr;
       for (const auto& it : list_) {
         if (it->GetThreadId() == thread_id) {
           thread = it;
@@ -550,7 +551,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index d46987a..1b67ac0 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -68,6 +68,7 @@
   // is set to true.
   static Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
                                      bool* timed_out)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_suspend_thread_lock_)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
@@ -77,6 +78,7 @@
   // the thread terminating. Note that as thread ids are recycled this may not suspend the expected
   // thread, that may be terminating. If the suspension times out then *timeout is set to true.
   Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_suspend_thread_lock_)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
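ThreadSuspendSleep, simplified above now that the held-mutex audit lives in AssertThreadSuspensionIsAllowable, is a yield-then-exponential-backoff poll. Its core, extracted with minor simplifications (the real function also CHECKs the doubling for overflow):

```cpp
#include <sched.h>
#include <unistd.h>

// Doubles the delay up to a 0.5s cap; a zero delay means "just yield once
// before we start sleeping". total_delay_us accumulates time slept so the
// caller can log suspiciously long suspensions.
static void SuspendSleep(useconds_t* delay_us, useconds_t* total_delay_us) {
  useconds_t new_delay_us = (*delay_us) * 2;
  if (new_delay_us < 500000) {  // cap each sleep at 0.5s
    *delay_us = new_delay_us;
  }
  if (*delay_us == 0) {
    sched_yield();
    *delay_us = 500;  // illustrative starting delay after the initial yield
  } else {
    usleep(*delay_us);
    *total_delay_us += *delay_us;
  }
}

int main() {
  useconds_t delay = 0, total = 0;
  for (int i = 0; i < 3; ++i) SuspendSleep(&delay, &total);
  return total > 0 ? 0 : 1;
}
```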
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8b1ad39..52cdcc1 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1154,22 +1154,55 @@
 }
 
 const char* GetAndroidData() {
+  std::string error_msg;
+  const char* dir = GetAndroidDataSafe(&error_msg);
+  if (dir != nullptr) {
+    return dir;
+  } else {
+    LOG(FATAL) << error_msg;
+    return "";
+  }
+}
+
+const char* GetAndroidDataSafe(std::string* error_msg) {
   const char* android_data = getenv("ANDROID_DATA");
   if (android_data == NULL) {
     if (OS::DirectoryExists("/data")) {
       android_data = "/data";
     } else {
-      LOG(FATAL) << "ANDROID_DATA not set and /data does not exist";
-      return "";
+      *error_msg = "ANDROID_DATA not set and /data does not exist";
+      return nullptr;
     }
   }
   if (!OS::DirectoryExists(android_data)) {
-    LOG(FATAL) << "Failed to find ANDROID_DATA directory " << android_data;
-    return "";
+    *error_msg = StringPrintf("Failed to find ANDROID_DATA directory %s", android_data);
+    return nullptr;
   }
   return android_data;
 }
 
+void GetDalvikCache(const char* subdir, const bool create_if_absent, std::string* dalvik_cache,
+                    bool* have_android_data, bool* dalvik_cache_exists) {
+  CHECK(subdir != nullptr);
+  std::string error_msg;
+  const char* android_data = GetAndroidDataSafe(&error_msg);
+  if (android_data == nullptr) {
+    *have_android_data = false;
+    *dalvik_cache_exists = false;
+    return;
+  } else {
+    *have_android_data = true;
+  }
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
+  *dalvik_cache = dalvik_cache_root + subdir;
+  *dalvik_cache_exists = OS::DirectoryExists(dalvik_cache->c_str());
+  if (create_if_absent && !*dalvik_cache_exists && strcmp(android_data, "/data") != 0) {
+    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
+    *dalvik_cache_exists = ((mkdir(dalvik_cache_root.c_str(), 0700) == 0 || errno == EEXIST) &&
+                            (mkdir(dalvik_cache->c_str(), 0700) == 0 || errno == EEXIST));
+  }
+}
+
 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
   CHECK(subdir != nullptr);
   const char* android_data = GetAndroidData();
@@ -1196,9 +1229,11 @@
   return dalvik_cache;
 }
 
-std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
+bool GetDalvikCacheFilename(const char* location, const char* cache_location,
+                            std::string* filename, std::string* error_msg) {
   if (location[0] != '/') {
-    LOG(FATAL) << "Expected path in location to be absolute: "<< location;
+    *error_msg = StringPrintf("Expected path in location to be absolute: %s", location);
+    return false;
   }
   std::string cache_file(&location[1]);  // skip leading slash
   if (!EndsWith(location, ".dex") && !EndsWith(location, ".art")) {
@@ -1206,7 +1241,17 @@
     cache_file += DexFile::kClassesDex;
   }
   std::replace(cache_file.begin(), cache_file.end(), '/', '@');
-  return StringPrintf("%s/%s", cache_location, cache_file.c_str());
+  *filename = StringPrintf("%s/%s", cache_location, cache_file.c_str());
+  return true;
+}
+
+std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
+  std::string ret;
+  std::string error_msg;
+  if (!GetDalvikCacheFilename(location, cache_location, &ret, &error_msg)) {
+    LOG(FATAL) << error_msg;
+  }
+  return ret;
 }
 
 static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
diff --git a/runtime/utils.h b/runtime/utils.h
index c920050..73872d3 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -439,11 +439,22 @@
 
 // Find $ANDROID_DATA, /data, or abort.
 const char* GetAndroidData();
+// Find $ANDROID_DATA, /data, or return nullptr.
+const char* GetAndroidDataSafe(std::string* error_msg);
 
 // Returns the dalvik-cache location, or dies trying. subdir will be
 // appended to the cache location.
 std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
+// Looks up the dalvik cache and stores its path in the dalvik_cache argument.
+// have_android_data will be set to true if we have an ANDROID_DATA directory that exists,
+// dalvik_cache_exists will be set to true if a dalvik-cache directory is present.
+void GetDalvikCache(const char* subdir, bool create_if_absent, std::string* dalvik_cache,
+                    bool* have_android_data, bool* dalvik_cache_exists);
 
+// Returns true on success and stores the absolute dalvik-cache path for a DexFile or OatFile
+// in *filename. The path will be rooted at cache_location.
+bool GetDalvikCacheFilename(const char* file_location, const char* cache_location,
+                            std::string* filename, std::string* error_msg);
 // Returns the absolute dalvik-cache path for a DexFile or OatFile, or
 // dies trying. The path returned will be rooted at cache_location.
 std::string GetDalvikCacheFilenameOrDie(const char* file_location,
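GetDalvikCacheFilename above is the non-aborting variant of GetDalvikCacheFilenameOrDie; the path transform itself flattens an absolute location into a single cache file name. A sketch of that transform (the .dex/.art suffix handling is omitted):

```cpp
#include <algorithm>
#include <iostream>
#include <string>

// The flattening step of GetDalvikCacheFilename: drop the leading slash and
// replace the remaining separators with '@' under the cache directory.
std::string DalvikCacheFilename(const std::string& location, const std::string& cache_location) {
  std::string cache_file = location.substr(1);  // skip leading '/'
  std::replace(cache_file.begin(), cache_file.end(), '/', '@');
  return cache_location + "/" + cache_file;
}

int main() {
  // Prints: /data/dalvik-cache/x86/system@framework@boot.art
  std::cout << DalvikCacheFilename("/system/framework/boot.art", "/data/dalvik-cache/x86")
            << '\n';
}
```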
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index f1b5afd..de792cb 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1728,6 +1728,15 @@
       const uint32_t type_idx = (is_checkcast) ? inst->VRegB_21c() : inst->VRegC_22c();
       const RegType& res_type = ResolveClassAndCheckAccess(type_idx);
       if (res_type.IsConflict()) {
+        // If this is a primitive type, fail HARD.
+        mirror::Class* klass = (*dex_cache_)->GetResolvedType(type_idx);
+        if (klass != nullptr && klass->IsPrimitive()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "using primitive type "
+              << dex_file_->StringByTypeIdx(type_idx) << " in instanceof in "
+              << GetDeclaringClass();
+          break;
+        }
+
         DCHECK_NE(failures_.size(), 0U);
         if (!is_checkcast) {
           work_line_->SetRegisterType(inst->VRegA_22c(), reg_types_.Boolean());
@@ -1972,6 +1981,7 @@
 
         if (!orig_type.Equals(cast_type) &&
             !cast_type.IsUnresolvedTypes() && !orig_type.IsUnresolvedTypes() &&
+            cast_type.HasClass() &&             // Could be conflict type, make sure it has a class.
             !cast_type.GetClass()->IsInterface() &&
             (orig_type.IsZero() ||
                 orig_type.IsStrictlyAssignableFrom(cast_type.Merge(orig_type, &reg_types_)))) {
@@ -2763,12 +2773,30 @@
    * "try" block when they throw, control transfers out of the method.)
    */
   if ((opcode_flags & Instruction::kThrow) != 0 && insn_flags_[work_insn_idx_].IsInTry()) {
-    bool within_catch_all = false;
+    bool has_catch_all_handler = false;
     CatchHandlerIterator iterator(*code_item_, work_insn_idx_);
 
+    // Need the linker to try to resolve the handled class, to check whether it is Throwable.
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+
     for (; iterator.HasNext(); iterator.Next()) {
-      if (iterator.GetHandlerTypeIndex() == DexFile::kDexNoIndex16) {
-        within_catch_all = true;
+      uint16_t handler_type_idx = iterator.GetHandlerTypeIndex();
+      if (handler_type_idx == DexFile::kDexNoIndex16) {
+        has_catch_all_handler = true;
+      } else {
+        // It is also a catch-all if it is java.lang.Throwable.
+        mirror::Class* klass = linker->ResolveType(*dex_file_, handler_type_idx, *dex_cache_,
+                                                   *class_loader_);
+        if (klass != nullptr) {
+          if (klass == mirror::Throwable::GetJavaLangThrowable()) {
+            has_catch_all_handler = true;
+          }
+        } else {
+          // Clear exception.
+          Thread* self = Thread::Current();
+          DCHECK(self->IsExceptionPending());
+          self->ClearException();
+        }
       }
       /*
        * Merge registers into the "catch" block. We want to use the "savedRegs" rather than
@@ -2784,7 +2812,7 @@
      * If the monitor stack depth is nonzero, there must be a "catch all" handler for this
      * instruction. This does apply to monitor-exit because of async exception handling.
      */
-    if (work_line_->MonitorStackDepth() > 0 && !within_catch_all) {
+    if (work_line_->MonitorStackDepth() > 0 && !has_catch_all_handler) {
       /*
        * The state in work_line reflects the post-execution state. If the current instruction is a
        * monitor-enter and the monitor stack was empty, we don't need a catch-all (if it throws,
@@ -3225,7 +3253,7 @@
       return nullptr;
     }
     mirror::Class* super_klass = super.GetClass();
-    if (res_method->GetMethodIndex() >= super_klass->GetVTable()->GetLength()) {
+    if (res_method->GetMethodIndex() >= super_klass->GetVTableLength()) {
       Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
                                    << PrettyMethod(dex_method_idx_, *dex_file_)
                                    << " to super " << super
@@ -3250,20 +3278,21 @@
     VLOG(verifier) << "Failed to get mirror::Class* from '" << actual_arg_type << "'";
     return nullptr;
   }
-  mirror::ObjectArray<mirror::ArtMethod>* vtable = nullptr;
   mirror::Class* klass = actual_arg_type.GetClass();
+  mirror::Class* dispatch_class;
   if (klass->IsInterface()) {
     // Derive Object.class from Class.class.getSuperclass().
     mirror::Class* object_klass = klass->GetClass()->GetSuperClass();
     CHECK(object_klass->IsObjectClass());
-    vtable = object_klass->GetVTable();
+    dispatch_class = object_klass;
   } else {
-    vtable = klass->GetVTable();
+    dispatch_class = klass;
   }
-  CHECK(vtable != nullptr) << PrettyDescriptor(klass);
+  CHECK(dispatch_class->HasVTable()) << PrettyDescriptor(dispatch_class);
   uint16_t vtable_index = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength()) << PrettyDescriptor(klass);
-  mirror::ArtMethod* res_method = vtable->Get(vtable_index);
+  CHECK_LT(static_cast<int32_t>(vtable_index), dispatch_class->GetVTableLength())
+      << PrettyDescriptor(klass);
+  mirror::ArtMethod* res_method = dispatch_class->GetVTableEntry(vtable_index);
   CHECK(!Thread::Current()->IsExceptionPending());
   return res_method;
 }
@@ -3469,10 +3498,12 @@
     value_compatible = value_type.IsFloatTypes();
   } else if (target_type.IsLong()) {
     instruction_compatible = insn_type.IsLong();
-    value_compatible = value_type.IsLongTypes();
+    const RegType& value_type_hi = work_line_->GetRegisterType(vregA + 1);
+    value_compatible = value_type.IsLongTypes() && value_type.CheckWidePair(value_type_hi);
   } else if (target_type.IsDouble()) {
     instruction_compatible = insn_type.IsLong();  // no put-double, so expect put-long
-    value_compatible = value_type.IsDoubleTypes();
+    const RegType& value_type_hi = work_line_->GetRegisterType(vregA + 1);
+    value_compatible = value_type.IsDoubleTypes() && value_type.CheckWidePair(value_type_hi);
   } else {
     instruction_compatible = false;  // reference with primitive store
     value_compatible = false;  // unused
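
A long or double occupies a pair of adjacent vregs, and the added lines require the named vreg and the one above it to form a consistent wide pair instead of checking the low half alone; this appears to be the verifier side of the commit's long/double debugger fix. A self-contained toy model of the pair check, with simplified kinds standing in for the verifier's RegType lattice:

    #include <cassert>

    // Toy stand-ins for the verifier's wide register kinds (illustrative only).
    enum class RegKind { kLongLo, kLongHi, kDoubleLo, kDoubleHi, kInt };

    // Models RegType::CheckWidePair: the register after the low half must be
    // the matching high half.
    bool CheckWidePair(RegKind lo, RegKind hi) {
      return (lo == RegKind::kLongLo && hi == RegKind::kLongHi) ||
             (lo == RegKind::kDoubleLo && hi == RegKind::kDoubleHi);
    }

    int main() {
      RegKind regs[] = {RegKind::kLongLo, RegKind::kLongHi, RegKind::kInt};
      int vregA = 0;
      // As in the hunk above: vregA and vregA + 1 must form a wide pair.
      assert(CheckWidePair(regs[vregA], regs[vregA + 1]));
      assert(!CheckWidePair(regs[vregA + 1], regs[vregA + 2]));
      return 0;
    }
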
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index fdc6e3f..3a6a72b 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -47,6 +47,8 @@
 jclass WellKnownClasses::java_lang_ThreadGroup;
 jclass WellKnownClasses::java_lang_Throwable;
 jclass WellKnownClasses::java_nio_DirectByteBuffer;
+jclass WellKnownClasses::java_util_Collections;
+jclass WellKnownClasses::libcore_util_EmptyArray;
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_Chunk;
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer;
 
@@ -150,6 +152,8 @@
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
   java_lang_Throwable = CacheClass(env, "java/lang/Throwable");
   java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer");
+  java_util_Collections = CacheClass(env, "java/util/Collections");
+  libcore_util_EmptyArray = CacheClass(env, "libcore/util/EmptyArray");
   org_apache_harmony_dalvik_ddmc_Chunk = CacheClass(env, "org/apache/harmony/dalvik/ddmc/Chunk");
   org_apache_harmony_dalvik_ddmc_DdmServer = CacheClass(env, "org/apache/harmony/dalvik/ddmc/DdmServer");
 
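
CacheClass resolves each class once at startup and stores the jclass in a static field; for that to remain valid across JNI frames, the local reference must be promoted to a global one. A hedged sketch of that idiom, assuming a standalone helper (the real CacheClass in this file may differ, for instance by aborting when the class cannot be found):

    #include <jni.h>

    // Sketch of the FindClass + NewGlobalRef caching idiom. A raw FindClass
    // result is a local reference that dies with the current native frame,
    // so it must not be stored in a static field directly.
    static jclass CacheClassSketch(JNIEnv* env, const char* jni_class_name) {
      jclass local = env->FindClass(jni_class_name);  // e.g. "libcore/util/EmptyArray"
      if (local == nullptr) {
        return nullptr;  // the real helper presumably aborts instead
      }
      jclass global = reinterpret_cast<jclass>(env->NewGlobalRef(local));
      env->DeleteLocalRef(local);
      return global;
    }
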
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index f6c2930..7639f50 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -60,7 +60,9 @@
   static jclass java_lang_ThreadGroup;
   static jclass java_lang_Thread$UncaughtExceptionHandler;
   static jclass java_lang_Throwable;
+  static jclass java_util_Collections;
   static jclass java_nio_DirectByteBuffer;
+  static jclass libcore_util_EmptyArray;
   static jclass org_apache_harmony_dalvik_ddmc_Chunk;
   static jclass org_apache_harmony_dalvik_ddmc_DdmServer;
 
diff --git a/test/001-nop/build b/test/000-nop/build
similarity index 100%
rename from test/001-nop/build
rename to test/000-nop/build
diff --git a/test/001-nop/expected.txt b/test/000-nop/expected.txt
similarity index 100%
rename from test/001-nop/expected.txt
rename to test/000-nop/expected.txt
diff --git a/test/001-nop/info.txt b/test/000-nop/info.txt
similarity index 100%
rename from test/001-nop/info.txt
rename to test/000-nop/info.txt
diff --git a/test/001-nop/run b/test/000-nop/run
similarity index 100%
rename from test/001-nop/run
rename to test/000-nop/run
diff --git a/test/001-HelloWorld/expected.txt b/test/001-HelloWorld/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/001-HelloWorld/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/001-HelloWorld/info.txt b/test/001-HelloWorld/info.txt
new file mode 100644
index 0000000..641dd9a
--- /dev/null
+++ b/test/001-HelloWorld/info.txt
@@ -0,0 +1 @@
+Imported from oat test. Print "Hello World."
diff --git a/test/HelloWorld/HelloWorld.java b/test/001-HelloWorld/src/Main.java
similarity index 90%
rename from test/HelloWorld/HelloWorld.java
rename to test/001-HelloWorld/src/Main.java
index c6861ce..1ef6289 100644
--- a/test/HelloWorld/HelloWorld.java
+++ b/test/001-HelloWorld/src/Main.java
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-class HelloWorld {
+public class Main {
   public static void main(String[] args) {
-    System.logI("Hello, world!");
+    System.out.println("Hello, world!");
   }
 }
diff --git a/test/001-Main/expected.txt b/test/001-Main/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/001-Main/expected.txt
diff --git a/test/001-Main/info.txt b/test/001-Main/info.txt
new file mode 100644
index 0000000..e4b33a0
--- /dev/null
+++ b/test/001-Main/info.txt
@@ -0,0 +1 @@
+Import of a previous oat test. Empty main, just tests starting up the runtime.
diff --git a/test/HelloWorld/HelloWorld.java b/test/001-Main/src/Main.java
similarity index 85%
copy from test/HelloWorld/HelloWorld.java
copy to test/001-Main/src/Main.java
index c6861ce..3486866 100644
--- a/test/HelloWorld/HelloWorld.java
+++ b/test/001-Main/src/Main.java
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-class HelloWorld {
-  public static void main(String[] args) {
-    System.logI("Hello, world!");
-  }
+public class Main {
+    public static void main(String args[]) {
+    }
 }
diff --git a/test/004-InterfaceTest/expected.txt b/test/004-InterfaceTest/expected.txt
new file mode 100644
index 0000000..4854e24
--- /dev/null
+++ b/test/004-InterfaceTest/expected.txt
@@ -0,0 +1,2 @@
+test_virtual done
+test_interface done
diff --git a/test/004-InterfaceTest/info.txt b/test/004-InterfaceTest/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-InterfaceTest/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/InterfaceTest/InterfaceTest.java b/test/004-InterfaceTest/src/Main.java
similarity index 91%
rename from test/InterfaceTest/InterfaceTest.java
rename to test/004-InterfaceTest/src/Main.java
index ed18eb3d..9ebac59 100644
--- a/test/InterfaceTest/InterfaceTest.java
+++ b/test/004-InterfaceTest/src/Main.java
@@ -17,7 +17,7 @@
 import java.util.Map;
 import java.util.HashMap;
 
-class InterfaceTest {
+public class Main {
 
   public static long test_virtual(HashMap map) {
     Integer intobj = new Integer(0);
@@ -44,10 +44,10 @@
   public static void main(String[] args) {
     HashMap hashmap = new HashMap();
     long elapsed = test_virtual(hashmap);
-    System.logI("virtual map put: " + elapsed);
+    System.out.println("test_virtual done");
     hashmap.clear();
 
     elapsed = test_interface(hashmap);
-    System.logI("interface map put: " + elapsed);
+    System.out.println("test_interface done");
   }
 }
diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/004-JniTest/expected.txt
diff --git a/test/004-JniTest/info.txt b/test/004-JniTest/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-JniTest/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
similarity index 86%
rename from test/JniTest/jni_test.cc
rename to test/004-JniTest/jni_test.cc
index 36cad72..4909a4a 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -42,7 +42,7 @@
   int attach_result = jvm->AttachCurrentThread(&env, &args);
   assert(attach_result == 0);
 
-  jclass clazz = env->FindClass("JniTest");
+  jclass clazz = env->FindClass("Main");
   assert(clazz != NULL);
   assert(!env->ExceptionCheck());
 
@@ -56,7 +56,7 @@
 }
 
 // http://b/10994325
-extern "C" JNIEXPORT void JNICALL Java_JniTest_testFindClassOnAttachedNativeThread(JNIEnv*,
+extern "C" JNIEXPORT void JNICALL Java_Main_testFindClassOnAttachedNativeThread(JNIEnv*,
                                                                                    jclass) {
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread,
@@ -76,7 +76,7 @@
   int attach_result = jvm->AttachCurrentThread(&env, &args);
   assert(attach_result == 0);
 
-  jclass clazz = env->FindClass("JniTest");
+  jclass clazz = env->FindClass("Main");
   assert(clazz != NULL);
   assert(!env->ExceptionCheck());
 
@@ -91,7 +91,7 @@
   return NULL;
 }
 
-extern "C" JNIEXPORT void JNICALL Java_JniTest_testFindFieldOnAttachedNativeThreadNative(JNIEnv*,
+extern "C" JNIEXPORT void JNICALL Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEnv*,
                                                                                          jclass) {
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread,
@@ -111,7 +111,7 @@
   int attach_result = jvm->AttachCurrentThread(&env, &args);
   assert(attach_result == 0);
 
-  jclass clazz = env->FindClass("JniTest");
+  jclass clazz = env->FindClass("Main");
   assert(clazz != NULL);
   assert(!env->ExceptionCheck());
 
@@ -151,7 +151,7 @@
 }
 
 // http://b/15539150
-extern "C" JNIEXPORT void JNICALL Java_JniTest_testReflectFieldGetFromAttachedNativeThreadNative(
+extern "C" JNIEXPORT void JNICALL Java_Main_testReflectFieldGetFromAttachedNativeThreadNative(
     JNIEnv*, jclass) {
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread,
@@ -165,22 +165,22 @@
 
 
 // http://b/11243757
-extern "C" JNIEXPORT void JNICALL Java_JniTest_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
+extern "C" JNIEXPORT void JNICALL Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
                                                                                         jclass) {
-  jclass super_class = env->FindClass("JniTest$testCallStaticVoidMethodOnSubClass_SuperClass");
+  jclass super_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SuperClass");
   assert(super_class != NULL);
 
   jmethodID execute = env->GetStaticMethodID(super_class, "execute", "()V");
   assert(execute != NULL);
 
-  jclass sub_class = env->FindClass("JniTest$testCallStaticVoidMethodOnSubClass_SubClass");
+  jclass sub_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SubClass");
   assert(sub_class != NULL);
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
 
-extern "C" JNIEXPORT jobject JNICALL Java_JniTest_testGetMirandaMethodNative(JNIEnv* env, jclass) {
-  jclass abstract_class = env->FindClass("JniTest$testGetMirandaMethod_MirandaAbstract");
+extern "C" JNIEXPORT jobject JNICALL Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass) {
+  jclass abstract_class = env->FindClass("Main$testGetMirandaMethod_MirandaAbstract");
   assert(abstract_class != NULL);
   jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
   assert(miranda_method != NULL);
@@ -188,7 +188,7 @@
 }
 
 // https://code.google.com/p/android/issues/detail?id=63055
-extern "C" void JNICALL Java_JniTest_testZeroLengthByteBuffers(JNIEnv* env, jclass) {
+extern "C" void JNICALL Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass) {
   std::vector<uint8_t> buffer(1);
   jobject byte_buffer = env->NewDirectByteBuffer(&buffer[0], 0);
   assert(byte_buffer != NULL);
@@ -201,7 +201,7 @@
 constexpr size_t kByteReturnSize = 7;
 jbyte byte_returns[kByteReturnSize] = { 0, 1, 2, 127, -1, -2, -128 };
 
-extern "C" jbyte JNICALL Java_JniTest_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,
+extern "C" jbyte JNICALL Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,
                                                     jbyte b3, jbyte b4, jbyte b5, jbyte b6,
                                                     jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
   // We use b1 to drive the output.
@@ -226,7 +226,7 @@
     static_cast<jshort>(0x8000) };
 // The weird static_cast is because short int is only guaranteed down to -32767, not Java's -32768.
 
-extern "C" jshort JNICALL Java_JniTest_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,
+extern "C" jshort JNICALL Java_Main_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,
                                                     jshort s3, jshort s4, jshort s5, jshort s6,
                                                     jshort s7, jshort s8, jshort s9, jshort s10) {
   // We use s1 to drive the output.
@@ -246,7 +246,7 @@
   return short_returns[s1];
 }
 
-extern "C" jboolean JNICALL Java_JniTest_booleanMethod(JNIEnv* env, jclass klass, jboolean b1,
+extern "C" jboolean JNICALL Java_Main_booleanMethod(JNIEnv* env, jclass klass, jboolean b1,
                                                        jboolean b2, jboolean b3, jboolean b4,
                                                        jboolean b5, jboolean b6, jboolean b7,
                                                        jboolean b8, jboolean b9, jboolean b10) {
@@ -268,7 +268,7 @@
 constexpr size_t kCharReturnSize = 8;
 jchar char_returns[kCharReturnSize] = { 0, 1, 2, 127, 255, 256, 15000, 34000 };
 
-extern "C" jchar JNICALL Java_JniTest_charMethod(JNIEnv* env, jclass klacc, jchar c1, jchar c2,
+extern "C" jchar JNICALL Java_Main_charMethod(JNIEnv* env, jclass klacc, jchar c1, jchar c2,
                                                     jchar c3, jchar c4, jchar c5, jchar c6,
                                                     jchar c7, jchar c8, jchar c9, jchar c10) {
   // We use c1 to drive the output.
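
Every rename in this file follows from the JNI symbol convention: a native method's exported name is Java_<class>_<method>, with '/' and '.' in the qualified class name mangled to '_' (and '$' to '_00024' for inner classes), so moving the test methods from class JniTest to class Main rewrites each symbol. A minimal sketch with a hypothetical method:

    #include <jni.h>

    // Matches a Java declaration in the default package such as:
    //   public class Main { private static native int answer(); }
    // As class JniTest this would have exported Java_JniTest_answer; as class
    // Main the runtime now looks up Java_Main_answer.
    extern "C" JNIEXPORT jint JNICALL Java_Main_answer(JNIEnv*, jclass) {
      return 42;  // hypothetical method, for illustration only
    }
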
diff --git a/test/JniTest/JniTest.java b/test/004-JniTest/src/Main.java
similarity index 98%
rename from test/JniTest/JniTest.java
rename to test/004-JniTest/src/Main.java
index 33418a9..11c80f5 100644
--- a/test/JniTest/JniTest.java
+++ b/test/004-JniTest/src/Main.java
@@ -16,7 +16,7 @@
 
 import java.lang.reflect.Method;
 
-class JniTest {
+public class Main {
     public static void main(String[] args) {
         System.loadLibrary("arttest");
         testFindClassOnAttachedNativeThread();
diff --git a/test/004-NativeAllocations/expected.txt b/test/004-NativeAllocations/expected.txt
new file mode 100644
index 0000000..f75da10
--- /dev/null
+++ b/test/004-NativeAllocations/expected.txt
@@ -0,0 +1 @@
+Test complete
diff --git a/test/004-NativeAllocations/info.txt b/test/004-NativeAllocations/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-NativeAllocations/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/NativeAllocations/NativeAllocations.java b/test/004-NativeAllocations/src/Main.java
similarity index 98%
rename from test/NativeAllocations/NativeAllocations.java
rename to test/004-NativeAllocations/src/Main.java
index 9423b91..483c667 100644
--- a/test/NativeAllocations/NativeAllocations.java
+++ b/test/004-NativeAllocations/src/Main.java
@@ -16,7 +16,7 @@
 
 import java.lang.reflect.*;
 
-class NativeAllocations {
+public class Main {
     static Object nativeLock = new Object();
     static int nativeBytes = 0;
     static Object runtime;
diff --git a/test/004-ReferenceMap/expected.txt b/test/004-ReferenceMap/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/004-ReferenceMap/expected.txt
diff --git a/test/004-ReferenceMap/info.txt b/test/004-ReferenceMap/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-ReferenceMap/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/ReferenceMap/ReferenceMap.java b/test/004-ReferenceMap/src/Main.java
similarity index 91%
rename from test/ReferenceMap/ReferenceMap.java
rename to test/004-ReferenceMap/src/Main.java
index c746b68..f9a5498 100644
--- a/test/ReferenceMap/ReferenceMap.java
+++ b/test/004-ReferenceMap/src/Main.java
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-public class ReferenceMap {
-  public ReferenceMap() {
+public class Main {
+  public Main() {
   }
 
   Object f() {
@@ -41,7 +41,7 @@
   }
 
   public static void main(String[] args) {
-    ReferenceMap rm = new ReferenceMap();
+    Main rm = new Main();
     rm.f();
   }
 }
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
similarity index 99%
rename from test/ReferenceMap/stack_walk_refmap_jni.cc
rename to test/004-ReferenceMap/stack_walk_refmap_jni.cc
index e5a17861..7929554 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -271,7 +271,7 @@
 //        0x0032 - 0x0033 reg=2 y Ljava/lang/Object;
 //        0x0000 - 0x0033 reg=8 this LReferenceMap;
 
-extern "C" JNIEXPORT jint JNICALL Java_ReferenceMap_refmap(JNIEnv*, jobject, jint count) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_refmap(JNIEnv*, jobject, jint count) {
   // Visitor
   ScopedObjectAccess soa(Thread::Current());
   ReferenceMap2Visitor mapper(soa.Self());
diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt
new file mode 100644
index 0000000..fd5ec00
--- /dev/null
+++ b/test/004-SignalTest/expected.txt
@@ -0,0 +1,5 @@
+init signal test
+Caught NullPointerException
+Caught StackOverflowError
+signal caught
+Signal test OK
diff --git a/test/004-SignalTest/info.txt b/test/004-SignalTest/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-SignalTest/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/SignalTest/signaltest.cc b/test/004-SignalTest/signaltest.cc
similarity index 88%
rename from test/SignalTest/signaltest.cc
rename to test/004-SignalTest/signaltest.cc
index dfe3197..a2dd664 100644
--- a/test/SignalTest/signaltest.cc
+++ b/test/004-SignalTest/signaltest.cc
@@ -41,7 +41,7 @@
 
 static struct sigaction oldaction;
 
-extern "C" JNIEXPORT void JNICALL Java_SignalTest_initSignalTest(JNIEnv*, jclass) {
+extern "C" JNIEXPORT void JNICALL Java_Main_initSignalTest(JNIEnv*, jclass) {
   struct sigaction action;
   action.sa_sigaction = signalhandler;
   sigemptyset(&action.sa_mask);
@@ -53,7 +53,7 @@
   sigaction(SIGSEGV, &action, &oldaction);
 }
 
-extern "C" JNIEXPORT void JNICALL Java_SignalTest_terminateSignalTest(JNIEnv*, jclass) {
+extern "C" JNIEXPORT void JNICALL Java_Main_terminateSignalTest(JNIEnv*, jclass) {
   sigaction(SIGSEGV, &oldaction, nullptr);
 }
 
@@ -61,7 +61,7 @@
 // to nullptr.
 char *p = nullptr;
 
-extern "C" JNIEXPORT jint JNICALL Java_SignalTest_testSignal(JNIEnv*, jclass) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_testSignal(JNIEnv*, jclass) {
 #ifdef __arm__
   // On ARM we cause a real SEGV.
   *p = 'a';
diff --git a/test/SignalTest/SignalTest.java b/test/004-SignalTest/src/Main.java
similarity index 98%
rename from test/SignalTest/SignalTest.java
rename to test/004-SignalTest/src/Main.java
index 7f15aea..0391592 100644
--- a/test/SignalTest/SignalTest.java
+++ b/test/004-SignalTest/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-class SignalTest {
+public class Main {
     private static native void initSignalTest();
     private static native void terminateSignalTest();
     private static native int testSignal();
diff --git a/test/004-StackWalk/expected.txt b/test/004-StackWalk/expected.txt
new file mode 100644
index 0000000..bde0024
--- /dev/null
+++ b/test/004-StackWalk/expected.txt
@@ -0,0 +1,4 @@
+1st call
+172001234567891011121314151617181920652310201919
+2nd call
+172001234567891011121314151617181920652310201919
diff --git a/test/004-StackWalk/info.txt b/test/004-StackWalk/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-StackWalk/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/StackWalk/StackWalk.java b/test/004-StackWalk/src/Main.java
similarity index 92%
rename from test/StackWalk/StackWalk.java
rename to test/004-StackWalk/src/Main.java
index f7c78ff..1e2a91b 100644
--- a/test/StackWalk/StackWalk.java
+++ b/test/004-StackWalk/src/Main.java
@@ -1,5 +1,5 @@
-public class StackWalk {
-  public StackWalk() {
+public class Main {
+  public Main() {
   }
 
   int f() {
@@ -76,18 +76,18 @@
     s4 = s18 = s19;
     s += s4;
     s += s18;
-    refmap(0);
+    stackmap(0);
     return s;
   }
 
-  native int refmap(int x);
+  native int stackmap(int x);
 
   static {
     System.loadLibrary("arttest");
   }
 
   public static void main(String[] args) {
-    StackWalk st = new StackWalk();
+    Main st = new Main();
     st.f();
   }
 }
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/004-StackWalk/stack_walk_jni.cc
similarity index 93%
rename from test/StackWalk/stack_walk_jni.cc
rename to test/004-StackWalk/stack_walk_jni.cc
index e404f6a..30a0d59 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/004-StackWalk/stack_walk_jni.cc
@@ -95,13 +95,12 @@
         CHECK_REGS(2, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 25);
       }
     }
-    LOG(INFO) << reinterpret_cast<const void*>(reg_bitmap);
 
     return true;
   }
 };
 
-extern "C" JNIEXPORT jint JNICALL Java_StackWalk_refmap(JNIEnv*, jobject, jint count) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_stackmap(JNIEnv*, jobject, jint count) {
   ScopedObjectAccess soa(Thread::Current());
   CHECK_EQ(count, 0);
   gJava_StackWalk_refmap_calls++;
@@ -113,7 +112,7 @@
   return count + 1;
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_StackWalk2_refmap2(JNIEnv*, jobject, jint count) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_refmap2(JNIEnv*, jobject, jint count) {
   ScopedObjectAccess soa(Thread::Current());
   gJava_StackWalk_refmap_calls++;
 
diff --git a/test/004-ThreadStress/check b/test/004-ThreadStress/check
new file mode 100755
index 0000000..ffbb8cf
--- /dev/null
+++ b/test/004-ThreadStress/check
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Only compare the last line.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
new file mode 100644
index 0000000..a26fb4f
--- /dev/null
+++ b/test/004-ThreadStress/expected.txt
@@ -0,0 +1 @@
+Finishing worker
diff --git a/test/004-ThreadStress/info.txt b/test/004-ThreadStress/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-ThreadStress/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/ThreadStress/ThreadStress.java b/test/004-ThreadStress/src/Main.java
similarity index 89%
rename from test/ThreadStress/ThreadStress.java
rename to test/004-ThreadStress/src/Main.java
index 5dccc68..0c1c97d 100644
--- a/test/ThreadStress/ThreadStress.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import android.system.ErrnoException;
-import android.system.Os;
-import android.system.OsConstants;
+import java.lang.reflect.*;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -26,7 +24,7 @@
 
 // Run on host with:
 //   javac Main.java && java Main && rm *.class
-class ThreadStress implements Runnable {
+public class Main implements Runnable {
 
     public static final boolean DEBUG = false;
 
@@ -80,7 +78,7 @@
         // Fill in the Operation[] array for each thread by laying
         // down references to operation according to their desired
         // frequency.
-        final ThreadStress[] threadStresses = new ThreadStress[numberOfThreads];
+        final Main[] threadStresses = new Main[numberOfThreads];
         for (int t = 0; t < threadStresses.length; t++) {
             Operation[] operations = new Operation[operationsPerThread];
             int o = 0;
@@ -98,7 +96,7 @@
             }
             // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = new ThreadStress(lock, t, operations);
+            threadStresses[t] = new Main(lock, t, operations);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -129,9 +127,9 @@
         // operationsPerThread.
         Thread[] runners = new Thread[numberOfThreads];
         for (int r = 0; r < runners.length; r++) {
-            final ThreadStress ts = threadStresses[r];
+            final Main ts = threadStresses[r];
             runners[r] = new Thread("Runner thread " + r) {
-                final ThreadStress threadStress = ts;
+                final Main threadStress = ts;
                 public void run() {
                     int id = threadStress.id;
                     System.out.println("Starting worker for " + id);
@@ -146,7 +144,7 @@
                                            + (operationsPerThread - threadStress.nextOperation)
                                            + " operations remaining.");
                     }
-                    System.out.println("Finishing worker for " + id);
+                    System.out.println("Finishing worker");
                 }
             };
         }
@@ -179,7 +177,7 @@
 
     private int nextOperation;
 
-    private ThreadStress(Object lock, int id, Operation[] operations) {
+    private Main(Object lock, int id, Operation[] operations) {
         this.lock = lock;
         this.id = id;
         this.operations = operations;
@@ -204,8 +202,8 @@
                     }
                     case SIGQUIT: {
                         try {
-                            Os.kill(Os.getpid(), OsConstants.SIGQUIT);
-                        } catch (ErrnoException ex) {
+                            SIGQUIT();
+                        } catch (Exception ex) {
                         }
                     }
                     case SLEEP: {
@@ -267,4 +265,17 @@
             }
         }
     }
+
+    private static void SIGQUIT() throws Exception {
+        Class<?> osClass = Class.forName("android.system.Os");
+        Method getpid = osClass.getDeclaredMethod("getpid");
+        int pid = (Integer)getpid.invoke(null);
+
+        Class<?> osConstants = Class.forName("android.system.OsConstants");
+        Field sigquitField = osConstants.getDeclaredField("SIGQUIT");
+        int sigquit = (Integer)sigquitField.get(null);
+
+        Method kill = osClass.getDeclaredMethod("kill", int.class, int.class);
+        kill.invoke(null, pid, sigquit);
+    }
 }
diff --git a/test/004-UnsafeTest/expected.txt b/test/004-UnsafeTest/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/004-UnsafeTest/expected.txt
diff --git a/test/004-UnsafeTest/info.txt b/test/004-UnsafeTest/info.txt
new file mode 100644
index 0000000..00b0d9a
--- /dev/null
+++ b/test/004-UnsafeTest/info.txt
@@ -0,0 +1 @@
+Imported from oat tests.
diff --git a/test/UnsafeTest/UnsafeTest.java b/test/004-UnsafeTest/src/Main.java
similarity index 95%
rename from test/UnsafeTest/UnsafeTest.java
rename to test/004-UnsafeTest/src/Main.java
index 9e2ff87..8c8ad16 100644
--- a/test/UnsafeTest/UnsafeTest.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -17,21 +17,21 @@
 import java.lang.reflect.Field;
 import sun.misc.Unsafe;
 
-public class UnsafeTest {
+public class Main {
   static {
     System.loadLibrary("arttest");
   }
 
   private static void check(int actual, int expected, String msg) {
     if (actual != expected) {
-      System.logE(msg + " : " + actual + " != " + expected);
+      System.out.println(msg + " : " + actual + " != " + expected);
       System.exit(-1);
     }
   }
 
   private static void check(long actual, long expected, String msg) {
     if (actual != expected) {
-      System.logE(msg + " : " + actual + " != " + expected);
+      System.out.println(msg + " : " + actual + " != " + expected);
       System.exit(-1);
     }
   }
diff --git a/test/UnsafeTest/unsafe_test.cc b/test/004-UnsafeTest/unsafe_test.cc
similarity index 85%
rename from test/UnsafeTest/unsafe_test.cc
rename to test/004-UnsafeTest/unsafe_test.cc
index e36ee14..ca0e39e 100644
--- a/test/UnsafeTest/unsafe_test.cc
+++ b/test/004-UnsafeTest/unsafe_test.cc
@@ -24,14 +24,14 @@
 
 namespace art {
 
-extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayBaseOffset(JNIEnv* env, jclass, jobject classObj) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_vmArrayBaseOffset(JNIEnv* env, jclass, jobject classObj) {
   ScopedObjectAccess soa(env);
   mirror::Class* klass = soa.Decode<mirror::Class*>(classObj);
   return mirror::Array::DataOffset(
       Primitive::ComponentSize(klass->GetComponentType()->GetPrimitiveType())).Int32Value();
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayIndexScale(JNIEnv* env, jclass, jobject classObj) {
+extern "C" JNIEXPORT jint JNICALL Java_Main_vmArrayIndexScale(JNIEnv* env, jclass, jobject classObj) {
   ScopedObjectAccess soa(env);
   mirror::Class* klass = soa.Decode<mirror::Class*>(classObj);
   return Primitive::ComponentSize(klass->GetComponentType()->GetPrimitiveType());
diff --git a/test/004-annotations/build b/test/005-annotations/build
similarity index 100%
rename from test/004-annotations/build
rename to test/005-annotations/build
diff --git a/test/004-annotations/expected.txt b/test/005-annotations/expected.txt
similarity index 100%
rename from test/004-annotations/expected.txt
rename to test/005-annotations/expected.txt
diff --git a/test/004-annotations/info.txt b/test/005-annotations/info.txt
similarity index 100%
rename from test/004-annotations/info.txt
rename to test/005-annotations/info.txt
diff --git a/test/004-annotations/src/Main.java b/test/005-annotations/src/Main.java
similarity index 100%
rename from test/004-annotations/src/Main.java
rename to test/005-annotations/src/Main.java
diff --git a/test/004-annotations/src/android/test/AnnoSimplePackage1.java b/test/005-annotations/src/android/test/AnnoSimplePackage1.java
similarity index 100%
rename from test/004-annotations/src/android/test/AnnoSimplePackage1.java
rename to test/005-annotations/src/android/test/AnnoSimplePackage1.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoArrayField.java b/test/005-annotations/src/android/test/anno/AnnoArrayField.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoArrayField.java
rename to test/005-annotations/src/android/test/anno/AnnoArrayField.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoFancyConstructor.java b/test/005-annotations/src/android/test/anno/AnnoFancyConstructor.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoFancyConstructor.java
rename to test/005-annotations/src/android/test/anno/AnnoFancyConstructor.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoFancyField.java b/test/005-annotations/src/android/test/anno/AnnoFancyField.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoFancyField.java
rename to test/005-annotations/src/android/test/anno/AnnoFancyField.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoFancyMethod.java b/test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoFancyMethod.java
rename to test/005-annotations/src/android/test/anno/AnnoFancyMethod.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoFancyParameter.java b/test/005-annotations/src/android/test/anno/AnnoFancyParameter.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoFancyParameter.java
rename to test/005-annotations/src/android/test/anno/AnnoFancyParameter.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoFancyType.java b/test/005-annotations/src/android/test/anno/AnnoFancyType.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoFancyType.java
rename to test/005-annotations/src/android/test/anno/AnnoFancyType.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleConstructor.java b/test/005-annotations/src/android/test/anno/AnnoSimpleConstructor.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleConstructor.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleConstructor.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleField.java b/test/005-annotations/src/android/test/anno/AnnoSimpleField.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleField.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleField.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleLocalVariable.java b/test/005-annotations/src/android/test/anno/AnnoSimpleLocalVariable.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleLocalVariable.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleLocalVariable.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleMethod.java b/test/005-annotations/src/android/test/anno/AnnoSimpleMethod.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleMethod.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleMethod.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimplePackage.java b/test/005-annotations/src/android/test/anno/AnnoSimplePackage.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimplePackage.java
rename to test/005-annotations/src/android/test/anno/AnnoSimplePackage.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleParameter.java b/test/005-annotations/src/android/test/anno/AnnoSimpleParameter.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleParameter.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleParameter.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleType.java b/test/005-annotations/src/android/test/anno/AnnoSimpleType.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleType.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleType.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleType2.java b/test/005-annotations/src/android/test/anno/AnnoSimpleType2.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleType2.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleType2.java
diff --git a/test/004-annotations/src/android/test/anno/AnnoSimpleTypeInvis.java b/test/005-annotations/src/android/test/anno/AnnoSimpleTypeInvis.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/AnnoSimpleTypeInvis.java
rename to test/005-annotations/src/android/test/anno/AnnoSimpleTypeInvis.java
diff --git a/test/004-annotations/src/android/test/anno/ExportedProperty.java b/test/005-annotations/src/android/test/anno/ExportedProperty.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/ExportedProperty.java
rename to test/005-annotations/src/android/test/anno/ExportedProperty.java
diff --git a/test/004-annotations/src/android/test/anno/FullyNoted.java b/test/005-annotations/src/android/test/anno/FullyNoted.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/FullyNoted.java
rename to test/005-annotations/src/android/test/anno/FullyNoted.java
diff --git a/test/004-annotations/src/android/test/anno/INoted.java b/test/005-annotations/src/android/test/anno/INoted.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/INoted.java
rename to test/005-annotations/src/android/test/anno/INoted.java
diff --git a/test/004-annotations/src/android/test/anno/IntToString.java b/test/005-annotations/src/android/test/anno/IntToString.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/IntToString.java
rename to test/005-annotations/src/android/test/anno/IntToString.java
diff --git a/test/004-annotations/src/android/test/anno/MissingAnnotation.java b/test/005-annotations/src/android/test/anno/MissingAnnotation.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/MissingAnnotation.java
rename to test/005-annotations/src/android/test/anno/MissingAnnotation.java
diff --git a/test/004-annotations/src/android/test/anno/SimplyNoted.java b/test/005-annotations/src/android/test/anno/SimplyNoted.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/SimplyNoted.java
rename to test/005-annotations/src/android/test/anno/SimplyNoted.java
diff --git a/test/004-annotations/src/android/test/anno/SomeClass.java b/test/005-annotations/src/android/test/anno/SomeClass.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/SomeClass.java
rename to test/005-annotations/src/android/test/anno/SomeClass.java
diff --git a/test/004-annotations/src/android/test/anno/SubNoted.java b/test/005-annotations/src/android/test/anno/SubNoted.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/SubNoted.java
rename to test/005-annotations/src/android/test/anno/SubNoted.java
diff --git a/test/004-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/TestAnnotations.java
rename to test/005-annotations/src/android/test/anno/TestAnnotations.java
diff --git a/test/004-annotations/src/android/test/anno/package-info.java b/test/005-annotations/src/android/test/anno/package-info.java
similarity index 100%
rename from test/004-annotations/src/android/test/anno/package-info.java
rename to test/005-annotations/src/android/test/anno/package-info.java
diff --git a/test/004-annotations/src/android/test/package-info.java b/test/005-annotations/src/android/test/package-info.java
similarity index 100%
rename from test/004-annotations/src/android/test/package-info.java
rename to test/005-annotations/src/android/test/package-info.java
diff --git a/test/005-args/expected.txt b/test/006-args/expected.txt
similarity index 100%
rename from test/005-args/expected.txt
rename to test/006-args/expected.txt
diff --git a/test/005-args/info.txt b/test/006-args/info.txt
similarity index 100%
rename from test/005-args/info.txt
rename to test/006-args/info.txt
diff --git a/test/005-args/src/ArgsTest.java b/test/006-args/src/ArgsTest.java
similarity index 100%
rename from test/005-args/src/ArgsTest.java
rename to test/006-args/src/ArgsTest.java
diff --git a/test/005-args/src/Main.java b/test/006-args/src/Main.java
similarity index 100%
rename from test/005-args/src/Main.java
rename to test/006-args/src/Main.java
diff --git a/test/006-count10/expected.txt b/test/007-count10/expected.txt
similarity index 100%
rename from test/006-count10/expected.txt
rename to test/007-count10/expected.txt
diff --git a/test/006-count10/info.txt b/test/007-count10/info.txt
similarity index 100%
rename from test/006-count10/info.txt
rename to test/007-count10/info.txt
diff --git a/test/006-count10/src/Main.java b/test/007-count10/src/Main.java
similarity index 100%
rename from test/006-count10/src/Main.java
rename to test/007-count10/src/Main.java
diff --git a/test/007-exceptions/expected.txt b/test/008-exceptions/expected.txt
similarity index 100%
rename from test/007-exceptions/expected.txt
rename to test/008-exceptions/expected.txt
diff --git a/test/007-exceptions/info.txt b/test/008-exceptions/info.txt
similarity index 100%
rename from test/007-exceptions/info.txt
rename to test/008-exceptions/info.txt
diff --git a/test/007-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
similarity index 100%
rename from test/007-exceptions/src/Main.java
rename to test/008-exceptions/src/Main.java
diff --git a/test/008-instanceof/expected.txt b/test/008-instanceof/expected.txt
deleted file mode 100644
index 77fd0cb..0000000
--- a/test/008-instanceof/expected.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-iface1.mFloaty = 5.0 wahoo
-aa.mFloaty = 5.0 wahoo
-bb.mWhoami = ImplB!
-aaOkay (false) = false
-bbOkay (true) = true
-Caught a ClassCastException (expected)
diff --git a/test/008-instanceof/src/Iface1.java b/test/008-instanceof/src/Iface1.java
deleted file mode 100644
index d7f5376..0000000
--- a/test/008-instanceof/src/Iface1.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2005 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Test stuff.
- */
-public interface Iface1 {
-
-    public int iFunc1(int ii);
-
-    public float mFloaty = 5.0f;
-
-    public String mWahoo = new String("wahoo");
-}
diff --git a/test/008-instanceof/src/Iface2.java b/test/008-instanceof/src/Iface2.java
deleted file mode 100644
index 2b33c39..0000000
--- a/test/008-instanceof/src/Iface2.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Another interface.
- */
-public interface Iface2 {
-
-    public int iFunc2(int ii);
-}
diff --git a/test/008-instanceof/src/Iface2Sub1.java b/test/008-instanceof/src/Iface2Sub1.java
deleted file mode 100644
index bcff8ab..0000000
--- a/test/008-instanceof/src/Iface2Sub1.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Another interface.
- */
-public interface Iface2Sub1 extends Iface2, Cloneable {
-
-    //public int iFunc2(int ii);
-}
diff --git a/test/008-instanceof/src/ImplA.java b/test/008-instanceof/src/ImplA.java
deleted file mode 100644
index 27364f2..0000000
--- a/test/008-instanceof/src/ImplA.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Blah.
- */
-public class ImplA implements Iface1, Iface2 {
-
-    public int iFunc1(int ii) {
-        return ii+1;
-    }
-    public int iFunc2(int ii) {
-        return ii+2;
-    }
-}
diff --git a/test/008-instanceof/src/ImplB.java b/test/008-instanceof/src/ImplB.java
deleted file mode 100644
index 8b05702..0000000
--- a/test/008-instanceof/src/ImplB.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Blah.
- */
-public class ImplB implements Iface1, Iface2 {
-
-    public int iFunc1(int ii) {
-        return ii+10;
-    }
-    public int iFunc2(int ii) {
-        return ii+20;
-    }
-
-    public static String mWhoami = new String("ImplB!");
-}
diff --git a/test/008-instanceof/src/ImplBSub.java b/test/008-instanceof/src/ImplBSub.java
deleted file mode 100644
index a94ae4d..0000000
--- a/test/008-instanceof/src/ImplBSub.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2006 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Interface test.
- */
-public class ImplBSub extends ImplB implements /*Iface2,*/ Iface2Sub1 {
-
-    public int iFunc1(int ii) {
-        return ii+100;
-    }
-    public int iFunc2(int ii) {
-        return ii+200;
-    }
-}
diff --git a/test/008-instanceof/src/Main.java b/test/008-instanceof/src/Main.java
deleted file mode 100644
index 77f3f51..0000000
--- a/test/008-instanceof/src/Main.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2007 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Test instanceof
- */
-public class Main {
-    public static void main(String args[]) {
-        Iface1 face1;
-        ImplA aa = new ImplA();
-        ImplBSub bb = new ImplBSub();
-        boolean aaOkay, bbOkay;
-
-        face1 = aa;
-        face1 = bb;
-
-        System.out.println("iface1.mFloaty = " + face1.mFloaty + " " + face1.mWahoo);
-        System.out.println("aa.mFloaty = " + aa.mFloaty + " " + aa.mWahoo);
-        System.out.println("bb.mWhoami = " + bb.mWhoami);
-
-        aaOkay = face1 instanceof ImplA;
-        System.out.print("aaOkay (false) = ");
-        System.out.println(aaOkay);
-        bbOkay = face1 instanceof ImplB;
-        System.out.print("bbOkay (true) = ");
-        System.out.println(bbOkay);
-
-        bb = (ImplBSub) face1;
-        try {
-            aa = (ImplA) face1;
-        }
-        catch (ClassCastException cce) {
-            System.out.println("Caught a ClassCastException (expected)");
-        }
-    }
-}
diff --git a/test/009-instanceof/expected.txt b/test/009-instanceof/expected.txt
new file mode 100644
index 0000000..967c0bf
--- /dev/null
+++ b/test/009-instanceof/expected.txt
@@ -0,0 +1,11 @@
+iface1.mFloaty = 5.0 wahoo
+aa.mFloaty = 5.0 wahoo
+bb.mWhoami = ImplB!
+aaOkay (false) = false
+bbOkay (true) = true
+Caught a ClassCastException (expected)
+instanceof Serializable = true
+instanceof Cloneable = true
+instanceof Runnable = false
+aaOkay (false) = false
+bbOkay (true) = true
diff --git a/test/008-instanceof/info.txt b/test/009-instanceof/info.txt
similarity index 100%
rename from test/008-instanceof/info.txt
rename to test/009-instanceof/info.txt
diff --git a/test/009-instanceof2/src/Iface1.java b/test/009-instanceof/src/Iface1.java
similarity index 100%
rename from test/009-instanceof2/src/Iface1.java
rename to test/009-instanceof/src/Iface1.java
diff --git a/test/009-instanceof2/src/Iface2.java b/test/009-instanceof/src/Iface2.java
similarity index 100%
rename from test/009-instanceof2/src/Iface2.java
rename to test/009-instanceof/src/Iface2.java
diff --git a/test/009-instanceof2/src/Iface2Sub1.java b/test/009-instanceof/src/Iface2Sub1.java
similarity index 100%
rename from test/009-instanceof2/src/Iface2Sub1.java
rename to test/009-instanceof/src/Iface2Sub1.java
diff --git a/test/009-instanceof2/src/ImplA.java b/test/009-instanceof/src/ImplA.java
similarity index 100%
rename from test/009-instanceof2/src/ImplA.java
rename to test/009-instanceof/src/ImplA.java
diff --git a/test/009-instanceof2/src/ImplB.java b/test/009-instanceof/src/ImplB.java
similarity index 100%
rename from test/009-instanceof2/src/ImplB.java
rename to test/009-instanceof/src/ImplB.java
diff --git a/test/009-instanceof2/src/ImplBSub.java b/test/009-instanceof/src/ImplBSub.java
similarity index 100%
rename from test/009-instanceof2/src/ImplBSub.java
rename to test/009-instanceof/src/ImplBSub.java
diff --git a/test/009-instanceof2/src/Main.java b/test/009-instanceof/src/Main.java
similarity index 66%
rename from test/009-instanceof2/src/Main.java
rename to test/009-instanceof/src/Main.java
index 15a1e50..807ae69 100644
--- a/test/009-instanceof2/src/Main.java
+++ b/test/009-instanceof/src/Main.java
@@ -19,10 +19,36 @@
  */
 public class Main {
     public static void main(String args[]) {
+        Iface1 face1;
+        ImplA aa = new ImplA();
+        ImplBSub bb = new ImplBSub();
+        boolean aaOkay, bbOkay;
+
+        face1 = aa;
+        face1 = bb;
+
+        System.out.println("iface1.mFloaty = " + face1.mFloaty + " " + face1.mWahoo);
+        System.out.println("aa.mFloaty = " + aa.mFloaty + " " + aa.mWahoo);
+        System.out.println("bb.mWhoami = " + bb.mWhoami);
+
+        aaOkay = face1 instanceof ImplA;
+        System.out.print("aaOkay (false) = ");
+        System.out.println(aaOkay);
+        bbOkay = face1 instanceof ImplB;
+        System.out.print("bbOkay (true) = ");
+        System.out.println(bbOkay);
+
+        bb = (ImplBSub) face1;
+        try {
+            aa = (ImplA) face1;
+        }
+        catch (ClassCastException cce) {
+            System.out.println("Caught a ClassCastException (expected)");
+        }
+
         Iface1[] faceArray;
         ImplA[] aaArray = new ImplA[5];
         ImplBSub[] bbArray = new ImplBSub[5];
-        boolean aaOkay, bbOkay;
 
         faceArray = aaArray;
         faceArray = bbArray;
diff --git a/test/009-instanceof2/expected.txt b/test/009-instanceof2/expected.txt
deleted file mode 100644
index 74ad202..0000000
--- a/test/009-instanceof2/expected.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-instanceof Serializable = true
-instanceof Cloneable = true
-instanceof Runnable = false
-aaOkay (false) = false
-bbOkay (true) = true
diff --git a/test/009-instanceof2/info.txt b/test/009-instanceof2/info.txt
deleted file mode 100644
index 08127da..0000000
--- a/test/009-instanceof2/info.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-This is a miscellaneous test that was imported into the new-at-the-time
-runtime test framework. The test is intended to exercise basic features,
-and as such cannot be build on top of junit, since failure of such basic
-features might disrupt junit.
-
-TODO: Real description goes here.
diff --git a/test/018-stack-overflow/expected.txt b/test/018-stack-overflow/expected.txt
index 98b45b7..cc10c0c 100644
--- a/test/018-stack-overflow/expected.txt
+++ b/test/018-stack-overflow/expected.txt
@@ -1,3 +1,10 @@
-caught SOE in testSelfRecursion
+libartd run.
+caught SOE3 in testSelfRecursion
+caught SOE10 in testSelfRecursion
+caught SOE in testMutualRecursion
+SOE test done
+libart run.
+caught SOE3 in testSelfRecursion
+caught SOE10 in testSelfRecursion
 caught SOE in testMutualRecursion
 SOE test done
diff --git a/test/018-stack-overflow/run b/test/018-stack-overflow/run
new file mode 100755
index 0000000..1a71a1a
--- /dev/null
+++ b/test/018-stack-overflow/run
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run normal. This will be the debug build.
+echo "libartd run."
+${RUN} "${@}"
+
+# Run non-debug.
+echo "libart run."
+${RUN} "${@/#libartd.so/libart.so}"
diff --git a/test/018-stack-overflow/src/Main.java b/test/018-stack-overflow/src/Main.java
index 41adabc..0961226 100644
--- a/test/018-stack-overflow/src/Main.java
+++ b/test/018-stack-overflow/src/Main.java
@@ -25,16 +25,38 @@
     }
 
     private static void testSelfRecursion() {
+//        try {
+//            stackOverflowTestSub0();
+//        }
+//        catch (StackOverflowError soe) {
+//            System.out.println("caught SOE0 in testSelfRecursion");
+//        }
         try {
-            stackOverflowTestSub(0.0, 0.0, 0.0);
+            stackOverflowTestSub3(0.0, 1.0, 2.0);
         }
         catch (StackOverflowError soe) {
-            System.out.println("caught SOE in testSelfRecursion");
+            System.out.println("caught SOE3 in testSelfRecursion");
+        }
+        try {
+            stackOverflowTestSub10(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+        }
+        catch (StackOverflowError soe) {
+            System.out.println("caught SOE10 in testSelfRecursion");
         }
     }
 
-    private static void stackOverflowTestSub(double pad1, double pad2, double pad3) {
-        stackOverflowTestSub(pad1, pad2, pad3);
+    private static void stackOverflowTestSub0() {
+        stackOverflowTestSub0();
+    }
+
+    private static void stackOverflowTestSub3(double pad1, double pad2, double pad3) {
+        stackOverflowTestSub3(pad1, pad2, pad3);
+    }
+
+    private static void stackOverflowTestSub10(double pad1, double pad2, double pad3, double pad4,
+                                               double pad5, double pad6, double pad7, double pad8,
+                                               double pad9, double pad10) {
+        stackOverflowTestSub10(pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10);
     }
 
     private static void testMutualRecursion() {
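
A standalone sketch of the apparent rationale for the widened signatures above (an assumption, not stated in the change): more argument slots make each frame larger, so the ten-double variant should overflow in fewer recursive calls, exercising the overflow check with differing frame sizes.

    public class FrameDepth {
        static int depth3 = 0;
        static int depth10 = 0;

        static void rec3(double a, double b, double c) {
            depth3++;
            rec3(a, b, c);
        }

        static void rec10(double a, double b, double c, double d, double e,
                          double f, double g, double h, double i, double j) {
            depth10++;
            rec10(a, b, c, d, e, f, g, h, i, j);
        }

        public static void main(String[] args) {
            try { rec3(0, 0, 0); } catch (StackOverflowError soe) { }
            try { rec10(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } catch (StackOverflowError soe) { }
            // Larger frames usually give a shallower maximum depth.
            System.out.println("depth3=" + depth3 + ", depth10=" + depth10);
        }
    }
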
diff --git a/test/050-sync-test/src/Main.java b/test/050-sync-test/src/Main.java
index ec6d732..5364e2a 100644
--- a/test/050-sync-test/src/Main.java
+++ b/test/050-sync-test/src/Main.java
@@ -50,13 +50,14 @@
         one = new CpuThread(1);
         two = new CpuThread(2);
 
-        one.start();
-
-        try {
-            Thread.sleep(100);
-        } catch (InterruptedException ie) {
-            System.out.println("INTERRUPT!");
-            ie.printStackTrace();
+        synchronized (one) {
+            one.start();
+            try {
+                one.wait();
+            } catch (InterruptedException ie) {
+                System.out.println("INTERRUPT!");
+                ie.printStackTrace();
+            }
         }
 
         two.start();
@@ -101,6 +102,9 @@
         //System.out.println(Thread.currentThread().getName());
 
         synchronized (mSyncable) {
+            synchronized (this) {
+                this.notify();
+            }
             for (int i = 0; i < 10; i++) {
                 output(mNumber);
             }
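
The rewrite above replaces a fixed 100 ms sleep with an explicit handshake. A minimal standalone sketch of the pattern (names are illustrative): the parent holds the monitor while starting the child and only releases it inside wait(), so the child's notify() cannot be lost.

    public class Handshake {
        public static void main(String[] args) throws InterruptedException {
            final Object ready = new Object();
            Thread child = new Thread(new Runnable() {
                public void run() {
                    synchronized (ready) {
                        ready.notify();  // signal that the child is running
                    }
                    System.out.println("child proceeds with the contended work");
                }
            });
            synchronized (ready) {
                child.start();
                ready.wait();  // releases the monitor until the child notifies
            }
            System.out.println("parent resumes only after the child has started");
            child.join();
        }
    }

General-purpose code would wait in a loop on a condition flag to guard against spurious wakeups; the test, like this sketch, relies on a single notify.
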
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index 3ffe2f3..035690f 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -21,7 +21,7 @@
         static void blowup(char[][] holder) {
             try {
                 for (int i = 0; i < holder.length; ++i) {
-                    holder[i] = new char[128 * 1024];
+                    holder[i] = new char[1024 * 1024];
                 }
             } catch (OutOfMemoryError oome) {
                 ArrayMemEater.sawOome = true;
@@ -49,7 +49,7 @@
     }
 
     static boolean triggerArrayOOM() {
-        ArrayMemEater.blowup(new char[1 * 1024 * 1024][]);
+        ArrayMemEater.blowup(new char[128 * 1024][]);
         return ArrayMemEater.sawOome;
     }
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 1c3c89e..9ecc0a0 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -66,25 +66,6 @@
     test_Memory_pokeLong();
   }
 
-  /*
-   * Determine if two floating point numbers are approximately equal.
-   *
-   * (Assumes that floating point is generally working, so we can't use
-   * this for the first set of tests.)
-   */
-  static boolean approxEqual(float a, float b, float maxDelta) {
-    if (a > b)
-      return (a - b) < maxDelta;
-    else
-      return (b - a) < maxDelta;
-  }
-  static boolean approxEqual(double a, double b, double maxDelta) {
-    if (a > b)
-      return (a - b) < maxDelta;
-    else
-      return (b - a) < maxDelta;
-  }
-
   /**
    * Will test inlining Thread.currentThread().
    */
@@ -340,39 +321,59 @@
   }
 
   public static void test_Math_min_F() {
-    Assert.assertTrue(approxEqual(Math.min(0.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.min(1.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.min(0.0f, 1.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.min(0.0f, Float.MAX_VALUE), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.min(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE, 0.001f));
-    Assert.assertTrue(approxEqual(Math.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE, 0.001f));
+    Assert.assertTrue(Float.isNaN(Math.min(1.0f, Float.NaN)));
+    Assert.assertTrue(Float.isNaN(Math.min(Float.NaN, 1.0f)));
+    Assert.assertEquals(Math.min(-0.0f, 0.0f), -0.0f);
+    Assert.assertEquals(Math.min(0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(Math.min(-0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(Math.min(0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(Math.min(1.0f, 0.0f), 0.0f);
+    Assert.assertEquals(Math.min(0.0f, 1.0f), 0.0f);
+    Assert.assertEquals(Math.min(0.0f, Float.MAX_VALUE), 0.0f);
+    Assert.assertEquals(Math.min(Float.MIN_VALUE, 0.0f), 0.0f);
+    Assert.assertEquals(Math.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE);
   }
 
   public static void test_Math_max_F() {
-    Assert.assertTrue(approxEqual(Math.max(0.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.max(1.0f, 0.0f), 1.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.max(0.0f, 1.0f), 1.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
-    Assert.assertTrue(approxEqual(Math.max(Float.MIN_VALUE, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(Math.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+    Assert.assertTrue(Float.isNaN(Math.max(1.0f, Float.NaN)));
+    Assert.assertTrue(Float.isNaN(Math.max(Float.NaN, 1.0f)));
+    Assert.assertEquals(Math.max(-0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(Math.max(0.0f, -0.0f), 0.0f);
+    Assert.assertEquals(Math.max(-0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(Math.max(0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(Math.max(1.0f, 0.0f), 1.0f);
+    Assert.assertEquals(Math.max(0.0f, 1.0f), 1.0f);
+    Assert.assertEquals(Math.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE);
+    Assert.assertEquals(Math.max(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE);
+    Assert.assertEquals(Math.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE);
   }
 
   public static void test_Math_min_D() {
-    Assert.assertTrue(approxEqual(Math.min(0.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.min(1.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.min(0.0d, 1.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.min(0.0d, Double.MAX_VALUE), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.min(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE, 0.001d));
-    Assert.assertTrue(approxEqual(Math.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE, 0.001d));
+    Assert.assertTrue(Double.isNaN(Math.min(1.0d, Double.NaN)));
+    Assert.assertTrue(Double.isNaN(Math.min(Double.NaN, 1.0d)));
+    Assert.assertEquals(Math.min(-0.0d, 0.0d), -0.0d);
+    Assert.assertEquals(Math.min(0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(Math.min(-0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(Math.min(0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(Math.min(1.0d, 0.0d), 0.0d);
+    Assert.assertEquals(Math.min(0.0d, 1.0d), 0.0d);
+    Assert.assertEquals(Math.min(0.0d, Double.MAX_VALUE), 0.0d);
+    Assert.assertEquals(Math.min(Double.MIN_VALUE, 0.0d), 0.0d);
+    Assert.assertEquals(Math.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE);
   }
 
   public static void test_Math_max_D() {
-    Assert.assertTrue(approxEqual(Math.max(0.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.max(1.0d, 0.0d), 1.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.max(0.0d, 1.0d), 1.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
-    Assert.assertTrue(approxEqual(Math.max(Double.MIN_VALUE, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(Math.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+    Assert.assertTrue(Double.isNaN(Math.max(1.0d, Double.NaN)));
+    Assert.assertTrue(Double.isNaN(Math.max(Double.NaN, 1.0d)));
+    Assert.assertEquals(Math.max(-0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(Math.max(0.0d, -0.0d), 0.0d);
+    Assert.assertEquals(Math.max(-0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(Math.max(0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(Math.max(1.0d, 0.0d), 1.0d);
+    Assert.assertEquals(Math.max(0.0d, 1.0d), 1.0d);
+    Assert.assertEquals(Math.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE);
+    Assert.assertEquals(Math.max(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE);
+    Assert.assertEquals(Math.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE);
   }
 
   public static void test_StrictMath_abs_I() {
@@ -431,39 +432,59 @@
   }
 
   public static void test_StrictMath_min_F() {
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.min(1.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, 1.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, Float.MAX_VALUE), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.min(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE, 0.001f));
+    Assert.assertTrue(Float.isNaN(StrictMath.min(1.0f, Float.NaN)));
+    Assert.assertTrue(Float.isNaN(StrictMath.min(Float.NaN, 1.0f)));
+    Assert.assertEquals(StrictMath.min(-0.0f, 0.0f), -0.0f);
+    Assert.assertEquals(StrictMath.min(0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(StrictMath.min(-0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(StrictMath.min(0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.min(1.0f, 0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.min(0.0f, 1.0f), 0.0f);
+    Assert.assertEquals(StrictMath.min(0.0f, Float.MAX_VALUE), 0.0f);
+    Assert.assertEquals(StrictMath.min(Float.MIN_VALUE, 0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE);
   }
 
   public static void test_StrictMath_max_F() {
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.max(1.0f, 0.0f), 1.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, 1.0f), 1.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.max(Float.MIN_VALUE, 0.0f), 0.0f, 0.001f));
-    Assert.assertTrue(approxEqual(StrictMath.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+    Assert.assertTrue(Float.isNaN(StrictMath.max(1.0f, Float.NaN)));
+    Assert.assertTrue(Float.isNaN(StrictMath.max(Float.NaN, 1.0f)));
+    Assert.assertEquals(StrictMath.max(-0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.max(0.0f, -0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.max(-0.0f, -0.0f), -0.0f);
+    Assert.assertEquals(StrictMath.max(0.0f, 0.0f), 0.0f);
+    Assert.assertEquals(StrictMath.max(1.0f, 0.0f), 1.0f);
+    Assert.assertEquals(StrictMath.max(0.0f, 1.0f), 1.0f);
+    Assert.assertEquals(StrictMath.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE);
+    Assert.assertEquals(StrictMath.max(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE);
+    Assert.assertEquals(StrictMath.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE);
   }
 
   public static void test_StrictMath_min_D() {
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.min(1.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, 1.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, Double.MAX_VALUE), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.min(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE, 0.001d));
+    Assert.assertTrue(Double.isNaN(StrictMath.min(1.0d, Double.NaN)));
+    Assert.assertTrue(Double.isNaN(StrictMath.min(Double.NaN, 1.0d)));
+    Assert.assertEquals(StrictMath.min(-0.0d, 0.0d), -0.0d);
+    Assert.assertEquals(StrictMath.min(0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(StrictMath.min(-0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(StrictMath.min(0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.min(1.0d, 0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.min(0.0d, 1.0d), 0.0d);
+    Assert.assertEquals(StrictMath.min(0.0d, Double.MAX_VALUE), 0.0d);
+    Assert.assertEquals(StrictMath.min(Double.MIN_VALUE, 0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE);
   }
 
   public static void test_StrictMath_max_D() {
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.max(1.0d, 0.0d), 1.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, 1.0d), 1.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.max(Double.MIN_VALUE, 0.0d), 0.0d, 0.001d));
-    Assert.assertTrue(approxEqual(StrictMath.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+    Assert.assertTrue(Double.isNaN(StrictMath.max(1.0d, Double.NaN)));
+    Assert.assertTrue(Double.isNaN(StrictMath.max(Double.NaN, 1.0d)));
+    Assert.assertEquals(StrictMath.max(-0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.max(0.0d, -0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.max(-0.0d, -0.0d), -0.0d);
+    Assert.assertEquals(StrictMath.max(0.0d, 0.0d), 0.0d);
+    Assert.assertEquals(StrictMath.max(1.0d, 0.0d), 1.0d);
+    Assert.assertEquals(StrictMath.max(0.0d, 1.0d), 1.0d);
+    Assert.assertEquals(StrictMath.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE);
+    Assert.assertEquals(StrictMath.max(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE);
+    Assert.assertEquals(StrictMath.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE);
   }
 
   public static void test_Float_floatToRawIntBits() {
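
The switch from approxEqual to exact assertions leans on IEEE-754 corner cases that min/max intrinsics must preserve. A standalone sketch of those cases, using plain println rather than the test's Assert; note that Float.MIN_VALUE is the smallest positive float, not the most negative value, which is why min(Float.MIN_VALUE, 0.0f) is now expected to be 0.0f.

    public class MinMaxCorners {
        public static void main(String[] args) {
            System.out.println(Math.min(Float.NaN, 1.0f));        // NaN propagates
            System.out.println(1.0f / Math.min(-0.0f, 0.0f));     // -Infinity: min picks -0.0f
            System.out.println(1.0f / Math.max(-0.0f, 0.0f));     // Infinity: max picks +0.0f
            System.out.println(Math.min(Float.MIN_VALUE, 0.0f));  // 0.0
            System.out.println(Math.max(Float.MIN_VALUE, 0.0f));  // 1.4E-45 (Float.MIN_VALUE)
        }
    }
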
diff --git a/test/083-compiler-regressions/expected.txt b/test/083-compiler-regressions/expected.txt
index 10406c7..9f57dbd 100644
--- a/test/083-compiler-regressions/expected.txt
+++ b/test/083-compiler-regressions/expected.txt
@@ -14,6 +14,7 @@
 false
 b13679511Test finishing
 b16177324TestWrapper caught NPE as expected.
+b16230771TestWrapper caught NPE as expected.
 largeFrame passes
 largeFrameFloat passes
 mulBy1Test passes
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 18bc674..748b0de 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -36,6 +36,7 @@
         b5884080Test();
         b13679511Test();
         b16177324TestWrapper();
+        b16230771TestWrapper();
         largeFrameTest();
         largeFrameTestFloat();
         mulBy1Test();
@@ -927,6 +928,28 @@
       System.out.println("Unexpectedly retrieved all values: " + v1 + ", " + v2 + ", " + v3);
     }
 
+    static void b16230771TestWrapper() {
+      try {
+        b16230771Test();
+      } catch (NullPointerException expected) {
+        System.out.println("b16230771TestWrapper caught NPE as expected.");
+      }
+    }
+
+    static void b16230771Test() {
+      Integer[] array = { null };
+      for (Integer i : array) {
+        try {
+          int value = i;  // Null check on unboxing should fail.
+          System.out.println("Unexpectedly retrieved value " + value);
+        } catch (NullPointerException e) {
+          int value = i;  // Null check on unboxing should fail.
+          // The bug was a missing null check, so this would actually cause SIGSEGV.
+          System.out.println("Unexpectedly retrieved value " + value + " in NPE catch handler");
+        }
+      }
+    }
+
     static double TooManyArgs(
           long l00,
           long l01,
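
For context on b16230771Test: unboxing the iteration variable compiles down to Integer.intValue() on a null reference, and the reported bug was that the compiled catch handler lacked the null check, so the second unboxing crashed with SIGSEGV instead of throwing. A minimal standalone reproduction of the language rule being verified:

    public class UnboxNpe {
        public static void main(String[] args) {
            Integer boxed = null;
            try {
                int value = boxed;  // behaves like boxed.intValue(): must throw NPE
                System.out.println("Unexpectedly retrieved value " + value);
            } catch (NullPointerException expected) {
                System.out.println("NPE on unboxing, as required");
            }
        }
    }
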
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index bed0689..1af4121 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,7 +32,7 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final char[] java.lang.String.value, private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
+[private final char[] java.lang.String.value, private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final long java.lang.String.serialVersionUID, private static final char java.lang.String.REPLACEMENT_CHAR]
 [void java.lang.String._getChars(int,int,char[],int), public char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public volatile int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private char java.lang.String.foldCase(char), public static transient java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static transient java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
diff --git a/test/114-ParallelGC/expected.txt b/test/114-ParallelGC/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/114-ParallelGC/expected.txt
diff --git a/test/114-ParallelGC/info.txt b/test/114-ParallelGC/info.txt
new file mode 100644
index 0000000..246b4e6
--- /dev/null
+++ b/test/114-ParallelGC/info.txt
@@ -0,0 +1 @@
+Imported from oat tests. Allocates and frees objects with multiple threads.
diff --git a/test/ParallelGC/ParallelGC.java b/test/114-ParallelGC/src/Main.java
similarity index 84%
rename from test/ParallelGC/ParallelGC.java
rename to test/114-ParallelGC/src/Main.java
index eb9e04e..fb110bd 100644
--- a/test/ParallelGC/ParallelGC.java
+++ b/test/114-ParallelGC/src/Main.java
@@ -17,11 +17,11 @@
 import java.util.ArrayList;
 import java.util.List;
 
-class ParallelGC implements Runnable {
+public class Main implements Runnable {
     public static void main(String[] args) throws Exception {
         Thread[] threads = new Thread[16];
         for (int i = 0; i < threads.length; i++) {
-            threads[i] = new Thread(new ParallelGC(i));
+            threads[i] = new Thread(new Main(i));
         }
         for (Thread thread : threads) {
             thread.start();
@@ -33,15 +33,14 @@
 
     private final int id;
 
-    private ParallelGC(int id) {
+    private Main(int id) {
         this.id = id;
     }
 
     public void run() {
         List l = new ArrayList();
-        for (int i = 0; i < 1000; i++) {
+        for (int i = 0; i < 500; i++) {
             l.add(new ArrayList(i));
-            System.out.print(id);
         }
     }
 }
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index a5192e1..2c6d1c2 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -75,6 +75,37 @@
     if (m.$opt$TestReturnNewObject(m) == m) {
       throw new Error("Unexpected value returned");
     }
+
+    // Loop enough iterations to hope for a crash if no write barrier
+    // is emitted.
+    for (int j = 0; j < 3; j++) {
+      Main m1 = new Main();
+      $opt$SetFieldInOldObject(m1);
+      for (int i = 0; i < 1000; ++i) {
+        Object o = new byte[1024];
+      }
+    }
+
+    // Test that we do NPE checks on invokedirect.
+    Exception exception = null;
+    try {
+      invokePrivate();
+    } catch (NullPointerException e) {
+      exception = e;
+    }
+
+    if (exception == null) {
+      throw new Error("Missing NullPointerException");
+    }
+  }
+
+  public static void invokePrivate() {
+    Main m = null;
+    m.privateMethod();
+  }
+
+  private void privateMethod() {
+    Object o = new Object();
   }
 
   static int $opt$TestInvokeIntParameter(int param) {
@@ -169,4 +200,10 @@
   public static void throwStaticMethod() {
     throw new Error("Error");
   }
+
+  public static void $opt$SetFieldInOldObject(Main m) {
+    m.o = new Main();
+  }
+
+  Object o;
 }
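
A conceptual sketch (not ART's actual implementation) of the hazard the allocation loop above probes: a generational collector typically rescans only old objects recorded as modified, so a compiled store of a young reference into an old object must also execute a write barrier, or the young object can be missed and reclaimed.

    class Old {
        Object field;
    }

    public class WriteBarrierSketch {
        static void store(Old oldObj, Object young) {
            // Compiled code must pair this store with a write barrier
            // (e.g. dirtying a card for oldObj); in Java source it is implicit.
            oldObj.field = young;
        }

        public static void main(String[] args) {
            Old o = new Old();
            for (int i = 0; i < 1000; ++i) {
                store(o, new byte[1024]);  // allocation churn to provoke a GC
            }
            // Always true on a correct VM; a missing barrier would risk a crash.
            System.out.println(o.field != null);
        }
    }
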
diff --git a/test/700-LoadArgRegs/expected.txt b/test/700-LoadArgRegs/expected.txt
index 4977df6..c0d5eee 100644
--- a/test/700-LoadArgRegs/expected.txt
+++ b/test/700-LoadArgRegs/expected.txt
@@ -74,3 +74,4 @@
 -91, -92, -93, -94, -95, -96, -97, -98, -99
 -1, -91, -92, -93, -94, -95, -96, -97, -98, -99
 1, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2, 3, 4, 5, 6
+1234605616436508552, -8613303245920329199, 1234605616436508552
diff --git a/test/700-LoadArgRegs/src/Main.java b/test/700-LoadArgRegs/src/Main.java
index 0e6de73..4649d05 100644
--- a/test/700-LoadArgRegs/src/Main.java
+++ b/test/700-LoadArgRegs/src/Main.java
@@ -274,6 +274,14 @@
     System.out.println(i1+", "+d1+", "+d2+", "+d3+", "+d4+", "+d5+", "+d6+", "+d7+", "+d8+", "+d9+", "+i2+", "+i3+", "+i4+", "+i5+", "+i6);
   }
 
+  static void testRefs1(Object o1, Object o2, Object o3, Object o4, Object o5, long l1, long l2, long l3) {
+    System.out.println(l1 + ", " + l2 + ", " + l3);
+  }
+
+  static void testRefs(Object o1, Object o2, Object o3, Object o4, Object o5, long l1, long l2, long l3) {
+    testRefs1(o1, o2, o3, o4, o5, l1, l2, l3);
+  }
+
   static public void main(String[] args) throws Exception {
     testI();
     testB();
@@ -288,5 +296,8 @@
     testLL();
 
     testMore(1, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2, 3, 4, 5, 6);
+
+    Object obj = new Object();
+    testRefs(obj, obj, obj, obj, obj, 0x1122334455667788L, 0x8877665544332211L, 0x1122334455667788L);
   }
 }
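
For reference, the new line in expected.txt is just the decimal rendering of the hex constants passed to testRefs; the middle value prints as negative because 0x8877665544332211 has the sign bit set. A quick standalone check:

    public class HexLongs {
        public static void main(String[] args) {
            System.out.println(0x1122334455667788L);  // 1234605616436508552
            System.out.println(0x8877665544332211L);  // -8613303245920329199
            System.out.println(Long.toHexString(-8613303245920329199L));  // 8877665544332211
        }
    }
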
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index bf3e2aa..f3563a4 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -19,11 +19,11 @@
 include art/build/Android.common_build.mk
 
 LIBARTTEST_COMMON_SRC_FILES := \
-  JniTest/jni_test.cc \
-  SignalTest/signaltest.cc \
-  ReferenceMap/stack_walk_refmap_jni.cc \
-  StackWalk/stack_walk_jni.cc \
-  UnsafeTest/unsafe_test.cc
+  004-JniTest/jni_test.cc \
+  004-SignalTest/signaltest.cc \
+  004-ReferenceMap/stack_walk_refmap_jni.cc \
+  004-StackWalk/stack_walk_jni.cc \
+  004-UnsafeTest/unsafe_test.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ifdef TARGET_2ND_ARCH
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
deleted file mode 100644
index 2b142db..0000000
--- a/test/Android.oat.mk
+++ /dev/null
@@ -1,456 +0,0 @@
-# Copyright (C) 2011 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH := $(call my-dir)
-LOCAL_PID := $(shell echo $$PPID)
-
-include art/build/Android.common_test.mk
-
-########################################################################
-
-# Subdirectories in art/test which contain dex files used as inputs for oat tests. Declare the
-# simplest tests (Main, HelloWorld) first, the rest are alphabetical.
-TEST_OAT_DIRECTORIES := \
-  Main \
-  HelloWorld \
-  InterfaceTest \
-  JniTest \
-  SignalTest \
-  NativeAllocations \
-  ParallelGC \
-  ReferenceMap \
-  StackWalk \
-  ThreadStress \
-  UnsafeTest
-
-# TODO: Enable when the StackWalk2 tests are passing
-#  StackWalk2 \
-
-# Create build rules for each dex file recording the dependency.
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call build-art-test-dex,art-oat-test,$(dir), \
-  $(ART_TARGET_TEST_OUT),$(LOCAL_PATH)/Android.oat.mk,ART_TEST_TARGET_OAT_$(dir)_DEX, \
-  ART_TEST_HOST_OAT_$(dir)_DEX)))
-
-########################################################################
-
-include $(LOCAL_PATH)/Android.libarttest.mk
-
-ART_TEST_TARGET_OAT_DEFAULT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_DEFAULT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_DEFAULT_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER_RULES :=
-ART_TEST_TARGET_OAT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_RULES :=
-
-# We need dex2oat and dalvikvm on the target as well as the core image.
-TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(2ND_TARGET_CORE_IMG_OUT) $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
-ifdef TARGET_2ND_ARCH
-TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libarttest.so
-endif
-
-# Define rule to run an individual oat test on the target. Output from the test is written to the
-# host in /tmp/android-data in a directory named after the test's rule name (its target) and the parent
-# process' PID (i.e. the PID of make). On failure the output is dumped to the console. To test for
-# success on the target device a file is created following a successful test and this is pulled
-# onto the host. If the pull fails then the file wasn't created because the test failed.
-# $(1): directory - the name of the test we're building such as HelloWorld.
-# $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
-# $(3): the target (rule name), e.g. test-art-target-oat-default-HelloWorld64
-# $(4): -Xint or undefined - do we want to run with the interpreter or default.
-define define-test-art-oat-rule-target
-  # Add the test dependencies to test-art-target-sync, which will be a prerequisite for the test
-  # to ensure files are pushed to the device.
-  TEST_ART_TARGET_SYNC_DEPS += $$(ART_TEST_TARGET_OAT_$(1)_DEX)
-
-.PHONY: $(3)
-$(3): test-art-target-sync
-	$(hide) mkdir -p $(ART_HOST_TEST_DIR)/android-data-$$@
-	$(hide) echo Running: $$@
-	$(hide) adb shell touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID)
-	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID)
-	$(hide) $$(call ART_TEST_SKIP,$$@) && \
-	  adb shell "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) \
-	    $(DALVIKVM_FLAGS) $(4) -XXlib:libartd.so -Ximage:$(ART_TARGET_TEST_DIR)/core.art \
-	    -classpath $(ART_TARGET_TEST_DIR)/art-oat-test-$(1).jar \
-	    -Djava.library.path=$(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) \
-	      && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID)" \
-	        > $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt 2>&1 && \
-	  (adb pull $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID) $(ART_HOST_TEST_DIR)/android-data-$$@ \
-	    && $$(call ART_TEST_PASSED,$$@)) \
-	    || (([ ! -f $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt ] || \
-	         cat $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt) && $$(call ART_TEST_FAILED,$$@))
-	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
-	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
-	  rm -r $(ART_HOST_TEST_DIR)) || true
-
-endef  # define-test-art-oat-rule-target
-
-# Define rules to run oat tests on the target.
-# $(1): directory - the name of the test we're building such as HelloWorld.
-# $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
-define define-test-art-oat-rules-target
-  # Define a phony rule to run a target oat test using the default compiler.
-  default_test_rule := test-art-target-oat-default-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-  $(call define-test-art-oat-rule-target,$(1),$(2),$$(default_test_rule),)
-
-  ART_TEST_TARGET_OAT_DEFAULT$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(default_test_rule)
-  ART_TEST_TARGET_OAT_DEFAULT_RULES += $$(default_test_rule)
-  ART_TEST_TARGET_OAT_DEFAULT_$(1)_RULES += $$(default_test_rule)
-
-  optimizing_test_rule := test-art-target-oat-optimizing-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-  $(call define-test-art-oat-rule-target,$(1),$(2),$$(optimizing_test_rule), \
-    -Xcompiler-option --compiler-backend=Optimizing)
-
-  ART_TEST_TARGET_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(optimizing_test_rule)
-  ART_TEST_TARGET_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
-  ART_TEST_TARGET_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
-
-  # Define a phony rule to run a target oat test using the interpreter.
-  interpreter_test_rule := test-art-target-oat-interpreter-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-  $(call define-test-art-oat-rule-target,$(1),$(2),$$(interpreter_test_rule),-Xint)
-
-  ART_TEST_TARGET_OAT_INTERPRETER$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(interpreter_test_rule)
-  ART_TEST_TARGET_OAT_INTERPRETER_RULES += $$(interpreter_test_rule)
-  ART_TEST_TARGET_OAT_INTERPRETER_$(1)_RULES += $$(interpreter_test_rule)
-
-  # Define a phony rule to run both the default and interpreter variants.
-  all_test_rule :=  test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-.PHONY: $$(all_test_rule)
-$$(all_test_rule): $$(default_test_rule) $$(optimizing_test_rule) $$(interpreter_test_rule)
-	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
-
-  ART_TEST_TARGET_OAT$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(all_test_rule)
-  ART_TEST_TARGET_OAT_RULES += $$(all_test_rule)
-  ART_TEST_TARGET_OAT_$(1)_RULES += $$(all_test_rule)
-
-  # Clear locally defined variables.
-  interpreter_test_rule :=
-  default_test_rule :=
-  optimizing_test_rule :=
-  all_test_rule :=
-endef  # define-test-art-oat-rules-target
-
-ART_TEST_HOST_OAT_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_DEFAULT_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER_RULES :=
-ART_TEST_HOST_OAT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_RULES :=
-
-# All tests require the host executables, libarttest and the core images.
-ART_TEST_HOST_OAT_DEPENDENCIES := \
-  $(ART_HOST_EXECUTABLES) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
-  $(HOST_CORE_IMG_OUT)
-
-ifneq ($(HOST_PREFER_32_BIT),true)
-ART_TEST_HOST_OAT_DEPENDENCIES += \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_HOST_CORE_IMG_OUT)
-endif
-
-# Define rule to run an individual oat test on the host. Output from the test is written to the
-# host in /tmp/android-data in a directory named after the test's rule name (its target) and the parent
-# process' PID (i.e. the PID of make). On failure the output is dumped to the console.
-# $(1): directory - the name of the test we're building such as HelloWorld.
-# $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
-# $(3): the target (rule name), e.g. test-art-host-oat-default-HelloWorld64
-# $(4): argument to dex2oat
-# $(5): argument to runtime, e.g. -Xint or undefined
-define define-test-art-oat-rule-host
-  # Remove the leading / from /tmp for the test directory.
-  dex_file := $$(subst /tmp,tmp,$(ART_HOST_TEST_DIR))/android-data-$(3)/oat-test-dex-$(1).jar
-  oat_file := $(ART_HOST_TEST_DIR)/android-data-$(3)/dalvik-cache/$$($(2)HOST_ARCH)/$$(subst /,@,$$(dex_file))@classes.dex
-$(3): PRIVATE_DEX_FILE := /$$(dex_file)
-$(3): PRIVATE_OAT_FILE := $$(oat_file)
-.PHONY: $(3)
-$(3): $$(ART_TEST_HOST_OAT_$(1)_DEX) $(ART_TEST_HOST_OAT_DEPENDENCIES)
-	$(hide) mkdir -p $(ART_HOST_TEST_DIR)/android-data-$$@/dalvik-cache/$$($(2)HOST_ARCH)
-	$(hide) cp $$(realpath $$<) $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar
-	$(hide) $(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) $(4) \
-	  --boot-image=$$(HOST_CORE_IMG_LOCATION) \
-	  --dex-file=$$(PRIVATE_DEX_FILE) --oat-file=$$(PRIVATE_OAT_FILE) \
-	  --instruction-set=$($(2)ART_HOST_ARCH) --host --android-root=$(HOST_OUT) \
-	  || $$(call ART_TEST_FAILED,$$@)
-	$(hide) $$(call ART_TEST_SKIP,$$@) && \
-	ANDROID_DATA=$(ART_HOST_TEST_DIR)/android-data-$$@/ \
-	ANDROID_ROOT=$(HOST_OUT) \
-	ANDROID_LOG_TAGS='*:d' \
-	LD_LIBRARY_PATH=$$($(2)ART_HOST_OUT_SHARED_LIBRARIES) \
-	LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) \
-	$(HOST_OUT_EXECUTABLES)/dalvikvm$$($(2)ART_PHONY_TEST_HOST_SUFFIX) $(DALVIKVM_FLAGS) $(5) \
-	    -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$$(HOST_CORE_IMG_LOCATION) \
-	    -classpath $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar \
-	    -Djava.library.path=$$($(2)ART_HOST_OUT_SHARED_LIBRARIES) $(1) \
-	      > $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt 2>&1 \
-	  && $$(call ART_TEST_PASSED,$$@) \
-	  || (([ ! -f $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt ] || \
-	       cat $(ART_HOST_TEST_DIR)/android-data-$$@/output.txt) && $$(call ART_TEST_FAILED,$$@))
-	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
-	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
-	  rm -r $(ART_HOST_TEST_DIR)) || true
-endef  # define-test-art-oat-rule-host
-
-# Define rules to run oat tests on the host.
-# $(1): directory - the name of the test we're building such as HelloWorld.
-# $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
-define define-test-art-oat-rules-host
-  # Create a rule to run the host oat test with the default compiler.
-  default_test_rule := test-art-host-oat-default-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
-  $(call define-test-art-oat-rule-host,$(1),$(2),$$(default_test_rule),,)
-
-  ART_TEST_HOST_OAT_DEFAULT$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(default_test_rule)
-  ART_TEST_HOST_OAT_DEFAULT_RULES += $$(default_test_rule)
-  ART_TEST_HOST_OAT_DEFAULT_$(1)_RULES += $$(default_test_rule)
-
-  # Create a rule to run the host oat test with the optimizing compiler.
-  optimizing_test_rule := test-art-host-oat-optimizing-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
-  $(call define-test-art-oat-rule-host,$(1),$(2),$$(optimizing_test_rule),--compiler-backend=Optimizing,)
-
-  ART_TEST_HOST_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(optimizing_test_rule)
-  ART_TEST_HOST_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
-  ART_TEST_HOST_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
-
-  # Create a rule to run the host oat test with the interpreter.
-  interpreter_test_rule := test-art-host-oat-interpreter-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
-  $(call define-test-art-oat-rule-host,$(1),$(2),$$(interpreter_test_rule),--compiler-filter=interpret-only,-Xint)
-
-  ART_TEST_HOST_OAT_INTERPRETER$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(interpreter_test_rule)
-  ART_TEST_HOST_OAT_INTERPRETER_RULES += $$(interpreter_test_rule)
-  ART_TEST_HOST_OAT_INTERPRETER_$(1)_RULES += $$(interpreter_test_rule)
-
-  # Define a phony rule to run both the default and interpreter variants.
-  all_test_rule :=  test-art-host-oat-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
-.PHONY: $$(all_test_rule)
-$$(all_test_rule): $$(default_test_rule) $$(interpreter_test_rule) $$(optimizing_test_rule)
-	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
-
-  ART_TEST_HOST_OAT$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(all_test_rule)
-  ART_TEST_HOST_OAT_RULES += $$(all_test_rule)
-  ART_TEST_HOST_OAT_$(1)_RULES += $$(all_test_rule)
-
-  # Clear locally defined variables.
-  default_test_rule :=
-  optimizing_test_rule :=
-  interpreter_test_rule :=
-  all_test_rule :=
-endef  # define-test-art-oat-rules-host
-
-# For a given test create all the combinations of host/target, compiler and suffix such as:
-# test-art-host-oat-HelloWord or test-art-target-oat-interpreter-HelloWorld64
-# $(1): test name, e.g. HelloWorld
-# $(2): host or target
-# $(3): HOST or TARGET
-# $(4): undefined, -default, -optimizing or -interpreter
-# $(5): undefined, _DEFAULT, _OPTIMIZING or _INTERPRETER
-define define-test-art-oat-combination-for-test
-  ifeq ($(2),host)
-    ifneq ($(3),HOST)
-      $$(error argument mismatch $(2) and ($3))
-    endif
-  else
-    ifneq ($(2),target)
-      $$(error found $(2) expected host or target)
-    endif
-    ifneq ($(3),TARGET)
-      $$(error argument mismatch $(2) and ($3))
-    endif
-  endif
-
-  rule_name := test-art-$(2)-oat$(4)-$(1)
-  dependencies := $$(ART_TEST_$(3)_OAT$(5)_$(1)_RULES)
-
-  ifeq ($$(dependencies),)
-    ifneq ($(4),-optimizing)
-      $$(error $$(rule_name) has no dependencies)
-    endif
-  endif
-
-.PHONY: $$(rule_name)
-$$(rule_name): $$(dependencies)
-	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
-
-  # Clear locally defined variables.
-  rule_name :=
-  dependencies :=
-endef  # define-test-art-oat-combination
-
-# Define target and host oat test rules for the differing multilib flavors and default vs
-# interpreter runs. The format of the generated rules (for running an individual test) is:
-#   test-art-(host|target)-oat-(default|interpreter)-${directory}(32|64)
-# The rules are appended to various lists to enable shorter phony build rules to be built.
-# $(1): directory
-define define-test-art-oat-rules
-  # Define target tests.
-  ART_TEST_TARGET_OAT_DEFAULT_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_OPTIMIZING_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_INTERPRETER_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_$(1)_RULES :=
-  $(call define-test-art-oat-rules-target,$(1),)
-  ifdef TARGET_2ND_ARCH
-    $(call define-test-art-oat-rules-target,$(1),2ND_)
-  endif
-  $(call define-test-art-oat-combination-for-test,$(1),target,TARGET,,))
-  $(call define-test-art-oat-combination-for-test,$(1),target,TARGET,-default,_DEFAULT))
-  $(call define-test-art-oat-combination-for-test,$(1),target,TARGET,-optimizing,_OPTIMIZING))
-  $(call define-test-art-oat-combination-for-test,$(1),target,TARGET,-interpreter,_INTERPRETER))
-
-  # Define host tests.
-  ART_TEST_HOST_OAT_DEFAULT_$(1)_RULES :=
-  ART_TEST_HOST_OAT_OPTIMIZING_$(1)_RULES :=
-  ART_TEST_HOST_OAT_INTERPRETER_$(1)_RULES :=
-  ART_TEST_HOST_OAT_$(1)_RULES :=
-  $(call define-test-art-oat-rules-host,$(1),)
-  ifneq ($(HOST_PREFER_32_BIT),true)
-    $(call define-test-art-oat-rules-host,$(1),2ND_)
-  endif
-  $(call define-test-art-oat-combination-for-test,$(1),host,HOST,,)
-  $(call define-test-art-oat-combination-for-test,$(1),host,HOST,-default,_DEFAULT)
-  $(call define-test-art-oat-combination-for-test,$(1),host,HOST,-optimizing,_OPTIMIZING)
-  $(call define-test-art-oat-combination-for-test,$(1),host,HOST,-interpreter,_INTERPRETER)
-
-  # Clear locally defined variables.
-  ART_TEST_TARGET_OAT_DEFAULT_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_OPTIMIZING_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_INTERPRETER_$(1)_RULES :=
-  ART_TEST_TARGET_OAT_$(1)_RULES :=
-  ART_TEST_HOST_OAT_DEFAULT_$(1)_RULES :=
-  ART_TEST_HOST_OAT_OPTIMIZING_$(1)_RULES :=
-  ART_TEST_HOST_OAT_INTERPRETER_$(1)_RULES :=
-  ART_TEST_HOST_OAT_$(1)_RULES :=
-endef  # define-test-art-oat-rules
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call define-test-art-oat-rules,$(dir))))
-
-# Define all the combinations of host/target, compiler and suffix such as:
-# test-art-host-oat or test-art-target-oat-interpreter64
-# $(1): host or target
-# $(2): HOST or TARGET
-# $(3): undefined, -default, -optimizing or -interpreter
-# $(4): undefined, _DEFAULT, _OPTIMIZING or _INTERPRETER
-# $(5): undefined, 32 or 64
-define define-test-art-oat-combination
-  ifeq ($(1),host)
-    ifneq ($(2),HOST)
-      $$(error argument mismatch $(1) and ($2))
-    endif
-  else
-    ifneq ($(1),target)
-      $$(error found $(1) expected host or target)
-    endif
-    ifneq ($(2),TARGET)
-      $$(error argument mismatch $(1) and ($2))
-    endif
-  endif
-
-  rule_name := test-art-$(1)-oat$(3)$(5)
-  dependencies := $$(ART_TEST_$(2)_OAT$(4)$(5)_RULES)
-
-  ifeq ($$(dependencies),)
-    ifneq ($(3),-optimizing)
-      $$(error $$(rule_name) has no dependencies)
-    endif
-  endif
-
-.PHONY: $$(rule_name)
-$$(rule_name): $$(dependencies)
-	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
-
-  # Clear locally defined variables.
-  rule_name :=
-  dependencies :=
-
-endef  # define-test-art-oat-combination
-
-$(eval $(call define-test-art-oat-combination,target,TARGET,,,))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-default,_DEFAULT,))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-optimizing,_OPTIMIZING,))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-interpreter,_INTERPRETER,))
-$(eval $(call define-test-art-oat-combination,target,TARGET,,,$(ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-default,_DEFAULT,$(ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-optimizing,_OPTIMIZING,$(ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-interpreter,_INTERPRETER,$(ART_PHONY_TEST_TARGET_SUFFIX)))
-ifdef TARGET_2ND_ARCH
-$(eval $(call define-test-art-oat-combination,target,TARGET,,,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-default,_DEFAULT,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-optimizing,_OPTIMIZING,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,target,TARGET,-interpreter,_INTERPRETER,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
-endif
-
-$(eval $(call define-test-art-oat-combination,host,HOST,,,))
-$(eval $(call define-test-art-oat-combination,host,HOST,-default,_DEFAULT,))
-$(eval $(call define-test-art-oat-combination,host,HOST,-optimizing,_OPTIMIZING,))
-$(eval $(call define-test-art-oat-combination,host,HOST,-interpreter,_INTERPRETER,))
-$(eval $(call define-test-art-oat-combination,host,HOST,,,$(ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-default,_DEFAULT,$(ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-optimizing,_OPTIMIZING,$(ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-interpreter,_INTERPRETER,$(ART_PHONY_TEST_HOST_SUFFIX)))
-ifneq ($(HOST_PREFER_32_BIT),true)
-$(eval $(call define-test-art-oat-combination,host,HOST,,,$(2ND_ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-default,_DEFAULT,$(2ND_ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-optimizing,_OPTIMIZING,$(2ND_ART_PHONY_TEST_HOST_SUFFIX)))
-$(eval $(call define-test-art-oat-combination,host,HOST,-interpreter,_INTERPRETER,$(2ND_ART_PHONY_TEST_HOST_SUFFIX)))
-endif
-
-# Clear locally defined variables.
-define-test-art-oat-rule-target :=
-define-test-art-oat-rules-target :=
-define-test-art-oat-rule-host :=
-define-test-art-oat-rules-host :=
-define-test-art-oat-combination-for-test :=
-define-test-art-oat-combination :=
-ART_TEST_TARGET_OAT_DEFAULT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_DEFAULT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_DEFAULT_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_OPTIMIZING_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_INTERPRETER_RULES :=
-ART_TEST_TARGET_OAT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
-ART_TEST_TARGET_OAT_RULES :=
-ART_TEST_HOST_OAT_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_DEFAULT_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_OPTIMIZING_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_INTERPRETER_RULES :=
-ART_TEST_HOST_OAT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
-ART_TEST_HOST_OAT_RULES :=
-ART_TEST_HOST_OAT_DEPENDENCIES :=
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval ART_TEST_TARGET_OAT_$(dir)_DEX :=))
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval ART_TEST_HOST_OAT_$(dir)_DEX :=))
-TEST_OAT_DIRECTORIES :=
-LOCAL_PID :=
-LOCAL_PATH :=
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 78312d1..ac47da6 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -21,22 +21,23 @@
 TEST_ART_RUN_TESTS := $(wildcard $(LOCAL_PATH)/[0-9]*)
 TEST_ART_RUN_TESTS := $(subst $(LOCAL_PATH)/,, $(TEST_ART_RUN_TESTS))
 
-# List all the test names for host and target excluding the -trace suffix
+# List all the test names for host and target and compiler variants.
 # $(1): test name, e.g. 003-omnibus-opcodes
-# $(2): undefined or -trace
+# $(2): undefined, -trace, -gcverify or -gcstress
+# $(3): -relocate, -norelocate, -prebuild, -no-prebuild, or undefined.
 define all-run-test-names
-  test-art-host-run-test$(2)-default-$(1)32 \
-  test-art-host-run-test$(2)-optimizing-$(1)32 \
-  test-art-host-run-test$(2)-interpreter-$(1)32 \
-  test-art-host-run-test$(2)-default-$(1)64 \
-  test-art-host-run-test$(2)-optimizing-$(1)64 \
-  test-art-host-run-test$(2)-interpreter-$(1)64 \
-  test-art-target-run-test$(2)-default-$(1)32 \
-  test-art-target-run-test$(2)-optimizing-$(1)32 \
-  test-art-target-run-test$(2)-interpreter-$(1)32 \
-  test-art-target-run-test$(2)-default-$(1)64 \
-  test-art-target-run-test$(2)-optimizing-$(1)64 \
-  test-art-target-run-test$(2)-interpreter-$(1)64
+  test-art-host-run-test$(2)-default$(3)-$(1)32 \
+  test-art-host-run-test$(2)-optimizing$(3)-$(1)32 \
+  test-art-host-run-test$(2)-interpreter$(3)-$(1)32 \
+  test-art-host-run-test$(2)-default$(3)-$(1)64 \
+  test-art-host-run-test$(2)-optimizing$(3)-$(1)64 \
+  test-art-host-run-test$(2)-interpreter$(3)-$(1)64 \
+  test-art-target-run-test$(2)-default$(3)-$(1)32 \
+  test-art-target-run-test$(2)-optimizing$(3)-$(1)32 \
+  test-art-target-run-test$(2)-interpreter$(3)-$(1)32 \
+  test-art-target-run-test$(2)-default$(3)-$(1)64 \
+  test-art-target-run-test$(2)-optimizing$(3)-$(1)64 \
+  test-art-target-run-test$(2)-interpreter$(3)-$(1)64
 endef  # all-run-test-names
 
 # Tests that are timing sensitive and flaky on heavily loaded systems.
@@ -46,23 +47,48 @@
 
  # disable timing sensitive tests on "dist" builds.
 ifdef dist_goal
-  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),))
-  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),,))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcverify,))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),,-relocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-relocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcverify,-relocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-relocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),,-norelocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-norelocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcverify,-norelocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-norelocate))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),,-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcverify,-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),,-no-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-no-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcverify,-no-prebuild))
+  ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-no-prebuild))
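+  # The twenty additions above cover the full cross product of the four trace/GC
+  # variants (plain, -trace, -gcverify, -gcstress) and the five relocation modes
+  # (plain, -relocate, -norelocate, -prebuild, -no-prebuild).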
 endif
 
 # Tests that are broken in --trace mode.
 TEST_ART_BROKEN_TRACE_RUN_TESTS := \
   003-omnibus-opcodes \
-  004-annotations \
+  004-InterfaceTest \
+  004-SignalTest \
+  004-ThreadStress \
+  005-annotations \
+  012-math \
   018-stack-overflow \
   023-many-interfaces \
+  027-arithmetic \
   031-class-attributes \
   037-inherit \
   044-proxy \
   046-reflect \
   051-thread \
   055-enum-performance \
+  062-character-encodings \
   064-field-access \
+  074-gc-thrash \
   078-polymorphic-virtual \
   080-oom-throw \
   082-inline-execute \
@@ -74,9 +100,27 @@
   103-string-append \
   107-int-math2 \
   112-double-math \
+  114-ParallelGC \
+  700-LoadArgRegs \
   701-easy-div-rem
 
-ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-relocate))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-no-prebuild))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-prebuild))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-norelocate))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,))
+
+# Tests that need more than 2MB of RAM or hit other OOME-related corner cases
+# under GC stress.
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
+  114-ParallelGC
+
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-relocate))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-no-prebuild))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-prebuild))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,-norelocate))
+ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(call all-run-test-names,$(test),-gcstress,))
+
 
 # The path where build only targets will be output, e.g.
 # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
@@ -111,9 +155,13 @@
 include $(BUILD_PHONY_PACKAGE)
 
 # Clear temp vars.
-TEST_ART_RUN_TEST_BUILD_RULES :=
+all-run-test-names :=
 art_run_tests_dir :=
 define-build-art-run-test :=
+TEST_ART_RUN_TEST_BUILD_RULES :=
+TEST_ART_TIMING_SENSITIVE_RUN_TESTS :=
+TEST_ART_BROKEN_TRACE_RUN_TESTS :=
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 ########################################################################
 
@@ -121,38 +169,138 @@
 ART_TEST_TARGET_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD_RULES :=
 ART_TEST_TARGET_RUN_TEST_ALL$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 
 # We need dex2oat and dalvikvm on the target as well as the core image.
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(2ND_TARGET_CORE_IMG_OUT)
 
+# Also need libarttest.
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
+ifdef TARGET_2ND_ARCH
+TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libarttest.so
+endif
+
 # All tests require the host executables and the core images.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(HOST_CORE_IMG_OUT)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_HOST_CORE_IMG_OUT)
 endif
@@ -163,12 +311,15 @@
 # $(2): host or target
 # $(3): default, optimizing or interpreter
 # $(4): 32 or 64
-# $(5): run tests with tracing enabled or not: trace or undefined
+# $(5): run tests with tracing, GC verification or GC stress enabled or not: trace, gcverify, gcstress or undefined
+# $(6): relocate, norelocate, no-prebuild, prebuild or undefined (treated as prebuild).
 define define-test-art-run-test
   run_test_options := $(addprefix --runtime-option ,$(DALVIKVM_FLAGS))
-  run_test_rule_name := test-art-$(2)-run-test-$(3)-$(1)$(4)
+  run_test_rule_name :=
   uc_host_or_target :=
   prereq_rule :=
+  skip_test := false
+  uc_reloc_type :=
   ifeq ($(2),host)
     uc_host_or_target := HOST
     run_test_options += --host
@@ -181,10 +332,39 @@
       $$(error found $(2) expected host or target)
     endif
   endif
+  ifeq ($(6),relocate)
+    uc_reloc_type := RELOCATE
+    run_test_options += --relocate --no-prebuild
+    ifneq ($(ART_TEST_RUN_TEST_RELOCATE),true)
+      skip_test := true
+    endif
+  else
+    ifeq ($(6),no-prebuild)
+      uc_reloc_type := NO_PREBUILD
+      run_test_options += --no-relocate --no-prebuild
+      ifneq ($(ART_TEST_RUN_TEST_NO_PREBUILD),true)
+        skip_test := true
+      endif
+    else
+      ifeq ($(6),norelocate)
+        uc_reloc_type := NORELOCATE
+        run_test_options += --no-relocate --prebuild
+        ifneq ($(ART_TEST_RUN_TEST_NO_RELOCATE),true)
+          skip_test := true
+        endif
+      else
+        uc_reloc_type := PREBUILD
+        run_test_options += --relocate --prebuild
+      endif
+    endif
+  endif
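+  # In summary: relocate runs with --relocate --no-prebuild, no-prebuild with
+  # --no-relocate --no-prebuild, norelocate with --no-relocate --prebuild, and
+  # anything else (the prebuild default) with --relocate --prebuild.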
   uc_compiler :=
   ifeq ($(3),optimizing)
     uc_compiler := OPTIMIZING
     run_test_options += -Xcompiler-option --compiler-backend=Optimizing
+    ifneq ($$(ART_TEST_OPTIMIZING),true)
+      skip_test := true
+    endif
   else
     ifeq ($(3),interpreter)
       uc_compiler := INTERPRETER
@@ -206,14 +386,37 @@
   endif
   ifeq ($(5),trace)
     run_test_options += --trace
-    run_test_rule_name := test-art-$(2)-run-test-trace-$(3)-$(1)$(4)
+    run_test_rule_name := test-art-$(2)-run-test-trace-$(3)-$(6)-$(1)$(4)
+    ifneq ($$(ART_TEST_TRACE),true)
+      skip_test := true
+    endif
   else
-    ifneq (,$(5))
-      $$(error found $(5) expected undefined or -trace)
+    ifeq ($(5),gcverify)
+      run_test_options += --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify \
+        --runtime-option -Xgc:preverify_rosalloc --runtime-option -Xgc:postverify_rosalloc
+      run_test_rule_name := test-art-$(2)-run-test-gcverify-$(3)-$(6)-$(1)$(4)
+      ifneq ($$(ART_TEST_GC_VERIFY),true)
+        skip_test := true
+      endif
+    else
+      ifeq ($(5),gcstress)
+        run_test_options += --runtime-option -Xgc:SS --runtime-option -Xms2m \
+          --runtime-option -Xmx2m --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify
+        run_test_rule_name := test-art-$(2)-run-test-gcstress-$(3)-$(6)-$(1)$(4)
+        ifneq ($$(ART_TEST_GC_STRESS),true)
+          skip_test := true
+        endif
+      else
+        ifneq (,$(5))
+          $$(error found $(5) expected undefined, trace, gcverify or gcstress)
+        endif
+        run_test_rule_name := test-art-$(2)-run-test-$(3)-$(6)-$(1)$(4)
+      endif
     endif
   endif
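+  # In summary: trace, gcverify and gcstress each add their runtime options and
+  # are gated on ART_TEST_TRACE, ART_TEST_GC_VERIFY and ART_TEST_GC_STRESS
+  # respectively; an empty argument runs the test unmodified.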
-  run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \
-    $$(run_test_options)
+  ifeq ($$(skip_test),false)
+    run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \
+      $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 .PHONY: $$(run_test_rule_name)
 $$(run_test_rule_name): $(DX) $(HOST_OUT_EXECUTABLES)/jasmin $$(prereq_rule)
@@ -224,20 +427,28 @@
 	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
 	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
 	  rm -r $(ART_HOST_TEST_DIR)) || true
+  else
+    .PHONY: $$(run_test_rule_name)
+$$(run_test_rule_name):
+  endif
 
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)$(4)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_$(1)_RULES += $$(run_test_rule_name)
-  ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_RULES += $$(run_test_rule_name)
+  ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_$$(uc_reloc_type)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$(1)_RULES += $$(run_test_rule_name)
+  ART_TEST_$$(uc_host_or_target)_RUN_TEST_$(1)$(4)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_ALL_RULES += $$(run_test_rule_name)
+  ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_reloc_type)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_ALL$(4)_RULES += $$(run_test_rule_name)
 
   # Clear locally defined variables.
+  skip_test :=
   run_test_options :=
   run_test_rule_name :=
   uc_host_or_target :=
   prereq_rule :=
+  uc_reloc_type :=
   uc_compiler :=
 endef  # define-test-art-run-test
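+# For example, $(eval $(call define-test-art-run-test,001-HelloWorld,host,default,32,trace,relocate))
+# defines the phony rule test-art-host-run-test-trace-default-relocate-001-HelloWorld32,
+# or an empty stub when ART_TEST_TRACE or ART_TEST_RUN_TEST_RELOCATE is not true.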
 
@@ -254,7 +465,8 @@
 # Create rules for a group of run tests.
 # $(1): test name, e.g. 003-omnibus-opcodes
 # $(2): host or target
-define define-test-art-run-test-group
+# $(3): relocate, norelocate, no-prebuild or prebuild.
+define define-test-art-run-test-group-type
   group_uc_host_or_target :=
   ifeq ($(2),host)
     group_uc_host_or_target := HOST
@@ -266,17 +478,18 @@
     endif
   endif
 
-  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_DEFAULT_$(1)_RULES :=
-  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_INTERPRETER_$(1)_RULES :=
-  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_OPTIMIZING_$(1)_RULES :=
-  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)_RULES :=
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-  ifeq ($(2),host)
-    # For now just test tracing on the host with default.
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace))
-  endif
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
+  $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
   do_second := false
   ifeq ($(2),host)
     ifneq ($$(HOST_PREFER_32_BIT),true)
@@ -288,15 +501,57 @@
     endif
   endif
   ifeq (true,$$(do_second))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),))
-    ifeq ($(2),host)
-      # For now just test tracing on the host with default.
-      $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),trace,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcverify,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),default,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),interpreter,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
+    $$(eval $$(call define-test-art-run-test,$(1),$(2),optimizing,$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX),gcstress,$(3)))
+  endif
+endef  # define-test-art-run-test-group-type
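+# Each invocation of this macro defines twelve rules per architecture: the three
+# compilers (default, interpreter, optimizing) crossed with the four run modes
+# (plain, trace, gcverify, gcstress).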
+
+# Create rules for a group of run tests.
+# $(1): test name, e.g. 003-omnibus-opcodes
+# $(2): host or target
+define define-test-art-run-test-group
+  group_uc_host_or_target :=
+  ifeq ($(2),host)
+    group_uc_host_or_target := HOST
+  else
+    ifeq ($(2),target)
+      group_uc_host_or_target := TARGET
+    else
+      $$(error found $(2) expected host or target)
     endif
   endif
-
+  do_second := false
+  ifeq ($(2),host)
+    ifneq ($$(HOST_PREFER_32_BIT),true)
+      do_second := true
+    endif
+  else
+    ifdef TARGET_2ND_ARCH
+      do_second := true
+    endif
+  endif
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_DEFAULT_$(1)_RULES :=
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_INTERPRETER_$(1)_RULES :=
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_OPTIMIZING_$(1)_RULES :=
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)_RULES :=
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES :=
+  ifeq ($$(do_second),true)
+    ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES :=
+  endif
+  $$(eval $$(call define-test-art-run-test-group-type,$(1),$(2),prebuild))
+  $$(eval $$(call define-test-art-run-test-group-type,$(1),$(2),norelocate))
+  $$(eval $$(call define-test-art-run-test-group-type,$(1),$(2),relocate))
+  $$(eval $$(call define-test-art-run-test-group-type,$(1),$(2),no-prebuild))
   $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-default-$(1), \
     $$(ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_DEFAULT_$(1)_RULES)))
   $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-interpreter-$(1), \
@@ -305,12 +560,22 @@
     $$(ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_OPTIMIZING_$(1)_RULES)))
   $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-$(1), \
     $$(ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)_RULES)))
+  $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-$(1)$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX), \
+    $$(ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES)))
+  ifeq ($$(do_second),true)
+    $$(eval $$(call define-test-art-run-test-group-rule,test-art-$(2)-run-test-$(1)$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX), \
+      $$(ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES)))
+  endif
 
   # Clear locally defined variables.
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_DEFAULT_$(1)_RULES :=
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_INTERPRETER_$(1)_RULES :=
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_OPTIMIZING_$(1)_RULES :=
   ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)_RULES :=
+  ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES :=
+  ifeq ($$(do_second),true)
+    ART_TEST_$$(group_uc_host_or_target)_RUN_TEST_$(1)$$(2ND_ART_PHONY_TEST_$$(group_uc_host_or_target)_SUFFIX)_RULES :=
+  endif
   group_uc_host_or_target :=
   do_second :=
 endef  # define-test-art-run-test-group
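+# Combined with the four prebuild/relocation types above, a single test can thus
+# expand into as many as 96 rules per host or target build: 4 types x 12
+# compiler/mode variants x up to 2 architectures.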
@@ -318,6 +583,14 @@
 $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-test-art-run-test-group,$(test),target)))
 $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-test-art-run-test-group,$(test),host)))
 
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-no-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-norelocate, \
+  $(ART_TEST_TARGET_RUN_TEST_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-relocate, \
+  $(ART_TEST_TARGET_RUN_TEST_RELOCATE_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test, \
   $(ART_TEST_TARGET_RUN_TEST_ALL_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default, \
@@ -326,6 +599,30 @@
   $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing, \
   $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-no-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-no-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-no-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-prebuild, \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-norelocate, \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-norelocate, \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-norelocate, \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-relocate, \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-relocate, \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-relocate, \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test$(ART_PHONY_TEST_TARGET_SUFFIX), \
   $(ART_TEST_TARGET_RUN_TEST_ALL$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default$(ART_PHONY_TEST_TARGET_SUFFIX), \
@@ -334,6 +631,38 @@
   $(ART_TEST_TARGET_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing$(ART_PHONY_TEST_TARGET_SUFFIX), \
   $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-no-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-norelocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-relocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-no-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-no-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-no-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-prebuild$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-norelocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-norelocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-norelocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-relocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-relocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-relocate$(ART_PHONY_TEST_TARGET_SUFFIX), \
+  $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
 ifdef TARGET_2ND_ARCH
   $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
     $(ART_TEST_TARGET_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
@@ -343,8 +672,48 @@
     $(ART_TEST_TARGET_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
   $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
     $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-no-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-norelocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-relocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-no-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-no-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-no-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-prebuild$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-norelocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-norelocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-norelocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-default-relocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-interpreter-relocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-target-run-test-optimizing-relocate$(2ND_ART_PHONY_TEST_TARGET_SUFFIX), \
+    $(ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES)))
 endif
 
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-no-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-norelocate, \
+  $(ART_TEST_HOST_RUN_TEST_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-relocate, \
+  $(ART_TEST_HOST_RUN_TEST_RELOCATE_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test, \
   $(ART_TEST_HOST_RUN_TEST_ALL_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default, \
@@ -353,6 +722,30 @@
   $(ART_TEST_HOST_RUN_TEST_INTERPRETER_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing, \
   $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-no-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-no-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-no-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-prebuild, \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-norelocate, \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-norelocate, \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-norelocate, \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-relocate, \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-relocate, \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-relocate, \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test$(ART_PHONY_TEST_HOST_SUFFIX), \
   $(ART_TEST_HOST_RUN_TEST_ALL$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default$(ART_PHONY_TEST_HOST_SUFFIX), \
@@ -361,6 +754,38 @@
   $(ART_TEST_HOST_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
 $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing$(ART_PHONY_TEST_HOST_SUFFIX), \
   $(ART_TEST_HOST_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-no-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-norelocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-relocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-no-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-no-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-no-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-prebuild$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-norelocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-norelocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-norelocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-relocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-relocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+$(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-relocate$(ART_PHONY_TEST_HOST_SUFFIX), \
+  $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
 ifneq ($(HOST_PREFER_32_BIT),true)
   $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
     $(ART_TEST_HOST_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
@@ -370,33 +795,160 @@
     $(ART_TEST_HOST_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
   $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
     $(ART_TEST_HOST_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-no-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-norelocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-relocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-no-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-no-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-no-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-prebuild$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-norelocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-norelocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-norelocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-default-relocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-interpreter-relocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
+  $(eval $(call define-test-art-run-test-group-rule,test-art-host-run-test-optimizing-relocate$(2ND_ART_PHONY_TEST_HOST_SUFFIX), \
+    $(ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES)))
 endif
 
+# Include the libarttest build rules.
+include $(LOCAL_PATH)/Android.libarttest.mk
+
 define-test-art-run-test :=
 define-test-art-run-test-group-rule :=
 define-test-art-run-test-group :=
-all-run-test-names :=
+TEST_ART_RUN_TESTS :=
 ART_TEST_TARGET_RUN_TEST_ALL_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD_RULES :=
 ART_TEST_TARGET_RUN_TEST_ALL$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_ALL$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_DEFAULT$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_INTERPRETER$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
 ART_TEST_HOST_RUN_TEST_OPTIMIZING$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_RELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NORELOCATE$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_DEFAULT_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_INTERPRETER_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_NO_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
+ART_TEST_HOST_RUN_TEST_OPTIMIZING_PREBUILD$(2ND_ART_PHONY_TEST_HOST_SUFFIX)_RULES :=
diff --git a/test/etc/default-check b/test/etc/default-check
new file mode 100755
index 0000000..46a095c
--- /dev/null
+++ b/test/etc/default-check
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
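+# Compare the expected output ($1) with the actual output ($2); an exit status
+# of 0 means they match. --strip-trailing-cr ignores CR/LF line-ending
+# differences.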
+diff --strip-trailing-cr -q "$1" "$2" >/dev/null
\ No newline at end of file
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
index 4265f1c..d72e997 100755
--- a/test/etc/host-run-test-jar
+++ b/test/etc/host-run-test-jar
@@ -10,20 +10,28 @@
 }
 
 DEBUGGER="n"
+PREBUILD="n"
 GDB="n"
+ISA="x86"
 INTERPRETER="n"
 VERIFY="y"
+RELOCATE="y"
 OPTIMIZE="y"
 INVOKE_WITH=""
 DEV_MODE="n"
 QUIET="n"
 FLAGS=""
+COMPILER_FLAGS=""
+BUILD_BOOT_OPT=""
 exe="${ANDROID_HOST_OUT}/bin/dalvikvm32"
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
         QUIET="y"
         shift
+    elif [ "x$1" = "x--prebuild" ]; then
+        PREBUILD="y"
+        shift
     elif [ "x$1" = "x--lib" ]; then
         shift
         if [ "x$1" = "x" ]; then
@@ -37,7 +45,9 @@
         shift
     elif [ "x$1" = "x--boot" ]; then
         shift
-        BOOT_OPT="$1"
+        option="$1"
+        BOOT_OPT="$option"
+        BUILD_BOOT_OPT="--boot-image=${option#-Ximage:}"
         shift
     elif [ "x$1" = "x--debug" ]; then
         DEBUGGER="y"
@@ -65,6 +75,7 @@
         INTERPRETER="y"
         shift
     elif [ "x$1" = "x--64" ]; then
+        ISA="x64"
         exe="${ANDROID_HOST_OUT}/bin/dalvikvm64"
         shift
     elif [ "x$1" = "x--no-verify" ]; then
@@ -73,10 +84,17 @@
     elif [ "x$1" = "x--no-optimize" ]; then
         OPTIMIZE="n"
         shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        RELOCATE="n"
+        shift
+    elif [ "x$1" = "x--relocate" ]; then
+        RELOCATE="y"
+        shift
     elif [ "x$1" = "x-Xcompiler-option" ]; then
         shift
         option="$1"
         FLAGS="${FLAGS} -Xcompiler-option $option"
+        COMPILER_FLAGS="${COMPILER_FLAGS} $option"
         shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
@@ -129,13 +147,46 @@
 
 if [ "$INTERPRETER" = "y" ]; then
     INT_OPTS="-Xint"
+    COMPILER_FLAGS="${COMPILER_FLAGS} --compiler-filter=interpret-only"
+fi
+
+if [ "$RELOCATE" = "y" ]; then
+  FLAGS="${FLAGS} -Xrelocate"
+  COMPILER_FLAGS="${COMPILER_FLAGS} --runtime-arg -Xnorelocate --include-patch-information"
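+  # dex2oat's --runtime-arg forwards the next option to the runtime it brings
+  # up internally, so -Xnorelocate here only stops dex2oat itself from
+  # relocating; the test run still gets -Xrelocate via FLAGS above.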
+  # run-test sets a fairly draconian file-size ulimit that we would likely blow
+  # right past while relocating. Measure the total size of the
+  # ${ANDROID_ROOT}/framework directory in 512-byte blocks and use that as the
+  # ulimit instead; that should leave more than enough room.
+  ulimit -S $(du -c -B512 ${ANDROID_ROOT}/framework | tail -1 | cut -f1) || exit 1
+else
+  FLAGS="${FLAGS} -Xnorelocate"
+  COMPILER_FLAGS="${COMPILER_FLAGS} --runtime-arg -Xnorelocate --no-include-patch-information"
+fi
+
+mkdir_cmd="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+if [ "$PREBUILD" = "y" ]; then
+  prebuild_cmd="${ANDROID_HOST_OUT}/bin/dex2oatd $COMPILER_FLAGS --instruction-set=$ISA $BUILD_BOOT_OPT --dex-file=$DEX_LOCATION/$TEST_NAME.jar --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g")"
+else
+  prebuild_cmd="true"
 fi
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
-
+cmdline="$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $FLAGS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
-  echo $cmdline "$@"
+  if [ "$PREBUILD" = "y" ]; then
+    echo "$mkdir_cmd && $prebuild_cmd && $cmdline"
+  elif [ "$RELOCATE" = "y" ]; then
+    echo "$mkdir_cmd && $cmdline"
+  else
+    echo $cmdline
+  fi
 fi
 
 cd $ANDROID_BUILD_TOP
-$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $FLAGS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
+$mkdir_cmd && $prebuild_cmd && LD_PRELOAD=libsigchain.so $cmdline "$@"
diff --git a/test/etc/push-and-run-prebuilt-test-jar b/test/etc/push-and-run-prebuilt-test-jar
new file mode 100755
index 0000000..2b9604b
--- /dev/null
+++ b/test/etc/push-and-run-prebuilt-test-jar
@@ -0,0 +1,214 @@
+#!/bin/sh
+#
+# Run the code in test.jar on the device. The jar should contain a top-level
+# class named Main to run.
+
+msg() {
+    if [ "$QUIET" = "n" ]; then
+        echo "$@"
+    fi
+}
+
+ARCHITECTURES_32="(arm|x86|mips|none)"
+ARCHITECTURES_64="(arm64|x86_64|none)"
+ARCHITECTURES_PATTERN="${ARCHITECTURES_32}"
+RELOCATE="y"
+GDB="n"
+DEBUGGER="n"
+INTERPRETER="n"
+VERIFY="y"
+OPTIMIZE="y"
+ZYGOTE=""
+QUIET="n"
+DEV_MODE="n"
+INVOKE_WITH=""
+FLAGS=""
+TARGET_SUFFIX=""
+COMPILE_FLAGS=""
+
+while true; do
+    if [ "x$1" = "x--quiet" ]; then
+        QUIET="y"
+        shift
+    elif [ "x$1" = "x--lib" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --lib" 1>&2
+            exit 1
+        fi
+        LIB="$1"
+        shift
+    elif [ "x$1" = "x-Xcompiler-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} -Xcompiler-option $option"
+        COMPILE_FLAGS="${COMPILE_FLAGS} $option"
+        shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
+        shift
+    elif [ "x$1" = "x--boot" ]; then
+        shift
+        BOOT_OPT="$1"
+        BUILD_BOOT_OPT="--boot-image=${1#-Ximage:}"
+        shift
+    elif [ "x$1" = "x--relocate" ]; then
+        RELOCATE="y"
+        shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        RELOCATE="n"
+        shift
+    elif [ "x$1" = "x--debug" ]; then
+        DEBUGGER="y"
+        shift
+    elif [ "x$1" = "x--gdb" ]; then
+        GDB="y"
+        DEV_MODE="y"
+        shift
+    elif [ "x$1" = "x--zygote" ]; then
+        ZYGOTE="--zygote"
+        msg "Spawning from zygote"
+        shift
+    elif [ "x$1" = "x--dev" ]; then
+        DEV_MODE="y"
+        shift
+    elif [ "x$1" = "x--interpreter" ]; then
+        INTERPRETER="y"
+        shift
+    elif [ "x$1" = "x--invoke-with" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --invoke-with" 1>&2
+            exit 1
+        fi
+        if [ "x$INVOKE_WITH" = "x" ]; then
+            INVOKE_WITH="$1"
+        else
+            INVOKE_WITH="$INVOKE_WITH $1"
+        fi
+        shift
+    elif [ "x$1" = "x--no-verify" ]; then
+        VERIFY="n"
+        shift
+    elif [ "x$1" = "x--no-optimize" ]; then
+        OPTIMIZE="n"
+        shift
+    elif [ "x$1" = "x--" ]; then
+        shift
+        break
+    elif [ "x$1" = "x--64" ]; then
+        TARGET_SUFFIX="64"
+        ARCHITECTURES_PATTERN="${ARCHITECTURES_64}"
+        shift
+    elif expr "x$1" : "x--" >/dev/null 2>&1; then
+        echo "unknown $0 option: $1" 1>&2
+        exit 1
+    else
+        break
+    fi
+done
+
+if [ "$ZYGOTE" = "" ]; then
+    if [ "$OPTIMIZE" = "y" ]; then
+        if [ "$VERIFY" = "y" ]; then
+            DEX_OPTIMIZE="-Xdexopt:verified"
+        else
+            DEX_OPTIMIZE="-Xdexopt:all"
+        fi
+        msg "Performing optimizations"
+    else
+        DEX_OPTIMIZE="-Xdexopt:none"
+        msg "Skipping optimizations"
+    fi
+
+    if [ "$VERIFY" = "y" ]; then
+        DEX_VERIFY=""
+        msg "Performing verification"
+    else
+        DEX_VERIFY="-Xverify:none"
+        msg "Skipping verification"
+    fi
+fi
+
+msg "------------------------------"
+
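+# Infer the device's instruction set from which architecture-named
+# subdirectory (arm, x86, mips, or a 64-bit variant) already exists under
+# /data/dalvik-cache.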
+ARCH=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
+if [ x"$ARCH" = "x" ]; then
+  echo "Unable to determine architecture"
+  exit 1
+fi
+
+if [ "$QUIET" = "n" ]; then
+  adb shell rm -r $DEX_LOCATION
+  adb shell mkdir -p $DEX_LOCATION
+  adb push $TEST_NAME.jar $DEX_LOCATION
+  adb push $TEST_NAME-ex.jar $DEX_LOCATION
+else
+  adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
+  adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
+  adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
+  adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
+fi
+
+if [ "$DEBUGGER" = "y" ]; then
+  # Use this instead for ddms and connect by running 'ddms':
+  # DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_android_adb,server=y,suspend=y"
+  # TODO: add a separate --ddms option?
+
+  PORT=12345
+  msg "Waiting for jdb to connect:"
+  msg "    adb forward tcp:$PORT tcp:$PORT"
+  msg "    jdb -attach localhost:$PORT"
+  DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_socket,address=$PORT,server=y,suspend=y"
+fi
+
+if [ "$GDB" = "y" ]; then
+    gdb="gdbserver$TARGET_SUFFIX :5039"
+    gdbargs="$exe"
+fi
+
+if [ "$INTERPRETER" = "y" ]; then
+    INT_OPTS="-Xint"
+    COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+fi
+
+JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
+
+if [ "$RELOCATE" = "y" ]; then
+    RELOCATE_OPT="-Xrelocate"
+    BUILD_RELOCATE_OPT="--runtime-arg -Xnorelocate"
+    COMPILE_FLAGS="${COMPILE_FLAGS} --include-patch-information"
+    FLAGS="${FLAGS} -Xcompiler-option --include-patch-information"
+else
+    RELOCATE_OPT="-Xnorelocate"
+    BUILD_RELOCATE_OPT="--runtime-arg -Xnorelocate"
+fi
+
+# The command line below can get longer than the longest command adb allows,
+# and there is no way to get the exit status back from an adb shell command,
+# so we write it to a script file and run that on the device instead.
+cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+    mkdir -p $DEX_LOCATION/dalvik-cache/$ARCH/ && \
+    $INVOKE_WITH /system/bin/dex2oatd $COMPILE_FLAGS $BUILD_BOOT_OPT $BUILD_RELOCATE_OPT --runtime-arg -classpath --runtime-arg $DEX_LOCATION/$TEST_NAME.jar --dex-file=$DEX_LOCATION/$TEST_NAME.jar --oat-file=$DEX_LOCATION/dalvik-cache/$ARCH/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") --instruction-set=$ARCH && \
+    $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $RELOCATE_OPT $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main $@"
+cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
+echo "$cmdline" > $cmdfile
+
+if [ "$DEV_MODE" = "y" ]; then
+  echo $cmdline
+fi
+
+if [ "$QUIET" = "n" ]; then
+  adb push $cmdfile $DEX_LOCATION/cmdline.sh
+else
+  adb push $cmdfile $DEX_LOCATION/cmdline.sh > /dev/null 2>&1
+fi
+
+adb shell sh $DEX_LOCATION/cmdline.sh
+
+rm -f $cmdfile
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index b090c33..776a011 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -9,6 +9,7 @@
     fi
 }
 
+RELOCATE="y"
 GDB="n"
 DEBUGGER="n"
 INTERPRETER="n"
@@ -61,6 +62,12 @@
     elif [ "x$1" = "x--dev" ]; then
         DEV_MODE="y"
         shift
+    elif [ "x$1" = "x--relocate" ]; then
+        RELOCATE="y"
+        shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        RELOCATE="n"
+        shift
     elif [ "x$1" = "x--interpreter" ]; then
         INTERPRETER="y"
         shift
@@ -155,8 +162,15 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
+if [ "$RELOCATE" = "y" ]; then
+  RELOCATE_OPT="-Xrelocate"
+  FLAGS="${FLAGS} -Xcompiler-option --include-patch-information"
+else
+  RELOCATE_OPT="-Xnorelocate"
+fi
+
 cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
-    $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
+    $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $RELOCATE_OPT $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"
 fi
diff --git a/test/run-all-tests b/test/run-all-tests
index 25d5c5f..02f46f9 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -83,6 +83,18 @@
     elif [ "x$1" = "x--trace" ]; then
         run_args="${run_args} --trace"
         shift
+    elif [ "x$1" = "x--relocate" ]; then
+        run_args="${run_args} --relocate"
+        shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        run_args="${run_args} --no-relocate"
+        shift
+    elif [ "x$1" = "x--no-prebuild" ]; then
+        run_args="${run_args} --no-prebuild"
+        shift
+    elif [ "x$1" = "x--prebuild" ]; then
+        run_args="${run_args} --prebuild"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -101,7 +113,8 @@
         echo "  Options are all passed to run-test; refer to that for " \
              "further documentation:"
         echo "    --debug --dev --host --interpreter --jvm --no-optimize"
-        echo "    --no-verify -O --update --valgrind --zygote --64"
+        echo "    --no-verify -O --update --valgrind --zygote --64 --relocate"
+        echo "    --prebuild"
         echo "  Specific Runtime Options:"
         echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
     ) 1>&2
diff --git a/test/run-test b/test/run-test
index 2989f25..0e42efe 100755
--- a/test/run-test
+++ b/test/run-test
@@ -56,29 +56,32 @@
 build="build"
 run="run"
 expected="expected.txt"
+check_cmd="check"
 output="output.txt"
 build_output="build-output.txt"
 lib="libartd.so"
 run_args="--quiet"
 
+prebuild_mode="yes"
 target_mode="yes"
 dev_mode="no"
 update_mode="no"
+debug_mode="no"
+relocate="yes"
 runtime="art"
 usage="no"
 build_only="no"
 suffix64=""
+trace="false"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
         target_mode="no"
-        RUN="${progdir}/etc/host-run-test-jar"
         DEX_LOCATION=$tmp_dir
         shift
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
-        RUN="${progdir}/etc/reference-run-test-classes"
         NEED_DEX="false"
         shift
     elif [ "x$1" = "x-O" ]; then
@@ -88,6 +91,18 @@
         lib="libdvm.so"
         runtime="dalvik"
         shift
+    elif [ "x$1" = "x--relocate" ]; then
+        relocate="yes"
+        shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        relocate="no"
+        shift
+    elif [ "x$1" = "x--prebuild" ]; then
+        prebuild_mode="yes"
+        shift
+    elif [ "x$1" = "x--no-prebuild" ]; then
+        prebuild_mode="no"
+        shift
     elif [ "x$1" = "x--image" ]; then
         shift
         image="$1"
@@ -162,7 +177,7 @@
         suffix64="64"
         shift
     elif [ "x$1" = "x--trace" ]; then
-        run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin"
+        trace="true"
         shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
@@ -174,13 +189,50 @@
 done
 mkdir -p $tmp_dir
 
+if [ "$trace" = "true" ]; then
+    run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin --runtime-option -Xmethod-trace-file-size:2000000"
+fi
+
+# Most interesting target architecture variables are Makefile variables, not environment variables.
+# Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name.
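+# For example, an arm device would have an "arm" (and, on 64-bit builds, an
+# "arm64") subdirectory there; passing --64 (suffix64=64) selects the latter.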
+function guess_arch_name() {
+    grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'`
+    grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64)$'`
+    if [ "x${suffix64}" = "x64" ]; then
+        target_arch_name=${grep64bit}
+    else
+        target_arch_name=${grep32bit}
+    fi
+}
+
+if [ "$target_mode" = "no" ]; then
+    if [ "$runtime" = "jvm" ]; then
+        RUN="${progdir}/etc/reference-run-test-classes"
+        if [ "$prebuild_mode" = "yes" ]; then
+            echo "--prebuild with --jvm is unsupported";
+            exit 1;
+        fi
+    else
+        RUN="${progdir}/etc/host-run-test-jar"
+        if [ "$prebuild_mode" = "yes" ]; then
+            run_args="${run_args} --prebuild"
+        fi
+    fi
+else
+    if [ "$prebuild_mode" = "yes" ]; then
+        RUN="${progdir}/etc/push-and-run-prebuilt-test-jar"
+    fi
+fi
+
 if [ ! "$runtime" = "jvm" ]; then
   run_args="${run_args} --lib $lib"
 fi
 
 if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
-        framework="${OUT}/system/framework"
+        framework="${ANDROID_PRODUCT_OUT}/system/framework"
         bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
         run_args="${run_args} --boot -Xbootclasspath:${bpath}"
     else
@@ -196,9 +246,17 @@
 	    export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
         fi
         run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
+        run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
+        guess_arch_name
+        run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
         run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
     fi
+    if [ "$relocate" = "yes" ]; then
+      run_args="${run_args} --relocate"
+    else
+      run_args="${run_args} --no-relocate"
+    fi
 fi
 
 if [ "$dev_mode" = "yes" -a "$update_mode" = "yes" ]; then
@@ -241,7 +299,7 @@
         echo "  Runtime Options:"
         echo "    -O                   Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option    Pass an option to the compiler."
-        echo "    -runtime-option      Pass an option to the runtime."
+        echo "    --runtime-option     Pass an option to the runtime."
         echo "    --debug              Wait for a debugger to attach."
         echo "    --gdb                Run under gdb; incompatible with some tests."
         echo "    --build-only         Build test files only (off by default)."
@@ -252,6 +310,13 @@
         echo "    --zygote             Spawn the process from the Zygote." \
              "If used, then the"
         echo "                         other runtime options are ignored."
+        echo "    --prebuild           Run dex2oat on the files before starting test. (default)"
+        echo "    --no-prebuild        Do not run dex2oat on the files before starting"
+        echo "                         the test."
+        echo "    --relocate           Force the use of relocating in the test, making"
+        echo "                         the image and oat files be relocated to a random"
+        echo "                         address before running. (default)"
+        echo "    --no-relocate        Force the use of no relocating in the test"
         echo "    --host               Use the host-mode virtual machine."
         echo "    --invoke-with        Pass --invoke-with option to runtime."
         echo "    --dalvik             Use Dalvik (off by default)."
@@ -296,8 +361,15 @@
     cp "${progdir}/etc/default-run" run
 fi
 
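+# A test may ship its own "check" script to post-process or fuzzily compare
+# its output; fall back to the stock default-check (a plain diff) otherwise.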
+if [ '!' -r "$check_cmd" ]; then
+    cp "${progdir}/etc/default-check" check
+fi
+
 chmod 755 "$build"
 chmod 755 "$run"
+chmod 755 "$check_cmd"
 
 export TEST_NAME=`basename ${test_dir}`
 
@@ -308,7 +378,7 @@
 elif echo "$test_dir" | grep 083; then
   file_size_limit=5120
 fi
-if ! ulimit "$file_size_limit"; then
+if ! ulimit -S "$file_size_limit"; then
    echo "ulimit file size setting failed"
 fi
 
@@ -364,7 +434,7 @@
         cp "$build_output" "$output"
         echo "build exit status: $build_exit" >>"$output"
     fi
-    diff --strip-trailing-cr -q "$expected" "$output" >/dev/null
+    ./$check_cmd "$expected" "$output"
     if [ "$?" = "0" ]; then
         # output == expected
         good="yes"