Merge "Fixed and refactored profiler options handling"
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 83c536f..aa167b2 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -119,18 +119,29 @@
 endif
 
 # Clang build support.
-# Target builds use GCC by default.
-ART_TARGET_CLANG := false
+
+# Host.
 ART_HOST_CLANG := false
 ifneq ($(WITHOUT_HOST_CLANG),true)
   # By default, host builds use clang for better warnings.
   ART_HOST_CLANG := true
 endif
 
-# enable ART_TARGET_CLANG for ARM64
-ifneq (,$(filter $(TARGET_ARCH),arm64))
-ART_TARGET_CLANG := true
-endif
+# Clang on the target: only enabled for ARM64. Target builds use GCC by default.
+ART_TARGET_CLANG :=
+ART_TARGET_CLANG_arm :=
+ART_TARGET_CLANG_arm64 := true
+ART_TARGET_CLANG_mips :=
+ART_TARGET_CLANG_x86 :=
+ART_TARGET_CLANG_x86_64 :=
+
+define set-target-local-clang-vars
+    LOCAL_CLANG := $(ART_TARGET_CLANG)
+    $(foreach arch,$(ART_SUPPORTED_ARCH),
+    	ifneq ($$(ART_TARGET_CLANG_$(arch)),)
+        LOCAL_CLANG_$(arch) := $$(ART_TARGET_CLANG_$(arch))
+      endif)
+endef
 
 # directory used for dalvik-cache on device
 ART_DALVIK_CACHE_DIR := /data/dalvik-cache
@@ -190,13 +201,18 @@
 	-Wstrict-aliasing \
 	-fstrict-aliasing
 
+ART_TARGET_CLANG_CFLAGS :=
+ART_TARGET_CLANG_CFLAGS_arm :=
+ART_TARGET_CLANG_CFLAGS_arm64 :=
+ART_TARGET_CLANG_CFLAGS_mips :=
+ART_TARGET_CLANG_CFLAGS_x86 :=
+ART_TARGET_CLANG_CFLAGS_x86_64 :=
+
 # these are necessary for Clang ARM64 ART builds
-ifeq ($(ART_TARGET_CLANG), true)
-art_cflags += \
+ART_TARGET_CLANG_CFLAGS_arm64  += \
 	-Wno-implicit-exception-spec-mismatch \
 	-DNVALGRIND \
 	-Wno-unused-value
-endif
 
 ifeq ($(ART_SMALL_MODE),true)
   art_cflags += -DART_SMALL_MODE=1
@@ -206,20 +222,18 @@
   art_cflags += -DART_SEA_IR_MODE=1
 endif
 
-ifeq ($(HOST_OS),linux)
-  art_non_debug_cflags := \
-	-Wframe-larger-than=1728
-endif
-
 art_non_debug_cflags := \
 	-O3
 
-# FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
-ifeq ($(ART_TARGET_CLANG),true)
-art_non_debug_cflags += \
-        -fno-vectorize
+ifeq ($(HOST_OS),linux)
+  art_non_debug_cflags += \
+	-Wframe-larger-than=1728
 endif
 
+# FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
+ART_TARGET_CLANG_CFLAGS_arm64 += \
+	-fno-vectorize
+
 art_debug_cflags := \
 	-O1 \
 	-DDYNAMIC_ANNOTATIONS_ENABLED=1 \
@@ -296,6 +310,24 @@
 
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+# $(1): ndebug_or_debug
+define set-target-local-cflags-vars
+    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+    LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
+    art_target_cflags_ndebug_or_debug := $(1)
+    ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    endif
+
+    # TODO: Also set when ART_TARGET_CLANG_$(arch)!=false and ART_TARGET_CLANG==true
+    $(foreach arch,$(ART_SUPPORTED_ARCH),
+    	ifeq ($$(ART_TARGET_CLANG_$(arch)),true)
+        LOCAL_CFLAGS_$(arch) += $$(ART_TARGET_CLANG_CFLAGS_$(arch))
+      endif)
+endef
+
 ART_BUILD_TARGET := false
 ART_BUILD_HOST := false
 ART_BUILD_NDEBUG := false
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index a186e85..49e7384 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -66,14 +66,8 @@
 
   LOCAL_CFLAGS := $(ART_EXECUTABLES_CFLAGS)
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
-    LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86)
-    ifeq ($$(art_ndebug_or_debug),debug)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    endif
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,$(6))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 9f1d0f1..b07e4f8 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -36,6 +36,7 @@
 	runtime/dex_instruction_visitor_test.cc \
 	runtime/dex_method_iterator_test.cc \
 	runtime/entrypoints/math_entrypoints_test.cc \
+	runtime/entrypoints_order_test.cc \
 	runtime/exception_test.cc \
 	runtime/gc/accounting/space_bitmap_test.cc \
 	runtime/gc/heap_test.cc \
@@ -187,16 +188,15 @@
   LOCAL_CFLAGS := $(ART_TEST_CFLAGS)
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
-    LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
     LOCAL_STATIC_LIBRARIES += libgtest_libc++
     LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
     include $(BUILD_EXECUTABLE)
-    
+
     ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
     art_gtest_target := test-art-$$(art_target_or_host)-gtest-$$(art_gtest_name)
 
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index c080928..b4c99b5 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -49,9 +49,8 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
-    LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
+  	$(call set-target-local-clang-vars)
+  	$(call set-target-local-cflags-vars,debug)
     LOCAL_SHARED_LIBRARIES += libdl libcutils
     LOCAL_STATIC_LIBRARIES := libgtest
     LOCAL_MULTILIB := both
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 3bed01d..cfce9f7 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -95,6 +95,8 @@
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
 	utils/arm/assembler_arm.cc \
+	utils/arm/assembler_arm32.cc \
+	utils/arm/assembler_thumb2.cc \
 	utils/arm/managed_register_arm.cc \
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
@@ -177,8 +179,10 @@
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
+    LOCAL_SHARED_LIBRARIES += libart
   else # debug
     LOCAL_MODULE := libartd-compiler
+    LOCAL_SHARED_LIBRARIES += libartd
   endif
 
   LOCAL_MODULE_TAGS := optional
@@ -200,32 +204,21 @@
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
   include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    LOCAL_CLANG := $(ART_TARGET_CLANG)
-    LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
   endif
 
   # TODO: clean up the compilers and remove this.
   LOCAL_CFLAGS += -Wno-unused-parameter
 
-  LOCAL_SHARED_LIBRARIES += liblog
-  ifeq ($$(art_ndebug_or_debug),debug)
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
-    endif
-    LOCAL_SHARED_LIBRARIES += libartd
-  else
-    ifeq ($$(art_target_or_host),target)
-      LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
-    else # host
-      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
-    endif
-    LOCAL_SHARED_LIBRARIES += libart
-  endif
   ifeq ($(ART_USE_PORTABLE_COMPILER),true)
     LOCAL_SHARED_LIBRARIES += libLLVM
     LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 767ffbf..eb48cc3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -25,6 +25,7 @@
   kInvalidRegClass,
   kCoreReg,
   kFPReg,
+  kRefReg,
   kAnyReg,
 };
 
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 7848b06..5b9c763 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -134,39 +134,133 @@
   }
 }
 
+// Enable opcodes that mostly work, but produce assertion errors (thus breaking libartd.so).
+#define ARM64_USE_EXPERIMENTAL_OPCODES 0
+
 // TODO: Remove this when we are able to compile everything.
 int arm64_support_list[] = {
     Instruction::NOP,
     Instruction::MOVE,
     Instruction::MOVE_FROM16,
     Instruction::MOVE_16,
+    Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
+    Instruction::RETURN,
+    Instruction::RETURN_WIDE,
+    Instruction::CONST_4,
+    Instruction::CONST_16,
+    Instruction::CONST,
+    Instruction::CONST_STRING,
+    Instruction::MONITOR_ENTER,
+    Instruction::MONITOR_EXIT,
+    Instruction::THROW,
+    Instruction::GOTO,
+    Instruction::GOTO_16,
+    Instruction::GOTO_32,
+    Instruction::IF_EQ,
+    Instruction::IF_NE,
+    Instruction::IF_LT,
+    Instruction::IF_GE,
+    Instruction::IF_GT,
+    Instruction::IF_LE,
+    Instruction::IF_EQZ,
+    Instruction::IF_NEZ,
+    Instruction::IF_LTZ,
+    Instruction::IF_GEZ,
+    Instruction::IF_GTZ,
+    Instruction::IF_LEZ,
+    Instruction::NEG_INT,
+    Instruction::NOT_INT,
+    Instruction::NEG_FLOAT,
+    Instruction::INT_TO_BYTE,
+    Instruction::INT_TO_CHAR,
+    Instruction::INT_TO_SHORT,
+    Instruction::ADD_INT,
+    Instruction::SUB_INT,
+    Instruction::MUL_INT,
+    Instruction::DIV_INT,
+    Instruction::REM_INT,
+    Instruction::AND_INT,
+    Instruction::OR_INT,
+    Instruction::XOR_INT,
+    Instruction::SHL_INT,
+    Instruction::SHR_INT,
+    Instruction::USHR_INT,
+    Instruction::ADD_FLOAT,
+    Instruction::SUB_FLOAT,
+    Instruction::MUL_FLOAT,
+    Instruction::DIV_FLOAT,
+    Instruction::ADD_INT_2ADDR,
+    Instruction::SUB_INT_2ADDR,
+    Instruction::MUL_INT_2ADDR,
+    Instruction::DIV_INT_2ADDR,
+    Instruction::REM_INT_2ADDR,
+    Instruction::AND_INT_2ADDR,
+    Instruction::OR_INT_2ADDR,
+    Instruction::XOR_INT_2ADDR,
+    Instruction::SHL_INT_2ADDR,
+    Instruction::SHR_INT_2ADDR,
+    Instruction::USHR_INT_2ADDR,
+    Instruction::ADD_FLOAT_2ADDR,
+    Instruction::SUB_FLOAT_2ADDR,
+    Instruction::MUL_FLOAT_2ADDR,
+    Instruction::DIV_FLOAT_2ADDR,
+    Instruction::ADD_INT_LIT16,
+    Instruction::RSUB_INT,
+    Instruction::MUL_INT_LIT16,
+    Instruction::DIV_INT_LIT16,
+    Instruction::REM_INT_LIT16,
+    Instruction::AND_INT_LIT16,
+    Instruction::OR_INT_LIT16,
+    Instruction::XOR_INT_LIT16,
+    Instruction::ADD_INT_LIT8,
+    Instruction::RSUB_INT_LIT8,
+    Instruction::MUL_INT_LIT8,
+    Instruction::DIV_INT_LIT8,
+    Instruction::REM_INT_LIT8,
+    Instruction::AND_INT_LIT8,
+    Instruction::OR_INT_LIT8,
+    Instruction::XOR_INT_LIT8,
+    Instruction::SHL_INT_LIT8,
+    Instruction::SHR_INT_LIT8,
+    Instruction::USHR_INT_LIT8,
+    // TODO(Arm64): Enable compiler pass
+    // ----- ExtendedMIROpcode -----
+    kMirOpPhi,
+    kMirOpCopy,
+    kMirOpFusedCmplFloat,
+    kMirOpFusedCmpgFloat,
+    kMirOpFusedCmplDouble,
+    kMirOpFusedCmpgDouble,
+    kMirOpFusedCmpLong,
+    kMirOpNop,
+    kMirOpNullCheck,
+    kMirOpRangeCheck,
+    kMirOpDivZeroCheck,
+    kMirOpCheck,
+    kMirOpCheckPart2,
+    kMirOpSelect,
+
+#if ARM64_USE_EXPERIMENTAL_OPCODES
     Instruction::MOVE_WIDE,
     Instruction::MOVE_WIDE_FROM16,
     Instruction::MOVE_WIDE_16,
     Instruction::MOVE_OBJECT,
     Instruction::MOVE_OBJECT_FROM16,
     Instruction::MOVE_OBJECT_16,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
-    Instruction::MOVE_EXCEPTION,
-    Instruction::RETURN_VOID,
-    Instruction::RETURN,
-    Instruction::RETURN_WIDE,
     // Instruction::RETURN_OBJECT,
-    // Instruction::CONST_4,
-    // Instruction::CONST_16,
-    // Instruction::CONST,
     // Instruction::CONST_HIGH16,
     // Instruction::CONST_WIDE_16,
     // Instruction::CONST_WIDE_32,
     // Instruction::CONST_WIDE,
     // Instruction::CONST_WIDE_HIGH16,
-    // Instruction::CONST_STRING,
     // Instruction::CONST_STRING_JUMBO,
     // Instruction::CONST_CLASS,
-    Instruction::MONITOR_ENTER,
-    Instruction::MONITOR_EXIT,
     // Instruction::CHECK_CAST,
     // Instruction::INSTANCE_OF,
     // Instruction::ARRAY_LENGTH,
@@ -175,29 +269,11 @@
     // Instruction::FILLED_NEW_ARRAY,
     // Instruction::FILLED_NEW_ARRAY_RANGE,
     // Instruction::FILL_ARRAY_DATA,
-    Instruction::THROW,
-    // Instruction::GOTO,
-    // Instruction::GOTO_16,
-    // Instruction::GOTO_32,
-    // Instruction::PACKED_SWITCH,
-    // Instruction::SPARSE_SWITCH,
     Instruction::CMPL_FLOAT,
     Instruction::CMPG_FLOAT,
     Instruction::CMPL_DOUBLE,
     Instruction::CMPG_DOUBLE,
     Instruction::CMP_LONG,
-    // Instruction::IF_EQ,
-    // Instruction::IF_NE,
-    // Instruction::IF_LT,
-    // Instruction::IF_GE,
-    // Instruction::IF_GT,
-    // Instruction::IF_LE,
-    // Instruction::IF_EQZ,
-    // Instruction::IF_NEZ,
-    // Instruction::IF_LTZ,
-    // Instruction::IF_GEZ,
-    // Instruction::IF_GTZ,
-    // Instruction::IF_LEZ,
     // Instruction::UNUSED_3E,
     // Instruction::UNUSED_3F,
     // Instruction::UNUSED_40,
@@ -259,11 +335,8 @@
     // Instruction::INVOKE_INTERFACE_RANGE,
     // Instruction::UNUSED_79,
     // Instruction::UNUSED_7A,
-    Instruction::NEG_INT,
-    Instruction::NOT_INT,
     Instruction::NEG_LONG,
     Instruction::NOT_LONG,
-    Instruction::NEG_FLOAT,
     Instruction::NEG_DOUBLE,
     Instruction::INT_TO_LONG,
     Instruction::INT_TO_FLOAT,
@@ -277,20 +350,6 @@
     Instruction::DOUBLE_TO_INT,
     Instruction::DOUBLE_TO_LONG,
     Instruction::DOUBLE_TO_FLOAT,
-    Instruction::INT_TO_BYTE,
-    Instruction::INT_TO_CHAR,
-    Instruction::INT_TO_SHORT,
-    Instruction::ADD_INT,
-    Instruction::SUB_INT,
-    Instruction::MUL_INT,
-    Instruction::DIV_INT,
-    Instruction::REM_INT,
-    Instruction::AND_INT,
-    Instruction::OR_INT,
-    Instruction::XOR_INT,
-    Instruction::SHL_INT,
-    Instruction::SHR_INT,
-    Instruction::USHR_INT,
     Instruction::ADD_LONG,
     Instruction::SUB_LONG,
     Instruction::MUL_LONG,
@@ -302,27 +361,12 @@
     Instruction::SHL_LONG,
     Instruction::SHR_LONG,
     Instruction::USHR_LONG,
-    Instruction::ADD_FLOAT,
-    Instruction::SUB_FLOAT,
-    Instruction::MUL_FLOAT,
-    Instruction::DIV_FLOAT,
     // Instruction::REM_FLOAT,
     Instruction::ADD_DOUBLE,
     Instruction::SUB_DOUBLE,
     Instruction::MUL_DOUBLE,
     Instruction::DIV_DOUBLE,
     // Instruction::REM_DOUBLE,
-    Instruction::ADD_INT_2ADDR,
-    Instruction::SUB_INT_2ADDR,
-    Instruction::MUL_INT_2ADDR,
-    Instruction::DIV_INT_2ADDR,
-    Instruction::REM_INT_2ADDR,
-    Instruction::AND_INT_2ADDR,
-    Instruction::OR_INT_2ADDR,
-    Instruction::XOR_INT_2ADDR,
-    Instruction::SHL_INT_2ADDR,
-    Instruction::SHR_INT_2ADDR,
-    Instruction::USHR_INT_2ADDR,
     Instruction::ADD_LONG_2ADDR,
     Instruction::SUB_LONG_2ADDR,
     Instruction::MUL_LONG_2ADDR,
@@ -334,35 +378,12 @@
     Instruction::SHL_LONG_2ADDR,
     Instruction::SHR_LONG_2ADDR,
     Instruction::USHR_LONG_2ADDR,
-    Instruction::ADD_FLOAT_2ADDR,
-    Instruction::SUB_FLOAT_2ADDR,
-    Instruction::MUL_FLOAT_2ADDR,
-    Instruction::DIV_FLOAT_2ADDR,
     // Instruction::REM_FLOAT_2ADDR,
     Instruction::ADD_DOUBLE_2ADDR,
     Instruction::SUB_DOUBLE_2ADDR,
     Instruction::MUL_DOUBLE_2ADDR,
     Instruction::DIV_DOUBLE_2ADDR,
     // Instruction::REM_DOUBLE_2ADDR,
-    Instruction::ADD_INT_LIT16,
-    Instruction::RSUB_INT,
-    Instruction::MUL_INT_LIT16,
-    Instruction::DIV_INT_LIT16,
-    Instruction::REM_INT_LIT16,
-    Instruction::AND_INT_LIT16,
-    Instruction::OR_INT_LIT16,
-    Instruction::XOR_INT_LIT16,
-    Instruction::ADD_INT_LIT8,
-    Instruction::RSUB_INT_LIT8,
-    Instruction::MUL_INT_LIT8,
-    Instruction::DIV_INT_LIT8,
-    Instruction::REM_INT_LIT8,
-    Instruction::AND_INT_LIT8,
-    Instruction::OR_INT_LIT8,
-    Instruction::XOR_INT_LIT8,
-    Instruction::SHL_INT_LIT8,
-    Instruction::SHR_INT_LIT8,
-    Instruction::USHR_INT_LIT8,
     // Instruction::IGET_QUICK,
     // Instruction::IGET_WIDE_QUICK,
     // Instruction::IGET_OBJECT_QUICK,
@@ -392,24 +413,7 @@
     // Instruction::UNUSED_FD,
     // Instruction::UNUSED_FE,
     // Instruction::UNUSED_FF,
-
-    // TODO(Arm64): Enable compiler pass
-    // ----- ExtendedMIROpcode -----
-    kMirOpPhi,
-    kMirOpCopy,
-    kMirOpFusedCmplFloat,
-    kMirOpFusedCmpgFloat,
-    kMirOpFusedCmplDouble,
-    kMirOpFusedCmpgDouble,
-    kMirOpFusedCmpLong,
-    kMirOpNop,
-    kMirOpNullCheck,
-    kMirOpRangeCheck,
-    kMirOpDivZeroCheck,
-    kMirOpCheck,
-    kMirOpCheckPart2,
-    kMirOpSelect,
-    // kMirOpLast,
+#endif /* ARM64_USE_EXPERIMENTAL_OPCODES */
 };
 
 // TODO: Remove this when we are able to compile everything.
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index db28f3a..a2676c8 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -111,7 +111,8 @@
       merged_df_flags_(0u),
       ifield_lowering_infos_(arena, 0u),
       sfield_lowering_infos_(arena, 0u),
-      method_lowering_infos_(arena, 0u) {
+      method_lowering_infos_(arena, 0u),
+      gen_suspend_test_list_(arena, 0u) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
   max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
       - std::abs(static_cast<int>(kVRegTempBaseReg));
@@ -1072,19 +1073,21 @@
     }
   }
 
-  // Remove the BB information and also find the after_list
+  // Remove the BB information and also find the after_list.
   for (MIR* mir = first_list_mir; mir != last_list_mir; mir = mir->next) {
     mir->bb = NullBasicBlockId;
   }
 
   after_list = last_list_mir->next;
 
-  // If there is nothing before the list, after_list is the first_mir
+  // If there is nothing before the list, after_list is the first_mir.
   if (before_list == nullptr) {
     first_mir_insn = after_list;
+  } else {
+    before_list->next = after_list;
   }
 
-  // If there is nothing after the list, before_list is last_mir
+  // If there is nothing after the list, before_list is last_mir.
   if (after_list == nullptr) {
     last_mir_insn = before_list;
   }
@@ -1531,6 +1534,24 @@
   return false;
 }
 
+bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) {
+  BasicBlock* target = GetBasicBlock(target_id);
+
+  if (source == nullptr || target == nullptr)
+    return false;
+
+  int idx;
+  for (idx = gen_suspend_test_list_.Size() - 1; idx >= 0; idx--) {
+    BasicBlock* bb = gen_suspend_test_list_.Get(idx);
+    if (bb == source)
+      return true;  // The block has been inserted by a suspend check before.
+    if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id))
+      return true;
+  }
+
+  return false;
+}
+
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 38cd5ee..b6cec66 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -192,6 +192,7 @@
 
 typedef uint16_t BasicBlockId;
 static const BasicBlockId NullBasicBlockId = 0;
+static constexpr bool kLeafOptimization = false;
 
 /*
  * In general, vreg/sreg describe Dalvik registers that originated with dx.  However,
@@ -1055,6 +1056,20 @@
   void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
 
+  // Used for removing redundant suspend tests.
+  void AppendGenSuspendTestList(BasicBlock* bb) {
+    if (gen_suspend_test_list_.Size() == 0 ||
+        gen_suspend_test_list_.Get(gen_suspend_test_list_.Size() - 1) != bb) {
+      gen_suspend_test_list_.Insert(bb);
+    }
+  }
+
+  /* This is used to check if there is already a method call dominating the
+   * source basic block of a backedge and being dominated by the target basic
+   * block of the backedge.
+   */
+  bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
+
  protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
@@ -1162,6 +1177,7 @@
   GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_;
   GrowableArray<MirMethodLoweringInfo> method_lowering_infos_;
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
+  GrowableArray<BasicBlock*> gen_suspend_test_list_;  // List of blocks containing suspend tests
 
   friend class ClassInitCheckEliminationTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index e384f6b..e32e7cb 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -29,7 +29,8 @@
  *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
  *        registers.
  * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4   : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r4   : If ARM_R4_SUSPEND_FLAG is set then reserved as a suspend check/debugger
+ *        assist flag, otherwise a callee save promotion target.
  * r5   : Callee save (promotion target)
  * r6   : Callee save (promotion target)
  * r7   : Callee save (promotion target)
@@ -95,6 +96,8 @@
 
 // First FP callee save.
 #define ARM_FP_CALLEE_SAVE_BASE 16
+// Flag for using R4 to do suspend check
+#define ARM_R4_SUSPEND_FLAG
 
 enum ArmResourceEncodingPos {
   kArmGPReg0   = 0,
@@ -117,7 +120,11 @@
   r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
   r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
   r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
+#ifdef ARM_R4_SUSPEND_FLAG
   rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+#else
+  r4           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+#endif
   r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
   r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
   r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
@@ -207,7 +214,11 @@
 constexpr RegStorage rs_r1(RegStorage::kValid | r1);
 constexpr RegStorage rs_r2(RegStorage::kValid | r2);
 constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+#ifdef ARM_R4_SUSPEND_FLAG
 constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
+#else
+constexpr RegStorage rs_r4(RegStorage::kValid | r4);
+#endif
 constexpr RegStorage rs_r5(RegStorage::kValid | r5);
 constexpr RegStorage rs_r6(RegStorage::kValid | r6);
 constexpr RegStorage rs_r7(RegStorage::kValid | r7);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5d74b8d..9f9e618 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -313,11 +313,11 @@
 
 void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index f0a9ca4..9c801a5 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index dde8ff0..e06d814 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -51,7 +51,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -94,7 +94,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2556788..4732e52 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -206,13 +206,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
   if (mir->ssa_rep->num_uses == 1) {
     // CONST case
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     // Change kCondNe to kCondEq for the special cases below.
     if (ccode == kCondNe) {
       ccode = kCondEq;
@@ -239,8 +242,8 @@
       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
     } else {
       // Unlikely case - could be tuned.
-      RegStorage t_reg1 = AllocTemp();
-      RegStorage t_reg2 = AllocTemp();
+      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
+      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
       LoadConstant(t_reg1, true_val);
       LoadConstant(t_reg2, false_val);
       OpRegImm(kOpCmp, rl_src.reg, 0);
@@ -253,9 +256,9 @@
     // MOVE case
     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     OpRegImm(kOpCmp, rl_src.reg, 0);
     LIR* it = nullptr;
     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
@@ -814,10 +817,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -840,7 +843,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -950,8 +953,18 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
+#ifdef ARM_R4_SUSPEND_FLAG
   NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+  RegStorage t_reg = AllocTemp();
+  LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
+    t_reg, kUnsignedHalf);
+  LIR* cmp_branch = OpCmpImmBranch((target == NULL) ? kCondNe : kCondEq, t_reg,
+    0, target);
+  FreeTemp(t_reg);
+  return cmp_branch;
+#endif
 }
 
 // Decrement register and branch on condition
@@ -1047,7 +1060,7 @@
       ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
       FlushAllRegs();
       CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(false);
+      rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     }
@@ -1126,7 +1139,7 @@
     if (reg_status != 0) {
       // We had manually allocated registers for rl_result.
       // Now construct a RegLocation.
-      rl_result = GetReturnWide(false);  // Just using as a template.
+      rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
       rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
     }
 
@@ -1168,7 +1181,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1203,7 +1216,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -1229,7 +1242,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1267,7 +1280,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1281,7 +1294,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 309f676..bd9c8b4 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -25,47 +25,55 @@
 
 namespace art {
 
-// TODO: rework this when c++11 support allows.
-static const RegStorage core_regs_arr[] =
+#ifdef ARM_R4_SUSPEND_FLAG
+static constexpr RegStorage core_regs_arr[] =
     {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
      rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static const RegStorage sp_regs_arr[] =
+#else
+static constexpr RegStorage core_regs_arr[] =
+    {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
+     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+#endif
+static constexpr RegStorage sp_regs_arr[] =
     {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
      rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
      rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
      rs_fr31};
-static const RegStorage dp_regs_arr[] =
+static constexpr RegStorage dp_regs_arr[] =
     {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
      rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
-static const RegStorage reserved_regs_arr[] =
+#ifdef ARM_R4_SUSPEND_FLAG
+static constexpr RegStorage reserved_regs_arr[] =
     {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
-static const RegStorage sp_temps_arr[] =
+static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+#else
+static constexpr RegStorage reserved_regs_arr[] =
+    {rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r12};
+#endif
+static constexpr RegStorage sp_temps_arr[] =
     {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
      rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
-static const RegStorage dp_temps_arr[] =
+static constexpr RegStorage dp_temps_arr[] =
     {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
 
-static const std::vector<RegStorage> empty_pool;
-static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
-static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
-static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
-static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
-static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
-    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
-    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+static constexpr ArrayRef<const RegStorage> empty_pool;
+static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
+static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
+static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
+static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
+static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
+static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
+static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
 
 RegLocation ArmMir2Lir::LocCReturn() {
   return arm_loc_c_return;
 }
 
+RegLocation ArmMir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation ArmMir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
@@ -83,7 +91,11 @@
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
     case kSelf: res_reg = rs_rARM_SELF; break;
+#ifdef ARM_R4_SUSPEND_FLAG
     case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
+#else
+    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
+#endif
     case kLr: res_reg =  rs_rARM_LR; break;
     case kPc: res_reg =  rs_rARM_PC; break;
     case kSp: res_reg =  rs_rARM_SP; break;
@@ -582,11 +594,13 @@
     }
   }
 
+#ifdef ARM_R4_SUSPEND_FLAG
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
   bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
   if (no_suspend) {
     GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
   }
+#endif
 
   // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
   // TODO: adjust when we roll to hard float calling convention.
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 2e3ef86..d0f8e74 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -287,9 +287,9 @@
 
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
-  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg);
+  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 16bb701..6251f4f 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 882ee66..acc7d17 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -47,7 +47,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -90,7 +90,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d9428f9..0a76b9b 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -41,8 +41,8 @@
 /*
  * 64-bit 3way compare function.
  *     cmp   xA, xB
- *     csinc wC, wzr, wzr, eq
- *     csneg wC, wC, wC, le
+ *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
+ *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
  */
 void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                               RegLocation rl_src2) {
@@ -52,10 +52,10 @@
   rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq);
-  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(),
-          rl_result.reg.GetReg(), kArmCondLe);
-  StoreValueWide(rl_dest, rl_result);
+  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), kArmCondGe);
+  StoreValue(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -88,14 +88,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
 
   RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
   RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-  rl_true = LoadValue(rl_true, kCoreReg);
-  rl_false = LoadValue(rl_false, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  rl_true = LoadValue(rl_true, result_reg_class);
+  rl_false = LoadValue(rl_false, result_reg_class);
+  rl_result = EvalLoc(rl_dest, result_reg_class, true);
   OpRegImm(kOpCmp, rl_src.reg, 0);
   NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
           rl_false.reg.GetReg(), code);
@@ -501,10 +503,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -527,7 +529,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -769,7 +771,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -804,7 +806,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -830,7 +832,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -871,7 +873,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -885,7 +887,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 598d05b..b287399 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -25,77 +25,72 @@
 
 namespace art {
 
-// TODO: rework this when c++11 support allows.
-static const RegStorage core_regs_arr[] =
+static constexpr RegStorage core_regs_arr[] =
     {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
      rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15,
      rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23,
      rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31,
      rs_wzr};
-static const RegStorage core64_regs_arr[] =
+static constexpr RegStorage core64_regs_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
      rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
      rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31,
      rs_xzr};
-static const RegStorage sp_regs_arr[] =
+static constexpr RegStorage sp_regs_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
      rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
      rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
      rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
-static const RegStorage dp_regs_arr[] =
+static constexpr RegStorage dp_regs_arr[] =
     {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
      rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
-static const RegStorage reserved_regs_arr[] =
+static constexpr RegStorage reserved_regs_arr[] =
     {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr};
-static const RegStorage reserved64_regs_arr[] =
+static constexpr RegStorage reserved64_regs_arr[] =
     {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr};
 // TUNING: Are there too many temp registers and too less promote target?
 // This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler
 // Note: we are not able to call to C function directly if it un-match C ABI.
 // Currently, rs_rA64_SELF is not a callee save register which does not match C ABI.
-static const RegStorage core_temps_arr[] =
+static constexpr RegStorage core_temps_arr[] =
     {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
      rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
      rs_w17};
-static const RegStorage core64_temps_arr[] =
+static constexpr RegStorage core64_temps_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
      rs_x17};
-static const RegStorage sp_temps_arr[] =
+static constexpr RegStorage sp_temps_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
      rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
      rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
-static const RegStorage dp_temps_arr[] =
+static constexpr RegStorage dp_temps_arr[] =
     {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
 
-static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + arraysize(core_regs_arr));
-static const std::vector<RegStorage> core64_regs(core64_regs_arr,
-    core64_regs_arr + arraysize(core64_regs_arr));
-static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + arraysize(sp_regs_arr));
-static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + arraysize(dp_regs_arr));
-static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + arraysize(reserved_regs_arr));
-static const std::vector<RegStorage> reserved64_regs(reserved64_regs_arr,
-    reserved64_regs_arr + arraysize(reserved64_regs_arr));
-static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + arraysize(core_temps_arr));
-static const std::vector<RegStorage> core64_temps(core64_temps_arr,
-    core64_temps_arr + arraysize(core64_temps_arr));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
+static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
+static constexpr ArrayRef<const RegStorage> core64_regs(core64_regs_arr);
+static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
+static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
+static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
+static constexpr ArrayRef<const RegStorage> reserved64_regs(reserved64_regs_arr);
+static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
+static constexpr ArrayRef<const RegStorage> core64_temps(core64_temps_arr);
+static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
+static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
 
 RegLocation Arm64Mir2Lir::LocCReturn() {
   return arm_loc_c_return;
 }
 
+RegLocation Arm64Mir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation Arm64Mir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
@@ -572,7 +567,7 @@
   if (UNLIKELY(is_volatile)) {
     // On arm64, fp register load/store is atomic only for single bytes.
     if (size != kSignedByte && size != kUnsignedByte) {
-      return kCoreReg;
+      return (size == kReference) ? kRefReg : kCoreReg;
     }
   }
   return RegClassBySize(size);
@@ -835,7 +830,7 @@
   int num_fpr_used = 0;
 
   /*
-   * Dummy up a RegLocation for the incoming Method*
+   * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod>
    * It will attempt to keep kArg0 live (or copy it to home location
    * if promoted).
    */
@@ -844,14 +839,10 @@
   rl_src.reg = TargetReg(kArg0);
   rl_src.home = false;
   MarkLive(rl_src);
-
-  // rl_method might be 32-bit, but ArtMethod* on stack is 64-bit, so always flush it.
-  StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
-
-  // If Method* has been promoted, load it,
-  // otherwise, rl_method is the 32-bit value on [sp], and has already been loaded.
+  StoreValue(rl_method, rl_src);
+  // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
-    StoreValue(rl_method, rl_src);
+    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0));
   }
 
   if (cu_->num_ins == 0) {
@@ -908,9 +899,7 @@
     RegLocation rl_arg = info->args[next_arg++];
     rl_arg = UpdateRawLoc(rl_arg);
     if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
-      RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
-      LoadValueDirectWideFixed(rl_arg, r_tmp);
-      next_reg++;
+      LoadValueDirectWideFixed(rl_arg, RegStorage::Solo64(next_reg));
       next_arg++;
     } else {
       if (rl_arg.wide) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4f2a876..62c81d0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -220,6 +220,8 @@
 void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                                   RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through) {
+  DCHECK(!rl_src1.fp);
+  DCHECK(!rl_src2.fp);
   ConditionCode cond;
   switch (opcode) {
     case Instruction::IF_EQ:
@@ -253,7 +255,7 @@
     cond = FlipComparisonOrder(cond);
   }
 
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src1 = LoadValue(rl_src1);
   // Is this really an immediate comparison?
   if (rl_src2.is_const) {
     // If it's already live in a register or not easily materialized, just keep going
@@ -265,14 +267,15 @@
       return;
     }
   }
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  rl_src2 = LoadValue(rl_src2);
   OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken);
 }
 
 void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken,
                                       LIR* fall_through) {
   ConditionCode cond;
-  rl_src = LoadValue(rl_src, kCoreReg);
+  DCHECK(!rl_src.fp);
+  rl_src = LoadValue(rl_src);
   switch (opcode) {
     case Instruction::IF_EQZ:
       cond = kCondEq;
@@ -371,7 +374,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -503,7 +506,7 @@
     }
   }
   if (info->result.location != kLocInvalid) {
-    StoreValue(info->result, GetReturn(false /* not fp */));
+    StoreValue(info->result, GetReturn(kRefReg));
   }
 }
 
@@ -563,7 +566,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
       if (IsTemp(rl_method.reg)) {
         FreeTemp(rl_method.reg);
@@ -603,6 +606,8 @@
                                                      field_info.StorageIndex(), r_base));
 
         FreeTemp(r_tmp);
+        // Ensure load of status and load of value don't re-order.
+        GenMemBarrier(kLoadLoad);
       }
       FreeTemp(r_method);
     }
@@ -658,7 +663,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
@@ -694,6 +699,8 @@
                                                      field_info.StorageIndex(), r_base));
 
         FreeTemp(r_tmp);
+        // Ensure load of status and load of value don't re-order.
+        GenMemBarrier(kLoadLoad);
       }
       FreeTemp(r_method);
     }
@@ -726,10 +733,10 @@
       GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -766,7 +773,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     GenNullCheck(rl_obj.reg, opt_flags);
     RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
     int field_offset = field_info.FieldOffset().Int32Value();
@@ -793,10 +800,10 @@
       GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -824,7 +831,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     if (is_long_or_double) {
       rl_src = LoadValueWide(rl_src, reg_class);
     } else {
@@ -881,7 +888,7 @@
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
   RegStorage res_reg = AllocTemp();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
                                                    *cu_->dex_file,
                                                    type_idx)) {
@@ -894,15 +901,15 @@
       CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                               type_idx, rl_method.reg, true);
     }
-    RegLocation rl_result = GetReturn(false);
+    RegLocation rl_result = GetReturn(kRefReg);
     StoreValue(rl_dest, rl_result);
   } else {
     // We're don't need access checks, load type from dex cache
     int32_t dex_cache_offset =
         mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
-    Load32Disp(rl_method.reg, dex_cache_offset, res_reg);
+    LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
-    Load32Disp(res_reg, offset_of_type, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_type, rl_result.reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
@@ -976,7 +983,7 @@
                 TargetReg(kArg0));
 
     // Might call out to helper, which will return resolved string in kRet0
-    Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
+    LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
     LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
@@ -1010,13 +1017,13 @@
     }
 
     GenBarrier();
-    StoreValue(rl_dest, GetReturn(false));
+    StoreValue(rl_dest, GetReturn(kRefReg));
   } else {
     RegLocation rl_method = LoadCurrMethod();
-    RegStorage res_reg = AllocTemp();
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    RegStorage res_reg = AllocTempRef();
+    RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
     LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg);
-    Load32Disp(res_reg, offset_of_string, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_string, rl_result.reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1071,7 +1078,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -1103,7 +1110,7 @@
   // X86 has its own implementation.
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
 
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
   if (result_reg == object.reg) {
@@ -1112,8 +1119,8 @@
   LoadConstant(result_reg, 0);     // assume false
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
-  RegStorage object_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
+  RegStorage object_class = AllocTypedTemp(false, kRefReg);
 
   LoadCurrMethodDirect(check_class);
   if (use_declaring_class) {
@@ -1206,7 +1213,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
   if (cu_->instruction_set == kMips) {
     // On MIPS rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
@@ -1511,7 +1518,7 @@
   } else {
     GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift);
   }
-  RegLocation rl_result = GetReturnWide(false);
+  RegLocation rl_result = GetReturnWide(kCoreReg);
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -1653,7 +1660,7 @@
         CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */);
       }
       if (op == kOpDiv)
-        rl_result = GetReturn(false);
+        rl_result = GetReturn(kCoreReg);
       else
         rl_result = GetReturnAlt();
     }
@@ -1918,7 +1925,7 @@
                                   false);
         }
         if (is_div)
-          rl_result = GetReturn(false);
+          rl_result = GetReturn(kCoreReg);
         else
           rl_result = GetReturnAlt();
       }
@@ -2081,7 +2088,7 @@
     }
     // Adjust return regs in to handle case of rem returning kArg2/kArg3
     if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg())
-      rl_result = mir_to_lir->GetReturnWide(false);
+      rl_result = mir_to_lir->GetReturnWide(kCoreReg);
     else
       rl_result = mir_to_lir->GetReturnWideAlt();
     mir_to_lir->StoreValueWide(rl_dest, rl_result);
@@ -2119,11 +2126,11 @@
   CallRuntimeHelperRegLocation(func_offset, rl_src, false);
   if (rl_dest.wide) {
     RegLocation rl_result;
-    rl_result = GetReturnWide(rl_dest.fp);
+    rl_result = GetReturnWide(LocToRegClass(rl_dest));
     StoreValueWide(rl_dest, rl_result);
   } else {
     RegLocation rl_result;
-    rl_result = GetReturn(rl_dest.fp);
+    rl_result = GetReturn(LocToRegClass(rl_dest));
     StoreValue(rl_dest, rl_result);
   }
 }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index eef3294..842533b 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -434,7 +434,7 @@
  */
 void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
   /*
-   * Dummy up a RegLocation for the incoming Method*
+   * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod>
    * It will attempt to keep kArg0 live (or copy it to home location
    * if promoted).
    */
@@ -1156,7 +1156,7 @@
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturn(false);
+    res = GetReturn(LocToRegClass(info->result));
   } else {
     res = info->result;
   }
@@ -1166,7 +1166,7 @@
 RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturnWide(false);
+    res = GetReturnWide(kCoreReg);
   } else {
     res = info->result;
   }
@@ -1189,7 +1189,7 @@
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   // X86 wants to avoid putting a constant index into a register.
   if (!((cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64)&& rl_idx.is_const)) {
     rl_idx = LoadValue(rl_idx, kCoreReg);
@@ -1202,7 +1202,7 @@
   RegStorage reg_ptr;
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     if (range_check) {
       reg_max = AllocTemp();
       Load32Disp(rl_obj.reg, count_offset, reg_max);
@@ -1232,9 +1232,9 @@
       }
     }
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     Load32Disp(rl_obj.reg, offset_offset, reg_off);
-    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
+    LoadRefDisp(rl_obj.reg, value_offset, reg_ptr);
   }
   if (rl_idx.is_const) {
     OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
@@ -1271,7 +1271,7 @@
   }
   // dst = src.length();
   RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   GenNullCheck(rl_obj.reg, info->opt_flags);
@@ -1477,7 +1477,7 @@
     DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0);
     DCHECK(high_code_point_branch == nullptr);
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1523,7 +1523,7 @@
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
     }
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1575,7 +1575,7 @@
   rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
   RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
@@ -1621,7 +1621,7 @@
     // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_value;
   if (is_long) {
@@ -1635,7 +1635,7 @@
       FreeTemp(rl_temp_offset);
     }
   } else {
-    rl_value = LoadValue(rl_src_value, kCoreReg);
+    rl_value = LoadValue(rl_src_value);
     StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
   }
 
@@ -1658,7 +1658,7 @@
     if (info->type != kStatic &&
         ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 ||
          (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0))  {
-      RegLocation rl_obj = LoadValue(info->args[0], kCoreReg);
+      RegLocation rl_obj = LoadValue(info->args[0], kRefReg);
       GenNullCheck(rl_obj.reg);
     }
     return;
@@ -1783,10 +1783,10 @@
   if (info->result.location != kLocInvalid) {
     // We have a following MOVE_RESULT - do it now.
     if (info->result.wide) {
-      RegLocation ret_loc = GetReturnWide(info->result.fp);
+      RegLocation ret_loc = GetReturnWide(LocToRegClass(info->result));
       StoreValueWide(info->result, ret_loc);
     } else {
-      RegLocation ret_loc = GetReturn(info->result.fp);
+      RegLocation ret_loc = GetReturn(LocToRegClass(info->result));
       StoreValue(info->result, ret_loc);
     }
   }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index f5e7e63..2c8b9b9 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -139,6 +139,7 @@
 }
 
 RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
+  DCHECK(!rl_src.ref || op_kind == kRefReg);
   rl_src = UpdateLoc(rl_src);
   if (rl_src.location == kLocPhysReg) {
     if (!RegClassMatches(op_kind, rl_src.reg)) {
@@ -162,6 +163,10 @@
   return rl_src;
 }
 
+RegLocation Mir2Lir::LoadValue(RegLocation rl_src) {
+  return LoadValue(rl_src, LocToRegClass(rl_src));
+}
+
 void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) {
   /*
    * Sanity checking - should never try to store to the same
@@ -366,7 +371,7 @@
 }
 
 RegLocation Mir2Lir::LoadCurrMethod() {
-  return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
+  return LoadValue(mir_graph_->GetMethodLoc(), kRefReg);
 }
 
 RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 3af3715..e1bdb2e 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,11 +261,11 @@
 
 void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e462173..ea3c901 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 9fffb2f..4e31477 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -52,7 +52,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -95,7 +95,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
@@ -204,7 +204,7 @@
   RegStorage r_tgt = LoadHelper(offset);
   // NOTE: not a safepoint
   OpReg(kOpBlx, r_tgt);
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kCoreReg);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 55cf434..c1a7c99 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -26,46 +26,43 @@
 
 namespace art {
 
-static const RegStorage core_regs_arr[] =
+static constexpr RegStorage core_regs_arr[] =
     {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2,
      rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rS0, rs_rS1, rs_rS2, rs_rS3, rs_rS4, rs_rS5,
      rs_rS6, rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP, rs_rRA};
-static RegStorage sp_regs_arr[] =
+static constexpr RegStorage sp_regs_arr[] =
     {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
      rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static RegStorage dp_regs_arr[] =
+static constexpr RegStorage dp_regs_arr[] =
     {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
-static const RegStorage reserved_regs_arr[] =
+static constexpr RegStorage reserved_regs_arr[] =
     {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA};
-static RegStorage core_temps_arr[] =
+static constexpr RegStorage core_temps_arr[] =
     {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4,
      rs_rT5, rs_rT6, rs_rT7, rs_rT8};
-static RegStorage sp_temps_arr[] =
+static constexpr RegStorage sp_temps_arr[] =
     {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
      rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static RegStorage dp_temps_arr[] =
+static constexpr RegStorage dp_temps_arr[] =
     {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
 
-static const std::vector<RegStorage> empty_pool;
-static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
-static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
-static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
-static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
-static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
-    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
-    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+static constexpr ArrayRef<const RegStorage> empty_pool;
+static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
+static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
+static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
+static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
+static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
+static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
+static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
 
 RegLocation MipsMir2Lir::LocCReturn() {
   return mips_loc_c_return;
 }
 
+RegLocation MipsMir2Lir::LocCReturnRef() {
+  return mips_loc_c_return;
+}
+
 RegLocation MipsMir2Lir::LocCReturnWide() {
   return mips_loc_c_return_wide;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index df56820..1f12b6f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -23,6 +23,36 @@
 
 namespace art {
 
+RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
+  RegisterClass res;
+  switch (shorty_type) {
+    case 'L':
+      res = kRefReg;
+      break;
+    case 'F':
+      // Intentional fallthrough.
+    case 'D':
+      res = kFPReg;
+      break;
+    default:
+      res = kCoreReg;
+  }
+  return res;
+}
+
+RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) {
+  RegisterClass res;
+  if (loc.fp) {
+    DCHECK(!loc.ref) << "At most, one of ref/fp may be set";
+    res = kFPReg;
+  } else if (loc.ref) {
+    res = kRefReg;
+  } else {
+    res = kCoreReg;
+  }
+  return res;
+}
+
 void Mir2Lir::LockArg(int in_position, bool wide) {
   RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
   RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
@@ -149,15 +179,13 @@
     return false;
   }
 
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
-
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
+  RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
   RegStorage r_result = rl_dest.reg;
   if (!RegClassMatches(reg_class, r_result)) {
     r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class)
@@ -205,7 +233,7 @@
   GenPrintLabel(mir);
   LockArg(data.object_arg);
   LockArg(data.src_arg, wide);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide);
   if (data.is_volatile) {
@@ -226,13 +254,12 @@
 bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
   const InlineReturnArgData& data = special.d.return_data;
   bool wide = (data.is_wide != 0u);
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.arg, wide);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
   LoadArgDirect(data.arg, rl_dest);
   return true;
 }
@@ -254,7 +281,7 @@
       break;
     case kInlineOpNonWideConst: {
       successful = true;
-      RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F');
+      RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
       GenPrintLabel(mir);
       LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
       return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
@@ -372,31 +399,35 @@
                                                           cu_->class_def_idx)) {
         GenMemBarrier(kStoreStore);
       }
-      if (!mir_graph_->MethodIsLeaf()) {
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
       break;
 
-    case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
-      if (!mir_graph_->MethodIsLeaf()) {
+      DCHECK(rl_src[0].ref);
+      // Intentional fallthrough.
+    case Instruction::RETURN:
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValue(GetReturn(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::RETURN_WIDE:
-      if (!mir_graph_->MethodIsLeaf()) {
+      if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValueWide(GetReturnWide(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::MOVE_RESULT_WIDE:
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp));
+      StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE_RESULT:
@@ -404,7 +435,7 @@
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValue(rl_dest, GetReturn(rl_dest.fp));
+      StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE:
@@ -474,7 +505,7 @@
     case Instruction::ARRAY_LENGTH:
       int len_offset;
       len_offset = mirror::Array::LengthOffset().Int32Value();
-      rl_src[0] = LoadValue(rl_src[0], kCoreReg);
+      rl_src[0] = LoadValue(rl_src[0], kRefReg);
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       Load32Disp(rl_src[0].reg, len_offset, rl_result.reg);
@@ -512,7 +543,8 @@
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
-      if (mir_graph_->IsBackedge(bb, bb->taken)) {
+      if (mir_graph_->IsBackedge(bb, bb->taken) &&
+          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
         GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
       } else {
         OpUnconditionalBranch(&label_list[bb->taken]);
@@ -551,12 +583,15 @@
         bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg),
                                        mir_graph_->ConstantValue(rl_src[1].orig_sreg));
         BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id)) {
+        if (mir_graph_->IsBackedge(bb, target_id) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
           GenSuspendTest(opt_flags);
         }
         OpUnconditionalBranch(&label_list[target_id]);
       } else {
-        if (mir_graph_->IsBackwardsBranch(bb)) {
+        if (mir_graph_->IsBackwardsBranch(bb) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
           GenSuspendTest(opt_flags);
         }
         GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through);
@@ -576,12 +611,15 @@
       if (rl_src[0].is_const) {
         bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0);
         BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id)) {
+        if (mir_graph_->IsBackedge(bb, target_id) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
           GenSuspendTest(opt_flags);
         }
         OpUnconditionalBranch(&label_list[target_id]);
       } else {
-        if (mir_graph_->IsBackwardsBranch(bb)) {
+        if (mir_graph_->IsBackwardsBranch(bb) &&
+            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
           GenSuspendTest(opt_flags);
         }
         GenCompareZeroAndBranch(opcode, rl_src[0], taken, fall_through);
@@ -725,37 +763,69 @@
 
     case Instruction::INVOKE_STATIC_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        // If the invocation is not inlined, we can assume there is already a
+        // suspend check at the return site.
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_STATIC:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_DIRECT:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_VIRTUAL:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_SUPER:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_SUPER_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::INVOKE_INTERFACE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
+      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+        mir_graph_->AppendGenSuspendTestList(bb);
+      }
       break;
 
     case Instruction::NEG_INT:
@@ -782,7 +852,7 @@
 
     case Instruction::LONG_TO_INT:
       rl_src[0] = UpdateLocWide(rl_src[0]);
-      rl_src[0] = WideToNarrow(rl_src[0]);
+      rl_src[0] = NarrowRegLoc(rl_src[0]);
       StoreValue(rl_dest, rl_src[0]);
       break;
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8d572ca..ed94a8d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -26,6 +26,7 @@
 #include "driver/compiler_driver.h"
 #include "leb128.h"
 #include "safe_map.h"
+#include "utils/array_ref.h"
 #include "utils/arena_allocator.h"
 #include "utils/growable_array.h"
 
@@ -429,16 +430,16 @@
     class RegisterPool {
      public:
       RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
-                   const std::vector<RegStorage>& core_regs,
-                   const std::vector<RegStorage>& core64_regs,
-                   const std::vector<RegStorage>& sp_regs,
-                   const std::vector<RegStorage>& dp_regs,
-                   const std::vector<RegStorage>& reserved_regs,
-                   const std::vector<RegStorage>& reserved64_regs,
-                   const std::vector<RegStorage>& core_temps,
-                   const std::vector<RegStorage>& core64_temps,
-                   const std::vector<RegStorage>& sp_temps,
-                   const std::vector<RegStorage>& dp_temps);
+                   const ArrayRef<const RegStorage>& core_regs,
+                   const ArrayRef<const RegStorage>& core64_regs,
+                   const ArrayRef<const RegStorage>& sp_regs,
+                   const ArrayRef<const RegStorage>& dp_regs,
+                   const ArrayRef<const RegStorage>& reserved_regs,
+                   const ArrayRef<const RegStorage>& reserved64_regs,
+                   const ArrayRef<const RegStorage>& core_temps,
+                   const ArrayRef<const RegStorage>& core64_temps,
+                   const ArrayRef<const RegStorage>& sp_temps,
+                   const ArrayRef<const RegStorage>& dp_temps);
       ~RegisterPool() {}
       static void* operator new(size_t size, ArenaAllocator* arena) {
         return arena->Alloc(size, kArenaAllocRegAlloc);
@@ -456,6 +457,8 @@
       int next_sp_reg_;
       GrowableArray<RegisterInfo*> dp_regs_;    // Double precision float.
       int next_dp_reg_;
+      GrowableArray<RegisterInfo*>* ref_regs_;  // Points to core_regs_ or core64_regs_
+      int* next_ref_reg_;
 
      private:
       Mir2Lir* const m2l_;
@@ -550,8 +553,12 @@
      * just use our knowledge of type to select the most appropriate register class?
      */
     RegisterClass RegClassBySize(OpSize size) {
-      return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
-              size == kSignedByte) ? kCoreReg : kAnyReg;
+      if (size == kReference) {
+        return kRefReg;
+      } else {
+        return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
+                size == kSignedByte) ? kCoreReg : kAnyReg;
+      }
     }
 
     size_t CodeBufferSizeInBytes() {
@@ -612,6 +619,8 @@
       return current_dalvik_offset_;
     }
 
+    RegisterClass ShortyToRegClass(char shorty_type);
+    RegisterClass LocToRegClass(RegLocation loc);
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -699,7 +708,7 @@
     virtual RegStorage AllocFreeTemp();
     virtual RegStorage AllocTemp();
     virtual RegStorage AllocTempWide();
-    virtual RegStorage AllocTempWord();
+    virtual RegStorage AllocTempRef();
     virtual RegStorage AllocTempSingle();
     virtual RegStorage AllocTempDouble();
     virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
@@ -719,7 +728,6 @@
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    virtual RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
     void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
@@ -764,8 +772,8 @@
     void DoPromotion();
     int VRegOffset(int v_reg);
     int SRegOffset(int s_reg);
-    RegLocation GetReturnWide(bool is_double);
-    RegLocation GetReturn(bool is_float);
+    RegLocation GetReturnWide(RegisterClass reg_class);
+    RegLocation GetReturn(RegisterClass reg_class);
     RegisterInfo* GetRegInfo(RegStorage reg);
 
     // Shared by all targets - implemented in gen_common.cc.
@@ -973,6 +981,8 @@
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    // Same as above, but derive the target register class from the location record.
+    virtual RegLocation LoadValue(RegLocation rl_src);
     // Load Dalvik value with 64-bit memory storage.
     virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
@@ -1122,6 +1132,7 @@
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
+    virtual RegLocation LocCReturnRef() = 0;
     virtual RegLocation LocCReturnDouble() = 0;
     virtual RegLocation LocCReturnFloat() = 0;
     virtual RegLocation LocCReturnWide() = 0;
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 058b89c..bbeef50 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -56,16 +56,16 @@
 }
 
 Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
-                                    const std::vector<RegStorage>& core_regs,
-                                    const std::vector<RegStorage>& core64_regs,
-                                    const std::vector<RegStorage>& sp_regs,
-                                    const std::vector<RegStorage>& dp_regs,
-                                    const std::vector<RegStorage>& reserved_regs,
-                                    const std::vector<RegStorage>& reserved64_regs,
-                                    const std::vector<RegStorage>& core_temps,
-                                    const std::vector<RegStorage>& core64_temps,
-                                    const std::vector<RegStorage>& sp_temps,
-                                    const std::vector<RegStorage>& dp_temps) :
+                                    const ArrayRef<const RegStorage>& core_regs,
+                                    const ArrayRef<const RegStorage>& core64_regs,
+                                    const ArrayRef<const RegStorage>& sp_regs,
+                                    const ArrayRef<const RegStorage>& dp_regs,
+                                    const ArrayRef<const RegStorage>& reserved_regs,
+                                    const ArrayRef<const RegStorage>& reserved64_regs,
+                                    const ArrayRef<const RegStorage>& core_temps,
+                                    const ArrayRef<const RegStorage>& core64_temps,
+                                    const ArrayRef<const RegStorage>& sp_temps,
+                                    const ArrayRef<const RegStorage>& dp_temps) :
     core_regs_(arena, core_regs.size()), next_core_reg_(0),
     core64_regs_(arena, core64_regs.size()), next_core64_reg_(0),
     sp_regs_(arena, sp_regs.size()), next_sp_reg_(0),
@@ -128,6 +128,15 @@
   // Add an entry for InvalidReg with zero'd mask.
   RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0);
   m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg);
+
+  // Existence of core64 registers implies wide references.
+  if (core64_regs_.Size() != 0) {
+    ref_regs_ = &core64_regs_;
+    next_ref_reg_ = &next_core64_reg_;
+  } else {
+    ref_regs_ = &core_regs_;
+    next_ref_reg_ = &next_core_reg_;
+  }
 }
 
 void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) {
@@ -145,6 +154,7 @@
 
 void Mir2Lir::DumpCoreRegPool() {
   DumpRegPool(&reg_pool_->core_regs_);
+  DumpRegPool(&reg_pool_->core64_regs_);
 }
 
 void Mir2Lir::DumpFpRegPool() {
@@ -274,6 +284,7 @@
 
 /* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
+  // TODO: 64-bit and refreg update
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
@@ -406,17 +417,10 @@
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempWord() {
-  // FIXME: temporary workaround.  For bring-up purposes, x86_64 needs the ability
-  // to allocate wide values as a pair of core registers.  However, we can't hold
-  // a reference in a register pair.  This workaround will be removed when the
-  // reference handling code is reworked, or x86_64 backend starts using wide core
-  // registers - whichever happens first.
-  if (cu_->instruction_set == kX86_64) {
-    return AllocTemp();
-  } else {
-    return (Is64BitInstructionSet(cu_->instruction_set)) ? AllocTempWide() : AllocTemp();
-  }
+RegStorage Mir2Lir::AllocTempRef() {
+  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true);
+  DCHECK(!res.IsPair());
+  return res;
 }
 
 RegStorage Mir2Lir::AllocTempSingle() {
@@ -432,6 +436,7 @@
 }
 
 RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  DCHECK_NE(reg_class, kRefReg);  // NOTE: the Dalvik width of a reference is always 32 bits.
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempDouble();
   }
@@ -441,6 +446,8 @@
 RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempSingle();
+  } else if (reg_class == kRefReg) {
+    return AllocTempRef();
   }
   return AllocTemp();
 }
@@ -459,8 +466,10 @@
 
 RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) {
   RegStorage reg;
-  // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg.
-  if ((reg_class == kAnyReg) || (reg_class == kFPReg)) {
+  if (reg_class == kRefReg) {
+    reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg);
+  }
+  if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) {
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
@@ -675,39 +684,6 @@
   p->SetDefEnd(finish);
 }
 
-RegLocation Mir2Lir::WideToNarrow(RegLocation rl) {
-  DCHECK(rl.wide);
-  if (rl.location == kLocPhysReg) {
-    if (rl.reg.IsPair()) {
-      RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow());
-      RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh());
-      if (info_lo->IsTemp()) {
-        info_lo->SetIsWide(false);
-        info_lo->ResetDefBody();
-      }
-      if (info_hi->IsTemp()) {
-        info_hi->SetIsWide(false);
-        info_hi->ResetDefBody();
-      }
-      rl.reg = rl.reg.GetLow();
-    } else {
-      /*
-       * TODO: If not a pair, we can't just drop the high register.  On some targets, we may be
-       * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit
-       * this code.  Will probably want to make this a virtual function.
-       */
-      // Can't narrow 64-bit register.  Clobber.
-      if (GetRegInfo(rl.reg)->IsTemp()) {
-        Clobber(rl.reg);
-        FreeTemp(rl.reg);
-      }
-      rl.location = kLocDalvikFrame;
-    }
-  }
-  rl.wide = false;
-  return rl;
-}
-
 void Mir2Lir::ResetDefLoc(RegLocation rl) {
   DCHECK(!rl.wide);
   if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
@@ -727,16 +703,8 @@
 }
 
 void Mir2Lir::ResetDefTracking() {
-  GrowableArray<RegisterInfo*>::Iterator core_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator sp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator dp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) {
+  GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+  for (RegisterInfo* info = iter.Next(); info != nullptr; info = iter.Next()) {
     info->ResetDefBody();
   }
 }
@@ -811,7 +779,11 @@
 bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) {
   if (reg_class == kAnyReg) {
     return true;
-  } else if (reg_class == kCoreReg) {
+  } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) {
+    /*
+     * For this purpose, consider Core and Ref to be the same class. We aren't dealing
+     * with width here - that should be checked at a higher level (if needed).
+     */
     return !reg.IsFloat();
   } else {
     return reg.IsFloat();
@@ -1347,20 +1319,26 @@
 }
 
 /* Mark register usage state and return long retloc */
-RegLocation Mir2Lir::GetReturnWide(bool is_double) {
-  RegLocation gpr_res = LocCReturnWide();
-  RegLocation fpr_res = LocCReturnDouble();
-  RegLocation res = is_double ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: LOG(FATAL) << "Wide and ref not supported in GetReturnWide"; break;
+    case kFPReg: res = LocCReturnDouble(); break;
+    default: res = LocCReturnWide(); break;
+  }
   Clobber(res.reg);
   LockTemp(res.reg);
   MarkWide(res.reg);
   return res;
 }
 
-RegLocation Mir2Lir::GetReturn(bool is_float) {
-  RegLocation gpr_res = LocCReturn();
-  RegLocation fpr_res = LocCReturnFloat();
-  RegLocation res = is_float ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: res = LocCReturnRef(); break;
+    case kFPReg: res = LocCReturnFloat(); break;
+    default: res = LocCReturn(); break;
+  }
   Clobber(res.reg);
   if (cu_->instruction_set == kMips) {
     MarkInUse(res.reg);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4673cc0..f363eb3 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -169,7 +169,7 @@
   int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ?
       Thread::ExceptionOffset<8>().Int32Value() :
       Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
   NewLIR2(kX86Mov32TI, ex_offset, 0);
   StoreValue(rl_dest, rl_result);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d66790d..648c148 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index aec39ab..0421a59 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -56,7 +56,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -118,7 +118,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 48bff6e..1cc16b9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -173,7 +173,10 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
 
   // The kMirOpSelect has two variants, one for constants and one for moves.
@@ -182,7 +185,7 @@
   if (is_constant_case) {
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -203,6 +206,8 @@
      *     mov t1, $true_case
      *     cmovz result_reg, t1
      */
+    // FIXME: depending on how you use registers you could get a false != mismatch when dealing
+    // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
     const bool result_reg_same_as_src =
         (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg());
     const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
@@ -224,7 +229,7 @@
     if (true_zero_case || false_zero_case || catch_all_case) {
       ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
       int immediateForTemp = true_zero_case ? false_val : true_val;
-      RegStorage temp1_reg = AllocTemp();
+      RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
       OpRegImm(kOpMov, temp1_reg, immediateForTemp);
 
       OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
@@ -234,9 +239,9 @@
   } else {
     RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
     RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -792,8 +797,8 @@
     Clobber(rs_r0);
     LockTemp(rs_r0);
 
-    RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
-    RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
+    RegLocation rl_new_value = LoadValue(rl_src_new_value);
 
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
@@ -1441,7 +1446,7 @@
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
 
   int data_offset;
   if (size == k64 || size == kDouble) {
@@ -1497,7 +1502,7 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   bool constant_index = rl_index.is_const;
   int32_t constant_index_value = 0;
   if (!constant_index) {
@@ -1880,7 +1885,7 @@
 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
@@ -1894,7 +1899,7 @@
   LoadConstant(result_reg, 0);
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
@@ -1972,8 +1977,8 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>)
-        * type_idx);
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+        (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
@@ -1992,7 +1997,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
 
   // SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 8b34168..4d8fd1b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -26,124 +26,105 @@
 
 namespace art {
 
-static const RegStorage core_regs_arr_32[] = {
+static constexpr RegStorage core_regs_arr_32[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
 };
-static const RegStorage core_regs_arr_64[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
+static constexpr RegStorage core_regs_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
 #ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
 #endif
 };
-static const RegStorage core_regs_arr_64q[] = {
+static constexpr RegStorage core_regs_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
 #ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
 #endif
 };
-static const RegStorage sp_regs_arr_32[] = {
+static constexpr RegStorage sp_regs_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
-static const RegStorage sp_regs_arr_64[] = {
+static constexpr RegStorage sp_regs_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_regs_arr_32[] = {
+static constexpr RegStorage dp_regs_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
-static const RegStorage dp_regs_arr_64[] = {
+static constexpr RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
-static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
-static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
-static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
-static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
-static const RegStorage core_temps_arr_64[] = {
+static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
+static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
+static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
+static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
+static constexpr RegStorage core_temps_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
 #ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11
 #endif
 };
-static const RegStorage core_temps_arr_64q[] = {
+static constexpr RegStorage core_temps_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
 #ifdef TARGET_REX_SUPPORT
     rs_r8q, rs_r9q, rs_r10q, rs_r11q
 #endif
 };
-static const RegStorage sp_temps_arr_32[] = {
+static constexpr RegStorage sp_temps_arr_32[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 };
-static const RegStorage sp_temps_arr_64[] = {
+static constexpr RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_temps_arr_32[] = {
+static constexpr RegStorage dp_temps_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
-static const RegStorage dp_temps_arr_64[] = {
+static constexpr RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
 
-static const RegStorage xp_temps_arr_32[] = {
+static constexpr RegStorage xp_temps_arr_32[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
 };
-static const RegStorage xp_temps_arr_64[] = {
+static constexpr RegStorage xp_temps_arr_64[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
 #ifdef TARGET_REX_SUPPORT
     rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
 #endif
 };
 
-static const std::vector<RegStorage> empty_pool;
-static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
-    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
-static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
-    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
-static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q,
-    core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0]));
-static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
-    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
-static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
-    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
-static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
-    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
-static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
-    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
-static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
-    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
-static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
-    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
-static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q,
-    reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0]));
-static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
-    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
-static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
-    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
-static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q,
-    core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0]));
-static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
-    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
-static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
-    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
-static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
-    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
-static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
-    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
+static constexpr ArrayRef<const RegStorage> empty_pool;
+static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q);
+static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
+static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64);
+static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q);
+static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64);
+static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64);
 
-static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32,
-    xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0]));
-static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64,
-    xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0]));
+static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64);
 
 RegStorage rs_rX86_SP;
 
@@ -177,6 +158,11 @@
   return x86_loc_c_return;
 }
 
+RegLocation X86Mir2Lir::LocCReturnRef() {
+  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
+  return x86_loc_c_return;
+}
+
 RegLocation X86Mir2Lir::LocCReturnWide() {
   return x86_loc_c_return_wide;
 }
@@ -565,9 +551,9 @@
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
   if (Gen64Bit()) {
-    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64,
-                                          dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/,
-                                          core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64);
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64,
+                                          dp_regs_64, reserved_regs_64, reserved_regs_64q,
+                                          core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64);
   } else {
     reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
                                           dp_regs_32, reserved_regs_32, empty_pool,
@@ -577,7 +563,7 @@
   // Target-specific adjustments.
 
   // Add in XMM registers.
-  const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
+  const ArrayRef<const RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
   for (RegStorage reg : *xp_temps) {
     RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
     reginfo_map_.Put(reg.GetReg(), info);
@@ -597,10 +583,28 @@
     // Redirect 32-bit vector's master storage to 128-bit vector.
     info->SetMaster(xp_reg_info);
 
-    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
+    RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Redirect 64-bit vector's master storage to 128-bit vector.
     dp_reg_info->SetMaster(xp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+  }
+
+  if (Gen64Bit()) {
+    // Alias each 32-bit core register view to its corresponding 64-bit register.
+    GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
+    for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
+      int x_reg_num = info->GetReg().GetRegNum();
+      RegStorage x_reg = RegStorage::Solo64(x_reg_num);
+      RegisterInfo* x_reg_info = GetRegInfo(x_reg);
+      // 64bit X register's master storage should refer to itself.
+      DCHECK_EQ(x_reg_info, x_reg_info->Master());
+      // Redirect 32bit W master storage to 64bit X.
+      info->SetMaster(x_reg_info);
+      // 32bit W should show a single 32-bit mask bit, at first referring to the low half.
+      DCHECK_EQ(info->StorageMask(), 0x1U);
+    }
   }
 
   // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
@@ -981,7 +985,7 @@
   }
 
   // Okay, we are commited to inlining this.
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
 
   // Is the string non-NULL?
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 092e68e..618b3a5 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -572,7 +572,7 @@
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
         } else {
-          res = LoadConstantNoClobber(RegStorage::Solo32(low_reg_val), val_lo);
+          res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo);
         }
         if (val_hi != 0) {
           RegStorage r_dest_hi = AllocTempDouble();
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 964dfeb..ca956aa 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -173,7 +173,10 @@
   env_->ExceptionClear();
   jclass jlame = env_->FindClass("java/lang/AbstractMethodError");
   EXPECT_TRUE(env_->IsInstanceOf(exception, jlame));
-  Thread::Current()->ClearException();
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    Thread::Current()->ClearException();
+  }
 }
 
 // TODO: need check-cast test (when stub complete & we can throw/catch
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index e37f943..ca1239f 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -650,34 +650,55 @@
       copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
           (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
-      copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
-          (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
       // Use original code if it exists. Otherwise, set the code pointer to the resolution
       // trampoline.
+
+      // Quick entrypoint:
       const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
+      bool quick_is_interpreted = false;
       if (quick_code != nullptr &&
           (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
         // We have code for a non-static or initialized method, just use the code.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
       } else if (quick_code == nullptr && orig->IsNative() &&
           (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
         // Non-static or initialized native method missing compiled code, use generic JNI version.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_generic_jni_trampoline_offset_));
+        quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
       } else if (quick_code == nullptr && !orig->IsNative()) {
         // We don't have code at all for a non-native method, use the interpreter.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_));
+        quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+        quick_is_interpreted = true;
       } else {
         CHECK(!orig->GetDeclaringClass()->IsInitialized());
         // We have code for a static method, but need to go through the resolution stub for class
         // initialization.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_));
+        quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
       }
+      copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
+
+      // Portable entrypoint:
       const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
-      if (portable_code != nullptr) {
-        copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
+      bool portable_is_interpreted = false;
+      if (portable_code != nullptr &&
+          (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
+        // We have code for a non-static or initialized method, just use the code.
+      } else if (portable_code == nullptr && orig->IsNative() &&
+          (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
+        // Non-static or initialized native method missing compiled code, use generic JNI version.
+        // TODO: generic JNI support for LLVM.
+        portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
+      } else if (portable_code == nullptr && !orig->IsNative()) {
+        // We don't have code at all for a non-native method, use the interpreter.
+        portable_code = GetOatAddress(portable_to_interpreter_bridge_offset_);
+        portable_is_interpreted = true;
       } else {
-        copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_));
+        CHECK(!orig->GetDeclaringClass()->IsInitialized());
+        // We have code for a static method, but need to go through the resolution stub for class
+        // initialization.
+        portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
       }
+      copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
+
+      // JNI entrypoint:
       if (orig->IsNative()) {
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
@@ -688,6 +709,15 @@
         const byte* native_gc_map = GetOatAddress(native_gc_map_offset);
         copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map));
       }
+
+      // Interpreter entrypoint:
+      // Set the interpreter entrypoint depending on whether there is compiled code or not.
+      uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted)
+          ? interpreter_to_interpreter_bridge_offset_
+          : interpreter_to_compiled_code_bridge_offset_;
+      copy->SetEntryPointFromInterpreter<kVerifyNone>(
+          reinterpret_cast<EntryPointFromInterpreter*>(
+              const_cast<byte*>(GetOatAddress(interpreter_code))));
     }
   }
 }
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 7664a7f..3bbb723 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include <fstream>
 
 #include "base/logging.h"
 #include "base/macros.h"
@@ -61,9 +62,6 @@
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
-  }
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index 25c9b20..f8dca66 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -1868,6 +1868,10 @@
 
   phi->addIncoming(storage_object_addr, block_check_init);
   phi->addIncoming(loaded_storage_object_addr, block_after_load_static);
+
+  // Ensure load of status and load of value don't re-order.
+  irb_.CreateMemoryBarrier(art::kLoadLoad);
+
   return phi;
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 423b13e..c945a06 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -19,7 +19,7 @@
 
 #include "code_generator.h"
 #include "nodes.h"
-#include "utils/arm/assembler_arm.h"
+#include "utils/arm/assembler_arm32.h"
 
 namespace art {
 namespace arm {
@@ -152,7 +152,7 @@
 
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
-  ArmAssembler assembler_;
+  Arm32Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 53e7bbe..7a33620 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -26,6 +26,18 @@
 
 namespace art {
 
+static void DumpBitVector(BitVector* vector,
+                          std::ostream& buffer,
+                          size_t count,
+                          const char* prefix) {
+  buffer << prefix;
+  buffer << '(';
+  for (size_t i = 0; i < count; ++i) {
+    buffer << vector->IsBitSet(i);
+  }
+  buffer << ")\n";
+}
+
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -43,12 +55,13 @@
   for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     buffer << "Block " << block->GetBlockId() << std::endl;
+    size_t ssa_values = liveness.GetNumberOfSsaValues();
     BitVector* live_in = liveness.GetLiveInSet(*block);
-    live_in->Dump(buffer, "  live in: ");
+    DumpBitVector(live_in, buffer, ssa_values, "  live in: ");
     BitVector* live_out = liveness.GetLiveOutSet(*block);
-    live_out->Dump(buffer, "  live out: ");
+    DumpBitVector(live_out, buffer, ssa_values, "  live out: ");
     BitVector* kill = liveness.GetKillSet(*block);
-    kill->Dump(buffer, "  kill: ");
+    DumpBitVector(kill, buffer, ssa_values, "  kill: ");
   }
   ASSERT_STREQ(expected, buffer.str().c_str());
 }
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 33084df..1284a97 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -95,14 +95,26 @@
     // All predecessors have already been visited because we are visiting in reverse post order.
     // We merge the values of all locals, creating phis if those values differ.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
+      bool one_predecessor_has_no_value = false;
       bool is_different = false;
       HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local);
-      for (size_t i = 1; i < block->GetPredecessors().Size(); i++) {
-        if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) {
+
+      for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+        HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local);
+        if (current == nullptr) {
+          one_predecessor_has_no_value = true;
+          break;
+        } else if (current != value) {
           is_different = true;
-          break;
         }
       }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
       if (is_different) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index d104619..485ea27 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -459,4 +459,34 @@
   TestCode(data, expected);
 }
 
+TEST(SsaTest, LocalInIf) {
+  // Test that we do not create a phi in the join block when one predecessor
+  // does not update the local.
+  const char* expected =
+    "BasicBlock 0, succ: 1\n"
+    "  0: IntConstant 0 [3, 3]\n"
+    "  1: IntConstant 4\n"
+    "  2: Goto\n"
+    "BasicBlock 1, pred: 0, succ: 2, 5\n"
+    "  3: Equal(0, 0) [4]\n"
+    "  4: If(3)\n"
+    "BasicBlock 2, pred: 1, succ: 3\n"
+    "  5: Goto\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "  6: ReturnVoid\n"
+    "BasicBlock 4, pred: 3\n"
+    "  7: Exit\n"
+    // Synthesized block to avoid critical edge.
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  8: Goto\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
 }  // namespace art
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 24378b4..ac84d6a 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,6 +30,10 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
+  // NOTE: the assembler used here is ARM, not Thumb.  This is because the address
+  // returned by this function is a pointer and for thumb we would have to set the
+  // bottom bit.  It doesn't matter since the instructions generated are the same
+  // size anyway.
   std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
 
   switch (abi) {
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 64685c1..b607a1d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -25,66 +25,16 @@
 namespace art {
 namespace arm {
 
-// Instruction encoding bits.
-enum {
-  H   = 1 << 5,   // halfword (or byte)
-  L   = 1 << 20,  // load (or store)
-  S   = 1 << 20,  // set condition code (or leave unchanged)
-  W   = 1 << 21,  // writeback base register (or leave unchanged)
-  A   = 1 << 21,  // accumulate in multiply instruction (or not)
-  B   = 1 << 22,  // unsigned byte (or word)
-  N   = 1 << 22,  // long (or short)
-  U   = 1 << 23,  // positive (or negative) offset/index
-  P   = 1 << 24,  // offset/pre-indexed addressing (or post-indexed addressing)
-  I   = 1 << 25,  // immediate shifter operand (or not)
-
-  B0 = 1,
-  B1 = 1 << 1,
-  B2 = 1 << 2,
-  B3 = 1 << 3,
-  B4 = 1 << 4,
-  B5 = 1 << 5,
-  B6 = 1 << 6,
-  B7 = 1 << 7,
-  B8 = 1 << 8,
-  B9 = 1 << 9,
-  B10 = 1 << 10,
-  B11 = 1 << 11,
-  B12 = 1 << 12,
-  B16 = 1 << 16,
-  B17 = 1 << 17,
-  B18 = 1 << 18,
-  B19 = 1 << 19,
-  B20 = 1 << 20,
-  B21 = 1 << 21,
-  B22 = 1 << 22,
-  B23 = 1 << 23,
-  B24 = 1 << 24,
-  B25 = 1 << 25,
-  B26 = 1 << 26,
-  B27 = 1 << 27,
-
-  // Instruction bit masks.
-  RdMask = 15 << 12,  // in str instruction
-  CondMask = 15 << 28,
-  CoprocessorMask = 15 << 8,
-  OpCodeMask = 15 << 21,  // in data-processing instructions
-  Imm24Mask = (1 << 24) - 1,
-  Off12Mask = (1 << 12) - 1,
-
-  // ldrex/strex register field encodings.
-  kLdExRnShift = 16,
-  kLdExRtShift = 12,
-  kStrExRnShift = 16,
-  kStrExRdShift = 12,
-  kStrExRtShift = 0,
-};
-
-
-static const char* kRegisterNames[] = {
+const char* kRegisterNames[] = {
   "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
   "fp", "ip", "sp", "lr", "pc"
 };
+
+const char* kConditionNames[] = {
+  "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", "HI", "LS", "GE", "LT", "GT",
+  "LE", "AL",
+};
+
 std::ostream& operator<<(std::ostream& os, const Register& rhs) {
   if (rhs >= R0 && rhs <= PC) {
     os << kRegisterNames[rhs];
@@ -114,11 +64,6 @@
   return os;
 }
 
-
-static const char* kConditionNames[] = {
-  "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", "HI", "LS", "GE", "LT", "GT",
-  "LE", "AL",
-};
 std::ostream& operator<<(std::ostream& os, const Condition& rhs) {
   if (rhs >= EQ && rhs <= AL) {
     os << kConditionNames[rhs];
@@ -128,1084 +73,218 @@
   return os;
 }
 
-void ArmAssembler::Emit(int32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<int32_t>(value);
-}
 
 
-void ArmAssembler::EmitType01(Condition cond,
-                              int type,
-                              Opcode opcode,
-                              int set_cc,
-                              Register rn,
-                              Register rd,
-                              ShifterOperand so) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     type << kTypeShift |
-                     static_cast<int32_t>(opcode) << kOpcodeShift |
-                     set_cc << kSShift |
-                     static_cast<int32_t>(rn) << kRnShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitType5(Condition cond, int offset, bool link) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     5 << kTypeShift |
-                     (link ? 1 : 0) << kLinkShift;
-  Emit(ArmAssembler::EncodeBranchOffset(offset, encoding));
-}
-
-
-void ArmAssembler::EmitMemOp(Condition cond,
-                             bool load,
-                             bool byte,
-                             Register rd,
-                             Address ad) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B26 |
-                     (load ? L : 0) |
-                     (byte ? B : 0) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     ad.encoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMemOpAddressMode3(Condition cond,
-                                         int32_t mode,
-                                         Register rd,
-                                         Address ad) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B22  |
-                     mode |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     ad.encoding3();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMultiMemOp(Condition cond,
-                                  BlockAddressMode am,
-                                  bool load,
-                                  Register base,
-                                  RegList regs) {
-  CHECK_NE(base, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 |
-                     am |
-                     (load ? L : 0) |
-                     (static_cast<int32_t>(base) << kRnShift) |
-                     regs;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitShiftImmediate(Condition cond,
-                                      Shift opcode,
-                                      Register rd,
-                                      Register rm,
-                                      ShifterOperand so) {
-  CHECK_NE(cond, kNoCondition);
-  CHECK_EQ(so.type(), 1U);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     static_cast<int32_t>(MOV) << kOpcodeShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding() << kShiftImmShift |
-                     static_cast<int32_t>(opcode) << kShiftShift |
-                     static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitShiftRegister(Condition cond,
-                                     Shift opcode,
-                                     Register rd,
-                                     Register rm,
-                                     ShifterOperand so) {
-  CHECK_NE(cond, kNoCondition);
-  CHECK_EQ(so.type(), 0U);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     static_cast<int32_t>(MOV) << kOpcodeShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding() << kShiftRegisterShift |
-                     static_cast<int32_t>(opcode) << kShiftShift |
-                     B4 |
-                     static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitBranch(Condition cond, Label* label, bool link) {
-  if (label->IsBound()) {
-    EmitType5(cond, label->Position() - buffer_.Size(), link);
-  } else {
-    int position = buffer_.Size();
-    // Use the offset field of the branch instruction for linking the sites.
-    EmitType5(cond, label->position_, link);
-    label->LinkTo(position);
-  }
-}
-
-void ArmAssembler::and_(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::eor(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::sub(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
-}
-
-void ArmAssembler::rsb(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
-}
-
-void ArmAssembler::rsbs(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::add(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::adds(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::subs(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::adc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::sbc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::rsc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::tst(Register rn, ShifterOperand so, Condition cond) {
-  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::teq(Register rn, ShifterOperand so, Condition cond) {
-  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::cmp(Register rn, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::cmn(Register rn, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::orr(Register rd, Register rn,
-                    ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::orrs(Register rd, Register rn,
-                        ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::mov(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
-}
-
-
-void ArmAssembler::movs(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
-}
-
-
-void ArmAssembler::bic(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::mvn(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
-}
-
-
-void ArmAssembler::mvns(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
-}
-
-
-void ArmAssembler::clz(Register rd, Register rm, Condition cond) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  CHECK_NE(rd, PC);
-  CHECK_NE(rm, PC);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B22 | B21 | (0xf << 16) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     (0xf << 8) | B4 | static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::movw(Register rd, uint16_t imm16, Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     B25 | B24 | ((imm16 >> 12) << 16) |
-                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::movt(Register rd, uint16_t imm16, Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     B25 | B24 | B22 | ((imm16 >> 12) << 16) |
-                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMulOp(Condition cond, int32_t opcode,
-                             Register rd, Register rn,
-                             Register rm, Register rs) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(rs, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = opcode |
-      (static_cast<int32_t>(cond) << kConditionShift) |
-      (static_cast<int32_t>(rn) << kRnShift) |
-      (static_cast<int32_t>(rd) << kRdShift) |
-      (static_cast<int32_t>(rs) << kRsShift) |
-      B7 | B4 |
-      (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::mul(Register rd, Register rn, Register rm, Condition cond) {
-  // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
-  EmitMulOp(cond, 0, R0, rd, rn, rm);
-}
-
-
-void ArmAssembler::mla(Register rd, Register rn, Register rm, Register ra,
-                       Condition cond) {
-  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
-  EmitMulOp(cond, B21, ra, rd, rn, rm);
-}
-
-
-void ArmAssembler::mls(Register rd, Register rn, Register rm, Register ra,
-                       Condition cond) {
-  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
-  EmitMulOp(cond, B22 | B21, ra, rd, rn, rm);
-}
-
-
-void ArmAssembler::umull(Register rd_lo, Register rd_hi, Register rn,
-                         Register rm, Condition cond) {
-  // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
-  EmitMulOp(cond, B23, rd_lo, rd_hi, rn, rm);
-}
-
-
-void ArmAssembler::ldr(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, true, false, rd, ad);
-}
-
-
-void ArmAssembler::str(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, false, false, rd, ad);
-}
-
-
-void ArmAssembler::ldrb(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, true, true, rd, ad);
-}
-
-
-void ArmAssembler::strb(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, false, true, rd, ad);
-}
-
-
-void ArmAssembler::ldrh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::strh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, B7 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrsb(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | B6 | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrsh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | B6 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrd(Register rd, Address ad, Condition cond) {
-  CHECK_EQ(rd % 2, 0);
-  EmitMemOpAddressMode3(cond, B7 | B6 | B4, rd, ad);
-}
-
-
-void ArmAssembler::strd(Register rd, Address ad, Condition cond) {
-  CHECK_EQ(rd % 2, 0);
-  EmitMemOpAddressMode3(cond, B7 | B6 | B5 | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldm(BlockAddressMode am,
-                       Register base,
-                       RegList regs,
-                       Condition cond) {
-  EmitMultiMemOp(cond, am, true, base, regs);
-}
-
-
-void ArmAssembler::stm(BlockAddressMode am,
-                       Register base,
-                       RegList regs,
-                       Condition cond) {
-  EmitMultiMemOp(cond, am, false, base, regs);
-}
-
-
-void ArmAssembler::ldrex(Register rt, Register rn, Condition cond) {
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 |
-                     B23 |
-                     L   |
-                     (static_cast<int32_t>(rn) << kLdExRnShift) |
-                     (static_cast<int32_t>(rt) << kLdExRtShift) |
-                     B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::strex(Register rd,
-                         Register rt,
-                         Register rn,
-                         Condition cond) {
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 |
-                     B23 |
-                     (static_cast<int32_t>(rn) << kStrExRnShift) |
-                     (static_cast<int32_t>(rd) << kStrExRdShift) |
-                     B11 | B10 | B9 | B8 | B7 | B4 |
-                     (static_cast<int32_t>(rt) << kStrExRtShift);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::clrex() {
-  int32_t encoding = (kSpecialCondition << kConditionShift) |
-                     B26 | B24 | B22 | B21 | B20 | (0xff << 12) | B4 | 0xf;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::nop(Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B25 | B24 | B21 | (0xf << 12);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovsr(SRegister sn, Register rt, Condition cond) {
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrs(Register rt, SRegister sn, Condition cond) {
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B20 |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovsrr(SRegister sm, Register rt, Register rt2,
-                           Condition cond) {
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(sm, S31);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrrs(Register rt, Register rt2, SRegister sm,
-                           Condition cond) {
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(sm, S31);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(rt, rt2);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 | B20 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovdrr(DRegister dm, Register rt, Register rt2,
-                           Condition cond) {
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrrd(Register rt, Register rt2, DRegister dm,
-                           Condition cond) {
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(rt, rt2);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 | B20 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vldrs(SRegister sd, Address ad, Condition cond) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 | B20 |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     B11 | B9 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vstrs(SRegister sd, Address ad, Condition cond) {
-  CHECK_NE(static_cast<Register>(ad.encoding_ & (0xf << kRnShift)), PC);
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     B11 | B9 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vldrd(DRegister dd, Address ad, Condition cond) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 | B20 |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     B11 | B9 | B8 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vstrd(DRegister dd, Address ad, Condition cond) {
-  CHECK_NE(static_cast<Register>(ad.encoding_ & (0xf << kRnShift)), PC);
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     B11 | B9 | B8 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPsss(Condition cond, int32_t opcode,
-                              SRegister sd, SRegister sn, SRegister sm) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     ((static_cast<int32_t>(sn) & 1)*B7) |
-                     ((static_cast<int32_t>(sm) & 1)*B5) |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPddd(Condition cond, int32_t opcode,
-                              DRegister dd, DRegister dn, DRegister dm) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(dn, kNoDRegister);
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     ((static_cast<int32_t>(dn) >> 4)*B7) |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
-}
-
-
-bool ArmAssembler::vmovs(SRegister sd, float s_imm, Condition cond) {
-  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
-  if (((imm32 & ((1 << 19) - 1)) == 0) &&
-      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
-       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
-    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
-        ((imm32 >> 19) & ((1 << 6) -1));
-    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
-               sd, S0, S0);
-    return true;
-  }
-  return false;
-}
-
-
-bool ArmAssembler::vmovd(DRegister dd, double d_imm, Condition cond) {
-  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
-  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
-      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
-       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
-    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
-        ((imm64 >> 48) & ((1 << 6) -1));
-    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
-               dd, D0, D0);
-    return true;
-  }
-  return false;
-}
-
-
-void ArmAssembler::vadds(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
-}
-
-
-void ArmAssembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
-}
-
-
-void ArmAssembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
-}
-
-
-void ArmAssembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, 0, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, 0, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B6, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B6, dd, dn, dm);
-}
-
-
-void ArmAssembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B23, sd, sn, sm);
-}
-
-
-void ArmAssembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B23, dd, dn, dm);
-}
-
-
-void ArmAssembler::vabss(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
-}
-
-void ArmAssembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::EmitVFPsd(Condition cond, int32_t opcode,
-                             SRegister sd, DRegister dm) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPds(Condition cond, int32_t opcode,
-                             DRegister dd, SRegister sm) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     ((static_cast<int32_t>(sm) & 1)*B5) |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vcmpsz(SRegister sd, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
-}
-
-
-void ArmAssembler::vcmpdz(DRegister dd, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
-}
-
-
-void ArmAssembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 |
-                     (static_cast<int32_t>(PC)*B12) |
-                     B11 | B9 | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::svc(uint32_t imm24) {
-  CHECK(IsUint(24, imm24)) << imm24;
-  int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::bkpt(uint16_t imm16) {
-  int32_t encoding = (AL << kConditionShift) | B24 | B21 |
-                     ((imm16 >> 4) << 8) | B6 | B5 | B4 | (imm16 & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::b(Label* label, Condition cond) {
-  EmitBranch(cond, label, false);
-}
-
-
-void ArmAssembler::bl(Label* label, Condition cond) {
-  EmitBranch(cond, label, true);
-}
-
-
-void ArmAssembler::blx(Register rm, Condition cond) {
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B21 | (0xfff << 8) | B5 | B4 |
-                     (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-void ArmAssembler::bx(Register rm, Condition cond) {
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B21 | (0xfff << 8) | B4 |
-                     (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-void ArmAssembler::MarkExceptionHandler(Label* label) {
-  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
-  Label l;
-  b(&l);
-  EmitBranch(AL, label, false);
-  Bind(&l);
-}
-
-
-void ArmAssembler::Bind(Label* label) {
-  CHECK(!label->IsBound());
-  int bound_pc = buffer_.Size();
-  while (label->IsLinked()) {
-    int32_t position = label->Position();
-    int32_t next = buffer_.Load<int32_t>(position);
-    int32_t encoded = ArmAssembler::EncodeBranchOffset(bound_pc - position, next);
-    buffer_.Store<int32_t>(position, encoded);
-    label->position_ = ArmAssembler::DecodeBranchOffset(next);
-  }
-  label->BindTo(bound_pc);
-}
-
-
-void ArmAssembler::EncodeUint32InTstInstructions(uint32_t data) {
-  // TODO: Consider using movw ip, <16 bits>.
-  while (!IsUint(8, data)) {
-    tst(R0, ShifterOperand(data & 0xFF), VS);
-    data >>= 8;
-  }
-  tst(R0, ShifterOperand(data), MI);
-}
-
-
-int32_t ArmAssembler::EncodeBranchOffset(int offset, int32_t inst) {
-  // The offset is off by 8 due to the way the ARM CPUs read PC.
-  offset -= 8;
-  CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
-
-  // Properly preserve only the bits supported in the instruction.
-  offset >>= 2;
-  offset &= kBranchOffsetMask;
-  return (inst & ~kBranchOffsetMask) | offset;
-}
-
-
-int ArmAssembler::DecodeBranchOffset(int32_t inst) {
-  // Sign-extend, left-shift by 2, then add 8.
-  return ((((inst & kBranchOffsetMask) << 8) >> 6) + 8);
-}
-
-void ArmAssembler::AddConstant(Register rd, int32_t value, Condition cond) {
-  AddConstant(rd, rd, value, cond);
-}
-
-
-void ArmAssembler::AddConstant(Register rd, Register rn, int32_t value,
-                               Condition cond) {
-  if (value == 0) {
-    if (rd != rn) {
-      mov(rd, ShifterOperand(rn), cond);
-    }
-    return;
-  }
-  // We prefer to select the shorter code sequence rather than selecting add for
-  // positive values and sub for negatives ones, which would slightly improve
-  // the readability of generated code for some constants.
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    add(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(-value, &shifter_op)) {
-    sub(rd, rn, shifter_op, cond);
-  } else {
-    CHECK(rn != IP);
-    if (ShifterOperand::CanHold(~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      add(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHold(~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      sub(rd, rn, ShifterOperand(IP), cond);
-    } else {
-      movw(IP, Low16Bits(value), cond);
-      uint16_t value_high = High16Bits(value);
-      if (value_high != 0) {
-        movt(IP, value_high, cond);
+uint32_t ShifterOperand::encodingArm() const {
+  CHECK(is_valid());
+  switch (type_) {
+    case kImmediate:
+      if (is_rotate_) {
+        return (rotate_ << kRotateShift) | (immed_ << kImmed8Shift);
+      } else {
+        return immed_;
       }
-      add(rd, rn, ShifterOperand(IP), cond);
-    }
-  }
-}
-
-
-void ArmAssembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
-                                       Condition cond) {
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(-value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
-  } else {
-    CHECK(rn != IP);
-    if (ShifterOperand::CanHold(~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHold(~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
-    } else {
-      movw(IP, Low16Bits(value), cond);
-      uint16_t value_high = High16Bits(value);
-      if (value_high != 0) {
-        movt(IP, value_high, cond);
+      break;
+    case kRegister:
+      if (is_shift_) {
+        // Shifted immediate or register.
+        if (rs_ == kNoRegister) {
+          // Immediate shift.
+          return immed_ << kShiftImmShift |
+                          static_cast<uint32_t>(shift_) << kShiftShift |
+                          static_cast<uint32_t>(rm_);
+        } else {
+          // Register shift.
+          return static_cast<uint32_t>(rs_) << kShiftRegisterShift |
+              static_cast<uint32_t>(shift_) << kShiftShift | (1 << 4) |
+              static_cast<uint32_t>(rm_);
+        }
+      } else {
+        // Simple register
+        return static_cast<uint32_t>(rm_);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
-    }
+      break;
+    default:
+      // Can't get here.
+      LOG(FATAL) << "Invalid shifter operand for ARM";
+      return 0;
   }
 }
 
-
-void ArmAssembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    mov(rd, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(~value, &shifter_op)) {
-    mvn(rd, shifter_op, cond);
+uint32_t ShifterOperand::encodingThumb(int version) const {
+  CHECK(version == 1 || version == 2);
+  if (version == 1) {
+    LOG(FATAL) << "Invalid of use encodingThumb with version 1";
   } else {
-    movw(rd, Low16Bits(value), cond);
-    uint16_t value_high = High16Bits(value);
-    if (value_high != 0) {
-      movt(rd, value_high, cond);
+    switch (type_) {
+      case kImmediate:
+        return immed_;
+      case kRegister:
+        if (is_shift_) {
+          // Shifted immediate or register.
+          if (rs_ == kNoRegister) {
+            // Immediate shift.
+            if (shift_ == RRX) {
+              // RRX is encoded as an ROR with imm 0.
+              return ROR << 4 | static_cast<uint32_t>(rm_);
+            } else {
+              uint32_t imm3 = immed_ >> 2;
+              uint32_t imm2 = immed_ & 0b11;
+
+              return imm3 << 12 | imm2 << 6 | shift_ << 4 |
+                  static_cast<uint32_t>(rm_);
+            }
+          } else {
+            LOG(FATAL) << "No register-shifted register instruction available in thumb";
+            return 0;
+          }
+        } else {
+          // Simple register
+          return static_cast<uint32_t>(rm_);
+        }
+        break;
+      default:
+        // Can't get here.
+        LOG(FATAL) << "Invalid shifter operand for thumb";
+        return 0;
     }
   }
+  return 0;
+}
+
+bool ShifterOperand::CanHoldThumb(Register rd, Register rn, Opcode opcode,
+                                  uint32_t immediate, ShifterOperand* shifter_op) {
+  shifter_op->type_ = kImmediate;
+  shifter_op->immed_ = immediate;
+  shifter_op->is_shift_ = false;
+  shifter_op->is_rotate_ = false;
+  switch (opcode) {
+    case ADD:
+    case SUB:
+      if (rn == SP) {
+        if (rd == SP) {
+          return immediate < (1 << 9);    // 9 bits allowed.
+        } else {
+          return immediate < (1 << 12);   // 12 bits.
+        }
+      }
+      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+        return true;
+      }
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+
+    case MOV:
+      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+        return true;
+      }
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+    case MVN:
+    default:
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+  }
 }
 
+uint32_t Address::encodingArm() const {
+  CHECK(IsAbsoluteUint(12, offset_));
+  uint32_t encoding;
+  if (offset_ < 0) {
+    encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+  } else {
+    encoding =  am_ | offset_;
+  }
+  encoding |= static_cast<uint32_t>(rn_) << kRnShift;
+  return encoding;
+}
 
-bool Address::CanHoldLoadOffset(LoadOperandType type, int offset) {
+
+uint32_t Address::encodingThumb(int version) const {
+  CHECK(version == 1 || version == 2);
+  uint32_t encoding = 0;
+  if (version == 2) {
+      encoding = static_cast<uint32_t>(rn_) << 16;
+      // Check for the T3/T4 encoding.
+      // PUW must Offset for T3
+      // Convert ARM PU0W to PUW
+      // The Mode is in ARM encoding format which is:
+      // |P|U|0|W|
+      // we need this in thumb2 mode:
+      // |P|U|W|
+
+      uint32_t am = am_;
+      int32_t offset = offset_;
+      if (offset < 0) {
+        am ^= 1 << kUShift;
+        offset = -offset;
+      }
+      if (offset_ < 0 || (offset >= 0 && offset < 256 &&
+        am_ != Mode::Offset)) {
+          // T4 encoding.
+        uint32_t PUW = am >> 21;   // Move down to bottom of word.
+        PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
+        // If P is 0 then W must be 1 (Different from ARM).
+        if ((PUW & 0b100) == 0) {
+          PUW |= 0b1;
+        }
+        encoding |= B11 | PUW << 8 | offset;
+      } else {
+        // T3 encoding (also sets op1 to 0b01).
+        encoding |= B23 | offset_;
+      }
+  } else {
+    LOG(FATAL) << "Invalid use of encodingThumb for version 1";
+  }
+  return encoding;
+}
+
+// This is very like the ARM encoding except the offset is 10 bits.
+uint32_t Address::encodingThumbLdrdStrd() const {
+  uint32_t encoding;
+  uint32_t am = am_;
+  // If P is 0 then W must be 1 (Different from ARM).
+  uint32_t PU1W = am_ >> 21;   // Move down to bottom of word.
+  if ((PU1W & 0b1000) == 0) {
+    am |= 1 << 21;      // Set W bit.
+  }
+  if (offset_ < 0) {
+    int32_t off = -offset_;
+    CHECK_LT(off, 1024);
+    CHECK_EQ((off & 0b11), 0);    // Must be multiple of 4.
+    encoding = (am ^ (1 << kUShift)) | off >> 2;  // Flip U to adjust sign.
+  } else {
+    CHECK_LT(offset_, 1024);
+    CHECK_EQ((offset_ & 0b11), 0);    // Must be multiple of 4.
+    encoding =  am | offset_ >> 2;
+  }
+  encoding |= static_cast<uint32_t>(rn_) << 16;
+  return encoding;
+}
+
+// Encoding for ARM addressing mode 3.
+uint32_t Address::encoding3() const {
+  const uint32_t offset_mask = (1 << 12) - 1;
+  uint32_t encoding = encodingArm();
+  uint32_t offset = encoding & offset_mask;
+  CHECK_LT(offset, 256u);
+  return (encoding & ~offset_mask) | ((offset & 0xf0) << 4) | (offset & 0xf);
+}
+
+// Encoding for vfp load/store addressing.
+uint32_t Address::vencoding() const {
+  const uint32_t offset_mask = (1 << 12) - 1;
+  uint32_t encoding = encodingArm();
+  uint32_t offset = encoding & offset_mask;
+  CHECK(IsAbsoluteUint(10, offset));  // In the range -1020 to +1020.
+  CHECK_ALIGNED(offset, 2);  // Multiple of 4.
+  CHECK((am_ == Offset) || (am_ == NegOffset));
+  uint32_t vencoding = (encoding & (0xf << kRnShift)) | (offset >> 2);
+  if (am_ == Offset) {
+    vencoding |= 1 << 23;
+  }
+  return vencoding;
+}
+
+
+bool Address::CanHoldLoadOffsetArm(LoadOperandType type, int offset) {
   switch (type) {
     case kLoadSignedByte:
     case kLoadSignedHalfword:
@@ -1225,7 +304,7 @@
 }
 
 
-bool Address::CanHoldStoreOffset(StoreOperandType type, int offset) {
+bool Address::CanHoldStoreOffsetArm(StoreOperandType type, int offset) {
   switch (type) {
     case kStoreHalfword:
     case kStoreWordPair:
@@ -1242,198 +321,48 @@
   }
 }
 
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset.
-void ArmAssembler::LoadFromOffset(LoadOperandType type,
-                                  Register reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldLoadOffset(type, offset)) {
-    CHECK(base != IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(type, offset));
+bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
   switch (type) {
     case kLoadSignedByte:
-      ldrsb(reg, Address(base, offset), cond);
-      break;
-    case kLoadUnsignedByte:
-      ldrb(reg, Address(base, offset), cond);
-      break;
     case kLoadSignedHalfword:
-      ldrsh(reg, Address(base, offset), cond);
-      break;
     case kLoadUnsignedHalfword:
-      ldrh(reg, Address(base, offset), cond);
-      break;
+    case kLoadUnsignedByte:
     case kLoadWord:
-      ldr(reg, Address(base, offset), cond);
-      break;
+      return IsAbsoluteUint(12, offset);
+    case kLoadSWord:
+    case kLoadDWord:
+      return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kLoadWordPair:
-      ldrd(reg, Address(base, offset), cond);
-      break;
-    default:
+      return IsAbsoluteUint(10, offset);
+  default:
       LOG(FATAL) << "UNREACHABLE";
+      return false;
   }
 }
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
-void ArmAssembler::LoadSFromOffset(SRegister reg,
-                                   Register base,
-                                   int32_t offset,
-                                   Condition cond) {
-  if (!Address::CanHoldLoadOffset(kLoadSWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(kLoadSWord, offset));
-  vldrs(reg, Address(base, offset), cond);
-}
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
-void ArmAssembler::LoadDFromOffset(DRegister reg,
-                                   Register base,
-                                   int32_t offset,
-                                   Condition cond) {
-  if (!Address::CanHoldLoadOffset(kLoadDWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(kLoadDWord, offset));
-  vldrd(reg, Address(base, offset), cond);
-}
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset.
-void ArmAssembler::StoreToOffset(StoreOperandType type,
-                                 Register reg,
-                                 Register base,
-                                 int32_t offset,
-                                 Condition cond) {
-  if (!Address::CanHoldStoreOffset(type, offset)) {
-    CHECK(reg != IP);
-    CHECK(base != IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldStoreOffset(type, offset));
+bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
   switch (type) {
-    case kStoreByte:
-      strb(reg, Address(base, offset), cond);
-      break;
     case kStoreHalfword:
-      strh(reg, Address(base, offset), cond);
-      break;
+    case kStoreByte:
     case kStoreWord:
-      str(reg, Address(base, offset), cond);
-      break;
+      return IsAbsoluteUint(12, offset);
+    case kStoreSWord:
+    case kStoreDWord:
+      return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kStoreWordPair:
-      strd(reg, Address(base, offset), cond);
-      break;
-    default:
+      return IsAbsoluteUint(10, offset);
+  default:
       LOG(FATAL) << "UNREACHABLE";
+      return false;
   }
 }
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreToOffset.
-void ArmAssembler::StoreSToOffset(SRegister reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldStoreOffset(kStoreSWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+void ArmAssembler::Pad(uint32_t bytes) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  for (uint32_t i = 0; i < bytes; ++i) {
+    buffer_.Emit<byte>(0);
   }
-  CHECK(Address::CanHoldStoreOffset(kStoreSWord, offset));
-  vstrs(reg, Address(base, offset), cond);
-}
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreSToOffset.
-void ArmAssembler::StoreDToOffset(DRegister reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldStoreOffset(kStoreDWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldStoreOffset(kStoreDWord, offset));
-  vstrd(reg, Address(base, offset), cond);
-}
-
-void ArmAssembler::Push(Register rd, Condition cond) {
-  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
-}
-
-void ArmAssembler::Pop(Register rd, Condition cond) {
-  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
-}
-
-void ArmAssembler::PushList(RegList regs, Condition cond) {
-  stm(DB_W, SP, regs, cond);
-}
-
-void ArmAssembler::PopList(RegList regs, Condition cond) {
-  ldm(IA_W, SP, regs, cond);
-}
-
-void ArmAssembler::Mov(Register rd, Register rm, Condition cond) {
-  if (rd != rm) {
-    mov(rd, ShifterOperand(rm), cond);
-  }
-}
-
-void ArmAssembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-}
-
-void ArmAssembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
-  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-}
-
-void ArmAssembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
-  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-}
-
-void ArmAssembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-}
-
-void ArmAssembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
 }
 
 constexpr size_t kFramePointerSize = 4;
@@ -1472,7 +401,7 @@
 void ArmAssembler::RemoveFrame(size_t frame_size,
                               const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  // Compute callee saves to pop and PC
+  // Compute callee saves to pop and PC.
   RegList pop_list = 1 << PC;
   size_t pop_values = 1;
   for (size_t i = 0; i < callee_save_regs.size(); i++) {
@@ -1481,12 +410,12 @@
     pop_values++;
   }
 
-  // Decrease frame to start of callee saves
+  // Decrease frame to start of callee saves.
   CHECK_GT(frame_size, pop_values * kFramePointerSize);
   size_t adjust = frame_size - (pop_values * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
-  // Pop callee saves and PC
+  // Pop callee saves and PC.
   PopList(pop_list);
 }
 
@@ -1681,7 +610,7 @@
     } else {
       CHECK(dst.IsRegisterPair()) << dst;
       CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second
+      // Ensure that the first move doesn't clobber the input of the second.
       if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
         mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
         mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
@@ -1743,15 +672,6 @@
   UNIMPLEMENTED(FATAL);
 }
 
-
-void ArmAssembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-#if ANDROID_SMP != 0
-  int32_t encoding = 0xf57ff05f;  // dmb
-  Emit(encoding);
-#endif
-}
-
 void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
                                    FrameOffset handle_scope_offset,
                                    ManagedRegister min_reg, bool null_allowed) {
@@ -1770,7 +690,10 @@
     }
     cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
     if (!out_reg.Equals(in_reg)) {
+      it(EQ, kItElse);
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    } else {
+      it(NE);
     }
     AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
@@ -1791,6 +714,7 @@
     // the address in the handle scope holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
     cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+    it(NE);
     AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
     AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
@@ -1806,19 +730,20 @@
   CHECK(in_reg.IsCoreRegister()) << in_reg;
   Label null_arg;
   if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
   }
   cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+  it(NE);
   LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                  in_reg.AsCoreRegister(), 0, NE);
 }
 
 void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+  // TODO: not validating references.
 }
 
 void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+  // TODO: not validating references.
 }
 
 void ArmAssembler::Call(ManagedRegister mbase, Offset offset,
@@ -1830,7 +755,7 @@
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  base.AsCoreRegister(), offset.Int32Value());
   blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  // TODO: place reference map on call.
 }
 
 void ArmAssembler::Call(FrameOffset base, Offset offset,
@@ -1876,16 +801,71 @@
   if (stack_adjust_ != 0) {  // Fix up the frame.
     __ DecreaseFrameSize(stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving R0 as this call won't return
+  // Pass exception object as argument.
+  // Don't care about preserving R0 as this call won't return.
   __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
-  // Set up call to Thread::Current()->pDeliverException
+  // Set up call to Thread::Current()->pDeliverException.
   __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
   __ blx(R12);
-  // Call never returns
+  // Call never returns.
   __ bkpt(0);
 #undef __
 }
 
+
+static int LeadingZeros(uint32_t val) {
+  uint32_t alt;
+  int32_t n;
+  int32_t count;
+
+  count = 16;
+  n = 32;
+  do {
+    alt = val >> count;
+    if (alt != 0) {
+      n = n - count;
+      val = alt;
+    }
+    count >>= 1;
+  } while (count);
+  return n - val;
+}
+
+
+uint32_t ArmAssembler::ModifiedImmediate(uint32_t value) {
+  int32_t z_leading;
+  int32_t z_trailing;
+  uint32_t b0 = value & 0xff;
+
+  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
+  if (value <= 0xFF)
+    return b0;  // 0:000:a:bcdefgh.
+  if (value == ((b0 << 16) | b0))
+    return (0x1 << 12) | b0; /* 0:001:a:bcdefgh */
+  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
+    return (0x3 << 12) | b0; /* 0:011:a:bcdefgh */
+  b0 = (value >> 8) & 0xff;
+  if (value == ((b0 << 24) | (b0 << 8)))
+    return (0x2 << 12) | b0; /* 0:010:a:bcdefgh */
+  /* Can we do it with rotation? */
+  z_leading = LeadingZeros(value);
+  z_trailing = 32 - LeadingZeros(~value & (value - 1));
+  /* A run of eight or fewer active bits? */
+  if ((z_leading + z_trailing) < 24)
+    return kInvalidModifiedImmediate;  /* No - bail */
+  /* left-justify the constant, discarding msb (known to be 1) */
+  value <<= z_leading + 1;
+  /* Create bcdefgh */
+  value >>= 25;
+
+  /* Put it all together */
+  uint32_t v = 8 + z_leading;
+
+  uint32_t i = (v & 0b10000) >> 4;
+  uint32_t imm3 = (v >> 1) & 0b111;
+  uint32_t a = v & 1;
+  return value | i << 26 | imm3 << 12 | a << 7;
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 396e603..7b662e1 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -29,88 +29,118 @@
 namespace art {
 namespace arm {
 
-// Encodes Addressing Mode 1 - Data-processing operands defined in Section 5.1.
 class ShifterOperand {
  public:
-  // Data-processing operands - Uninitialized
-  ShifterOperand() {
-    type_ = -1;
-    encoding_ = 0;
+  ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(0) {
   }
 
-  // Data-processing operands - Immediate
-  explicit ShifterOperand(uint32_t immediate) {
-    CHECK(immediate < (1 << kImmed8Bits));
-    type_ = 1;
-    encoding_ = immediate;
-  }
-
-  // Data-processing operands - Rotated immediate
-  ShifterOperand(uint32_t rotate, uint32_t immed8) {
-    CHECK((rotate < (1 << kRotateBits)) && (immed8 < (1 << kImmed8Bits)));
-    type_ = 1;
-    encoding_ = (rotate << kRotateShift) | (immed8 << kImmed8Shift);
+  explicit ShifterOperand(uint32_t immed) : type_(kImmediate), rm_(kNoRegister), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(immed) {
   }
 
   // Data-processing operands - Register
-  explicit ShifterOperand(Register rm) {
-    type_ = 0;
-    encoding_ = static_cast<uint32_t>(rm);
+  explicit ShifterOperand(Register rm) : type_(kRegister), rm_(rm), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(0) {
   }
 
-  // Data-processing operands - Logical shift/rotate by immediate
-  ShifterOperand(Register rm, Shift shift, uint32_t shift_imm) {
-    CHECK(shift_imm < (1 << kShiftImmBits));
-    type_ = 0;
-    encoding_ = shift_imm << kShiftImmShift |
-                static_cast<uint32_t>(shift) << kShiftShift |
-                static_cast<uint32_t>(rm);
+  ShifterOperand(uint32_t rotate, uint32_t immed8) : type_(kImmediate), rm_(kNoRegister),
+      rs_(kNoRegister),
+      is_rotate_(true), is_shift_(false), shift_(kNoShift), rotate_(rotate), immed_(immed8) {
+  }
+
+  ShifterOperand(Register rm, Shift shift, uint32_t shift_imm = 0) : type_(kRegister), rm_(rm),
+      rs_(kNoRegister),
+      is_rotate_(false), is_shift_(true), shift_(shift), rotate_(0), immed_(shift_imm) {
   }
 
   // Data-processing operands - Logical shift/rotate by register
-  ShifterOperand(Register rm, Shift shift, Register rs) {
-    type_ = 0;
-    encoding_ = static_cast<uint32_t>(rs) << kShiftRegisterShift |
-                static_cast<uint32_t>(shift) << kShiftShift | (1 << 4) |
-                static_cast<uint32_t>(rm);
+  ShifterOperand(Register rm, Shift shift, Register rs)  : type_(kRegister), rm_(rm),
+      rs_(rs),
+      is_rotate_(false), is_shift_(true), shift_(shift), rotate_(0), immed_(0) {
   }
 
-  static bool CanHold(uint32_t immediate, ShifterOperand* shifter_op) {
-    // Avoid the more expensive test for frequent small immediate values.
-    if (immediate < (1 << kImmed8Bits)) {
-      shifter_op->type_ = 1;
-      shifter_op->encoding_ = (0 << kRotateShift) | (immediate << kImmed8Shift);
-      return true;
-    }
-    // Note that immediate must be unsigned for the test to work correctly.
-    for (int rot = 0; rot < 16; rot++) {
-      uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
-      if (imm8 < (1 << kImmed8Bits)) {
-        shifter_op->type_ = 1;
-        shifter_op->encoding_ = (rot << kRotateShift) | (imm8 << kImmed8Shift);
-        return true;
-      }
-    }
-    return false;
-  }
-
- private:
-  bool is_valid() const { return (type_ == 0) || (type_ == 1); }
+  bool is_valid() const { return (type_ == kImmediate) || (type_ == kRegister); }
 
   uint32_t type() const {
     CHECK(is_valid());
     return type_;
   }
 
-  uint32_t encoding() const {
-    CHECK(is_valid());
-    return encoding_;
+  uint32_t encodingArm() const;
+  uint32_t encodingThumb(int version) const;
+
+  bool IsEmpty() const {
+    return type_ == kUnknown;
   }
 
-  uint32_t type_;  // Encodes the type field (bits 27-25) in the instruction.
-  uint32_t encoding_;
+  bool IsImmediate() const {
+    return type_ == kImmediate;
+  }
 
-  friend class ArmAssembler;
+  bool IsRegister() const {
+    return type_ == kRegister;
+  }
+
+  bool IsShift() const {
+    return is_shift_;
+  }
+
+  uint32_t GetImmediate() const {
+    return immed_;
+  }
+
+  Shift GetShift() const {
+    return shift_;
+  }
+
+  Register GetRegister() const {
+    return rm_;
+  }
+
+  enum Type {
+    kUnknown = -1,
+    kRegister,
+    kImmediate
+  };
+
+  static bool CanHoldArm(uint32_t immediate, ShifterOperand* shifter_op) {
+    // Avoid the more expensive test for frequent small immediate values.
+    if (immediate < (1 << kImmed8Bits)) {
+      shifter_op->type_ = kImmediate;
+      shifter_op->is_rotate_ = true;
+      shifter_op->rotate_ = 0;
+      shifter_op->immed_ = immediate;
+      return true;
+    }
+    // Note that immediate must be unsigned for the test to work correctly.
+    for (int rot = 0; rot < 16; rot++) {
+      uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
+      if (imm8 < (1 << kImmed8Bits)) {
+        shifter_op->type_ = kImmediate;
+        shifter_op->is_rotate_ = true;
+        shifter_op->rotate_ = rot;
+        shifter_op->immed_ = imm8;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  static bool CanHoldThumb(Register rd, Register rn, Opcode opcode,
+                           uint32_t immediate, ShifterOperand* shifter_op);
+
+
+ private:
+  Type type_;
+  Register rm_;
+  Register rs_;
+  bool is_rotate_;
+  bool is_shift_;
+  Shift shift_;
+  uint32_t rotate_;
+  uint32_t immed_;
+
 #ifdef SOURCE_ASSEMBLER_SUPPORT
   friend class BinaryAssembler;
 #endif
@@ -152,10 +182,10 @@
   IB_W         = (8|4|1) << 21   // increment before with writeback to base
 };
 
-
 class Address {
  public:
-  // Memory operand addressing mode
+  // Memory operand addressing mode (in ARM encoding form; other instruction
+  // sets need to adjust the encoding).
   enum Mode {
     // bit encoding P U W
     Offset       = (8|4|0) << 21,  // offset (w/o writeback to base)
@@ -166,273 +196,366 @@
     NegPostIndex = (0|0|0) << 21   // negative post-indexed with writeback
   };
 
-  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) {
-    CHECK(IsAbsoluteUint(12, offset));
-    if (offset < 0) {
-      encoding_ = (am ^ (1 << kUShift)) | -offset;  // Flip U to adjust sign.
-    } else {
-      encoding_ = am | offset;
-    }
-    encoding_ |= static_cast<uint32_t>(rn) << kRnShift;
+  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), offset_(offset),
+      am_(am) {
   }
 
-  static bool CanHoldLoadOffset(LoadOperandType type, int offset);
-  static bool CanHoldStoreOffset(StoreOperandType type, int offset);
+  static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset);
+  static bool CanHoldStoreOffsetArm(StoreOperandType type, int offset);
+
+  static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset);
+  static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset);
+
+  uint32_t encodingArm() const;
+  uint32_t encodingThumb(int version) const;
+
+  uint32_t encoding3() const;
+  uint32_t vencoding() const;
+
+  uint32_t encodingThumbLdrdStrd() const;
+
+  Register GetRegister() const {
+    return rn_;
+  }
+
+  int32_t GetOffset() const {
+    return offset_;
+  }
+
+  Mode GetMode() const {
+    return am_;
+  }
 
  private:
-  uint32_t encoding() const { return encoding_; }
-
-  // Encoding for addressing mode 3.
-  uint32_t encoding3() const {
-    const uint32_t offset_mask = (1 << 12) - 1;
-    uint32_t offset = encoding_ & offset_mask;
-    CHECK_LT(offset, 256u);
-    return (encoding_ & ~offset_mask) | ((offset & 0xf0) << 4) | (offset & 0xf);
-  }
-
-  // Encoding for vfp load/store addressing.
-  uint32_t vencoding() const {
-    const uint32_t offset_mask = (1 << 12) - 1;
-    uint32_t offset = encoding_ & offset_mask;
-    CHECK(IsAbsoluteUint(10, offset));  // In the range -1020 to +1020.
-    CHECK_ALIGNED(offset, 2);  // Multiple of 4.
-    int mode = encoding_ & ((8|4|1) << 21);
-    CHECK((mode == Offset) || (mode == NegOffset));
-    uint32_t vencoding = (encoding_ & (0xf << kRnShift)) | (offset >> 2);
-    if (mode == Offset) {
-      vencoding |= 1 << 23;
-    }
-    return vencoding;
-  }
-
-  uint32_t encoding_;
-
-  friend class ArmAssembler;
+  Register rn_;
+  int32_t offset_;
+  Mode am_;
 };
 
// Instruction encoding bits.
// Note: several names deliberately share a bit position (L/S at bit 20,
// W/A at bit 21, B/N at bit 22); which meaning applies depends on the
// instruction class being encoded.
enum {
  H   = 1 << 5,   // halfword (or byte)
  L   = 1 << 20,  // load (or store)
  S   = 1 << 20,  // set condition code (or leave unchanged)
  W   = 1 << 21,  // writeback base register (or leave unchanged)
  A   = 1 << 21,  // accumulate in multiply instruction (or not)
  B   = 1 << 22,  // unsigned byte (or word)
  N   = 1 << 22,  // long (or short)
  U   = 1 << 23,  // positive (or negative) offset/index
  P   = 1 << 24,  // offset/pre-indexed addressing (or post-indexed addressing)
  I   = 1 << 25,  // immediate shifter operand (or not)

  // Single-bit masks, B<n> == bit n set.
  B0 = 1,
  B1 = 1 << 1,
  B2 = 1 << 2,
  B3 = 1 << 3,
  B4 = 1 << 4,
  B5 = 1 << 5,
  B6 = 1 << 6,
  B7 = 1 << 7,
  B8 = 1 << 8,
  B9 = 1 << 9,
  B10 = 1 << 10,
  B11 = 1 << 11,
  B12 = 1 << 12,
  B13 = 1 << 13,
  B14 = 1 << 14,
  B15 = 1 << 15,
  B16 = 1 << 16,
  B17 = 1 << 17,
  B18 = 1 << 18,
  B19 = 1 << 19,
  B20 = 1 << 20,
  B21 = 1 << 21,
  B22 = 1 << 22,
  B23 = 1 << 23,
  B24 = 1 << 24,
  B25 = 1 << 25,
  B26 = 1 << 26,
  B27 = 1 << 27,
  B28 = 1 << 28,
  B29 = 1 << 29,
  B30 = 1 << 30,
  B31 = 1 << 31,

  // Instruction bit masks.
  RdMask = 15 << 12,  // in str instruction
  CondMask = 15 << 28,
  CoprocessorMask = 15 << 8,
  OpCodeMask = 15 << 21,  // in data-processing instructions
  Imm24Mask = (1 << 24) - 1,
  Off12Mask = (1 << 12) - 1,

  // ldrex/strex register field encodings.
  kLdExRnShift = 16,
  kLdExRtShift = 12,
  kStrExRnShift = 16,
  kStrExRdShift = 12,
  kStrExRtShift = 0,
};
+
// IfThen state for IT instructions.
enum ItState {
  kItOmitted,
  kItThen,
  kItT = kItThen,  // Shorthand alias for kItThen.
  kItElse,
  kItE = kItElse   // Shorthand alias for kItElse.
};

constexpr uint32_t kNoItCondition = 3;
// Sentinel returned when a constant has no modified-immediate encoding.
// -1 converts to 0xFFFFFFFF.
constexpr uint32_t kInvalidModifiedImmediate = -1;

// Pretty-printing helpers; defined out of line elsewhere in the assembler.
extern const char* kRegisterNames[];
extern const char* kConditionNames[];
extern std::ostream& operator<<(std::ostream& os, const Register& rhs);
extern std::ostream& operator<<(std::ostream& os, const SRegister& rhs);
extern std::ostream& operator<<(std::ostream& os, const DRegister& rhs);
extern std::ostream& operator<<(std::ostream& os, const Condition& rhs);
+
+// This is an abstract ARM assembler.  Subclasses provide assemblers for the individual
+// instruction sets (ARM32, Thumb2, etc.)
+//
+class ArmAssembler : public Assembler {
  public:
-  ArmAssembler() {}
   virtual ~ArmAssembler() {}
 
+  // Is this assembler for the thumb instruction set?
+  virtual bool IsThumb() const = 0;
+
   // Data-processing instructions.
-  void and_(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void eor(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void sub(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void subs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void rsb(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void rsbs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void add(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void adds(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void adc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void sbc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void rsc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void tst(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void tst(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void teq(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void teq(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void cmp(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void cmn(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void orr(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void orrs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void mov(Register rd, ShifterOperand so, Condition cond = AL);
-  void movs(Register rd, ShifterOperand so, Condition cond = AL);
+  virtual void mov(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void bic(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void mvn(Register rd, ShifterOperand so, Condition cond = AL);
-  void mvns(Register rd, ShifterOperand so, Condition cond = AL);
+  virtual void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
 
   // Miscellaneous data-processing instructions.
-  void clz(Register rd, Register rm, Condition cond = AL);
-  void movw(Register rd, uint16_t imm16, Condition cond = AL);
-  void movt(Register rd, uint16_t imm16, Condition cond = AL);
+  virtual void clz(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0;
+  virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0;
 
   // Multiply instructions.
-  void mul(Register rd, Register rn, Register rm, Condition cond = AL);
-  void mla(Register rd, Register rn, Register rm, Register ra,
-           Condition cond = AL);
-  void mls(Register rd, Register rn, Register rm, Register ra,
-           Condition cond = AL);
-  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
-             Condition cond = AL);
+  virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
+  virtual void mla(Register rd, Register rn, Register rm, Register ra,
+                   Condition cond = AL) = 0;
+  virtual void mls(Register rd, Register rn, Register rm, Register ra,
+                   Condition cond = AL) = 0;
+  virtual void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+                     Condition cond = AL) = 0;
+
+  virtual void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
+  virtual void udiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
 
   // Load/store instructions.
-  void ldr(Register rd, Address ad, Condition cond = AL);
-  void str(Register rd, Address ad, Condition cond = AL);
+  virtual void ldr(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void str(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrb(Register rd, Address ad, Condition cond = AL);
-  void strb(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrb(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strb(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrh(Register rd, Address ad, Condition cond = AL);
-  void strh(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrh(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strh(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrsb(Register rd, Address ad, Condition cond = AL);
-  void ldrsh(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrsb(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void ldrsh(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrd(Register rd, Address ad, Condition cond = AL);
-  void strd(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrd(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strd(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldm(BlockAddressMode am, Register base,
-           RegList regs, Condition cond = AL);
-  void stm(BlockAddressMode am, Register base,
-           RegList regs, Condition cond = AL);
+  virtual void ldm(BlockAddressMode am, Register base,
+                   RegList regs, Condition cond = AL) = 0;
+  virtual void stm(BlockAddressMode am, Register base,
+                   RegList regs, Condition cond = AL) = 0;
 
-  void ldrex(Register rd, Register rn, Condition cond = AL);
-  void strex(Register rd, Register rt, Register rn, Condition cond = AL);
+  virtual void ldrex(Register rd, Register rn, Condition cond = AL) = 0;
+  virtual void strex(Register rd, Register rt, Register rn, Condition cond = AL) = 0;
 
   // Miscellaneous instructions.
-  void clrex();
-  void nop(Condition cond = AL);
+  virtual void clrex(Condition cond = AL) = 0;
+  virtual void nop(Condition cond = AL) = 0;
 
   // Note that gdb sets breakpoints using the undefined instruction 0xe7f001f0.
-  void bkpt(uint16_t imm16);
-  void svc(uint32_t imm24);
+  virtual void bkpt(uint16_t imm16) = 0;
+  virtual void svc(uint32_t imm24) = 0;
+
+  virtual void it(Condition firstcond, ItState i1 = kItOmitted,
+                  ItState i2 = kItOmitted, ItState i3 = kItOmitted) {
+    // Ignored if not supported.
+  }
+
+  virtual void cbz(Register rn, Label* target) = 0;
+  virtual void cbnz(Register rn, Label* target) = 0;
 
   // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
-  void vmovsr(SRegister sn, Register rt, Condition cond = AL);
-  void vmovrs(Register rt, SRegister sn, Condition cond = AL);
-  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL);
-  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL);
-  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL);
-  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL);
-  void vmovs(SRegister sd, SRegister sm, Condition cond = AL);
-  void vmovd(DRegister dd, DRegister dm, Condition cond = AL);
+  virtual void vmovsr(SRegister sn, Register rt, Condition cond = AL) = 0;
+  virtual void vmovrs(Register rt, SRegister sn, Condition cond = AL) = 0;
+  virtual void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) = 0;
+  virtual void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) = 0;
+  virtual void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmovs(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmovd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
 
   // Returns false if the immediate cannot be encoded.
-  bool vmovs(SRegister sd, float s_imm, Condition cond = AL);
-  bool vmovd(DRegister dd, double d_imm, Condition cond = AL);
+  virtual bool vmovs(SRegister sd, float s_imm, Condition cond = AL) = 0;
+  virtual bool vmovd(DRegister dd, double d_imm, Condition cond = AL) = 0;
 
-  void vldrs(SRegister sd, Address ad, Condition cond = AL);
-  void vstrs(SRegister sd, Address ad, Condition cond = AL);
-  void vldrd(DRegister dd, Address ad, Condition cond = AL);
-  void vstrd(DRegister dd, Address ad, Condition cond = AL);
+  virtual void vldrs(SRegister sd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vstrs(SRegister sd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vldrd(DRegister dd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vstrd(DRegister dd, const Address& ad, Condition cond = AL) = 0;
 
-  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
+  virtual void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
 
-  void vabss(SRegister sd, SRegister sm, Condition cond = AL);
-  void vabsd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vnegs(SRegister sd, SRegister sm, Condition cond = AL);
-  void vnegd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL);
-  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL);
+  virtual void vabss(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vabsd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vnegs(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vnegd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
 
-  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL);
-  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL);
-  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL);
+  virtual void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) = 0;
 
-  void vcmps(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vcmpsz(SRegister sd, Condition cond = AL);
-  void vcmpdz(DRegister dd, Condition cond = AL);
-  void vmstat(Condition cond = AL);  // VMRS APSR_nzcv, FPSCR
+  virtual void vcmps(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcmpsz(SRegister sd, Condition cond = AL) = 0;
+  virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
+  virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
+
+  virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0;
 
   // Branch instructions.
-  void b(Label* label, Condition cond = AL);
-  void bl(Label* label, Condition cond = AL);
-  void blx(Register rm, Condition cond = AL);
-  void bx(Register rm, Condition cond = AL);
+  virtual void b(Label* label, Condition cond = AL) = 0;
+  virtual void bl(Label* label, Condition cond = AL) = 0;
+  virtual void blx(Register rm, Condition cond = AL) = 0;
+  virtual void bx(Register rm, Condition cond = AL) = 0;
+
+  void Pad(uint32_t bytes);
 
   // Macros.
+  // Most of these are pure virtual as they need to be implemented per instruction set.
+
   // Add signed constant value to rd. May clobber IP.
-  void AddConstant(Register rd, int32_t value, Condition cond = AL);
-  void AddConstant(Register rd, Register rn, int32_t value,
-                   Condition cond = AL);
-  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
-                           Condition cond = AL);
-  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
-                            Condition cond = AL);
+  virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0;
+  virtual void AddConstant(Register rd, Register rn, int32_t value,
+                           Condition cond = AL) = 0;
+  virtual void AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                   Condition cond = AL) = 0;
+  virtual void AddConstantWithCarry(Register rd, Register rn, int32_t value,
+                                    Condition cond = AL) = 0;
 
   // Load and Store. May clobber IP.
-  void LoadImmediate(Register rd, int32_t value, Condition cond = AL);
-  void LoadSImmediate(SRegister sd, float value, Condition cond = AL);
-  void LoadDImmediate(DRegister dd, double value,
-                      Register scratch, Condition cond = AL);
-  void MarkExceptionHandler(Label* label);
-  void LoadFromOffset(LoadOperandType type,
-                      Register reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
-  void StoreToOffset(StoreOperandType type,
-                     Register reg,
-                     Register base,
-                     int32_t offset,
-                     Condition cond = AL);
-  void LoadSFromOffset(SRegister reg,
-                       Register base,
-                       int32_t offset,
-                       Condition cond = AL);
-  void StoreSToOffset(SRegister reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
-  void LoadDFromOffset(DRegister reg,
-                       Register base,
-                       int32_t offset,
-                       Condition cond = AL);
-  void StoreDToOffset(DRegister reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
+  virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
+  virtual void LoadSImmediate(SRegister sd, float value, Condition cond = AL) = 0;
+  virtual void LoadDImmediate(DRegister dd, double value,
+                              Register scratch, Condition cond = AL) = 0;
+  virtual void MarkExceptionHandler(Label* label) = 0;
+  virtual void LoadFromOffset(LoadOperandType type,
+                              Register reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
+  virtual void StoreToOffset(StoreOperandType type,
+                             Register reg,
+                             Register base,
+                             int32_t offset,
+                             Condition cond = AL) = 0;
+  virtual void LoadSFromOffset(SRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond = AL) = 0;
+  virtual void StoreSToOffset(SRegister reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
+  virtual void LoadDFromOffset(DRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond = AL) = 0;
+  virtual void StoreDToOffset(DRegister reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
 
-  void Push(Register rd, Condition cond = AL);
-  void Pop(Register rd, Condition cond = AL);
+  virtual void Push(Register rd, Condition cond = AL) = 0;
+  virtual void Pop(Register rd, Condition cond = AL) = 0;
 
-  void PushList(RegList regs, Condition cond = AL);
-  void PopList(RegList regs, Condition cond = AL);
+  virtual void PushList(RegList regs, Condition cond = AL) = 0;
+  virtual void PopList(RegList regs, Condition cond = AL) = 0;
 
-  void Mov(Register rd, Register rm, Condition cond = AL);
+  virtual void Mov(Register rd, Register rm, Condition cond = AL) = 0;
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Rrx(Register rd, Register rm, Condition cond = AL) = 0;
 
-  // Encode a signed constant in tst instructions, only affecting the flags.
-  void EncodeUint32InTstInstructions(uint32_t data);
-  // ... and decode from a pc pointing to the start of encoding instructions.
-  static uint32_t DecodeUint32FromTstInstructions(uword pc);
   static bool IsInstructionForExceptionHandling(uword pc);
 
-  // Emit data (e.g. encoded instruction or immediate) to the
-  // instruction stream.
-  void Emit(int32_t value);
-  void Bind(Label* label);
+  virtual void Bind(Label* label) = 0;
+
+  virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
+  virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
   //
   // Overridden common assembler high-level functionality
@@ -445,7 +568,7 @@
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
+    OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
@@ -509,8 +632,6 @@
   void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
             ManagedRegister scratch, size_t size) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   // Sign extension
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
@@ -550,81 +671,9 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
- private:
-  void EmitType01(Condition cond,
-                  int type,
-                  Opcode opcode,
-                  int set_cc,
-                  Register rn,
-                  Register rd,
-                  ShifterOperand so);
+  static uint32_t ModifiedImmediate(uint32_t value);
 
-  void EmitType5(Condition cond, int offset, bool link);
-
-  void EmitMemOp(Condition cond,
-                 bool load,
-                 bool byte,
-                 Register rd,
-                 Address ad);
-
-  void EmitMemOpAddressMode3(Condition cond,
-                             int32_t mode,
-                             Register rd,
-                             Address ad);
-
-  void EmitMultiMemOp(Condition cond,
-                      BlockAddressMode am,
-                      bool load,
-                      Register base,
-                      RegList regs);
-
-  void EmitShiftImmediate(Condition cond,
-                          Shift opcode,
-                          Register rd,
-                          Register rm,
-                          ShifterOperand so);
-
-  void EmitShiftRegister(Condition cond,
-                         Shift opcode,
-                         Register rd,
-                         Register rm,
-                         ShifterOperand so);
-
-  void EmitMulOp(Condition cond,
-                 int32_t opcode,
-                 Register rd,
-                 Register rn,
-                 Register rm,
-                 Register rs);
-
-  void EmitVFPsss(Condition cond,
-                  int32_t opcode,
-                  SRegister sd,
-                  SRegister sn,
-                  SRegister sm);
-
-  void EmitVFPddd(Condition cond,
-                  int32_t opcode,
-                  DRegister dd,
-                  DRegister dn,
-                  DRegister dm);
-
-  void EmitVFPsd(Condition cond,
-                 int32_t opcode,
-                 SRegister sd,
-                 DRegister dm);
-
-  void EmitVFPds(Condition cond,
-                 int32_t opcode,
-                 DRegister dd,
-                 SRegister sm);
-
-  void EmitBranch(Condition cond, Label* label, bool link);
-  static int32_t EncodeBranchOffset(int offset, int32_t inst);
-  static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
-
+ protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
new file mode 100644
index 0000000..b2bb20f
--- /dev/null
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -0,0 +1,1423 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_arm32.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
+}
+
+void Arm32Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
+}
+
+void Arm32Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::add(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
+  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
+  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::orr(Register rd, Register rn,
+                    const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::orrs(Register rd, Register rn,
+                        const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
+}
+
+
+void Arm32Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
+}
+
+
+void Arm32Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
+}
+
+
+void Arm32Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
+}
+
+
+void Arm32Assembler::mul(Register rd, Register rn, Register rm, Condition cond) {
+  // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
+  EmitMulOp(cond, 0, R0, rd, rn, rm);
+}
+
+
+void Arm32Assembler::mla(Register rd, Register rn, Register rm, Register ra,
+                         Condition cond) {
+  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
+  EmitMulOp(cond, B21, ra, rd, rn, rm);
+}
+
+
+void Arm32Assembler::mls(Register rd, Register rn, Register rm, Register ra,
+                         Condition cond) {
+  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
+  EmitMulOp(cond, B22 | B21, ra, rd, rn, rm);
+}
+
+
+void Arm32Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
+                           Register rm, Condition cond) {
+  // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
+  EmitMulOp(cond, B23, rd_lo, rd_hi, rn, rm);
+}
+
+
+void Arm32Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = B26 | B25 | B24 | B20 |
+      B15 | B14 | B13 | B12 |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << 0) |
+      (static_cast<int32_t>(rd) << 16) |
+      (static_cast<int32_t>(rm) << 8) |
+      B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = B26 | B25 | B24 | B21 | B20 |
+      B15 | B14 | B13 | B12 |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << 0) |
+      (static_cast<int32_t>(rd) << 16) |
+      (static_cast<int32_t>(rm) << 8) |
+      B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::ldr(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, true, false, rd, ad);
+}
+
+
+void Arm32Assembler::str(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, false, false, rd, ad);
+}
+
+
+void Arm32Assembler::ldrb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, true, true, rd, ad);
+}
+
+
+void Arm32Assembler::strb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, false, true, rd, ad);
+}
+
+
+void Arm32Assembler::ldrh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | H | B4, rd, ad);
+}
+
+
+void Arm32Assembler::strh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, B7 | H | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldrsb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | B6 | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | B6 | H | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  EmitMemOpAddressMode3(cond, B7 | B6 | B4, rd, ad);
+}
+
+
+void Arm32Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  EmitMemOpAddressMode3(cond, B7 | B6 | B5 | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldm(BlockAddressMode am,
+                       Register base,
+                       RegList regs,
+                       Condition cond) {
+  EmitMultiMemOp(cond, am, true, base, regs);
+}
+
+
+void Arm32Assembler::stm(BlockAddressMode am,
+                       Register base,
+                       RegList regs,
+                       Condition cond) {
+  EmitMultiMemOp(cond, am, false, base, regs);
+}
+
+
+void Arm32Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
+}
+
+
+bool Arm32Assembler::vmovs(SRegister sd, float s_imm, Condition cond) {
+  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
+  if (((imm32 & ((1 << 19) - 1)) == 0) &&
+      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
+       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
+    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
+        ((imm32 >> 19) & ((1 << 6) -1));
+    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
+               sd, S0, S0);
+    return true;
+  }
+  return false;
+}
+
+
+bool Arm32Assembler::vmovd(DRegister dd, double d_imm, Condition cond) {
+  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
+  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
+      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
+       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
+    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
+        ((imm64 >> 48) & ((1 << 6) -1));
+    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
+               dd, D0, D0);
+    return true;
+  }
+  return false;
+}
+
+
+void Arm32Assembler::vadds(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, 0, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, 0, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B6, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B6, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B23, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B23, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vabss(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
+}
+
+void Arm32Assembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vcmpsz(SRegister sd, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
+}
+
+
+void Arm32Assembler::vcmpdz(DRegister dd, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
+}
+
+void Arm32Assembler::b(Label* label, Condition cond) {
+  EmitBranch(cond, label, false);
+}
+
+
+void Arm32Assembler::bl(Label* label, Condition cond) {
+  EmitBranch(cond, label, true);
+}
+
+
+void Arm32Assembler::MarkExceptionHandler(Label* label) {
+  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
+  Label l;
+  b(&l);
+  EmitBranch(AL, label, false);
+  Bind(&l);
+}
+
+
+void Arm32Assembler::Emit(int32_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int32_t>(value);
+}
+
+
+void Arm32Assembler::EmitType01(Condition cond,
+                                int type,
+                                Opcode opcode,
+                                int set_cc,
+                                Register rn,
+                                Register rd,
+                                const ShifterOperand& so) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     type << kTypeShift |
+                     static_cast<int32_t>(opcode) << kOpcodeShift |
+                     set_cc << kSShift |
+                     static_cast<int32_t>(rn) << kRnShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitType5(Condition cond, int offset, bool link) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     5 << kTypeShift |
+                     (link ? 1 : 0) << kLinkShift;
+  Emit(Arm32Assembler::EncodeBranchOffset(offset, encoding));
+}
+
+
+void Arm32Assembler::EmitMemOp(Condition cond,
+                             bool load,
+                             bool byte,
+                             Register rd,
+                             const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  const Address& addr = static_cast<const Address&>(ad);
+
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B26 |
+                     (load ? L : 0) |
+                     (byte ? B : 0) |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     addr.encodingArm();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitMemOpAddressMode3(Condition cond,
+                                           int32_t mode,
+                                           Register rd,
+                                           const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  const Address& addr = static_cast<const Address&>(ad);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B22  |
+                     mode |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     addr.encoding3();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitMultiMemOp(Condition cond,
+                                    BlockAddressMode am,
+                                    bool load,
+                                    Register base,
+                                    RegList regs) {
+  CHECK_NE(base, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 |
+                     am |
+                     (load ? L : 0) |
+                     (static_cast<int32_t>(base) << kRnShift) |
+                     regs;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitShiftImmediate(Condition cond,
+                                        Shift opcode,
+                                        Register rd,
+                                        Register rm,
+                                        const ShifterOperand& so) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK(so.IsImmediate());
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     static_cast<int32_t>(MOV) << kOpcodeShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm() << kShiftImmShift |
+                     static_cast<int32_t>(opcode) << kShiftShift |
+                     static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitShiftRegister(Condition cond,
+                                       Shift opcode,
+                                       Register rd,
+                                       Register rm,
+                                       const ShifterOperand& so) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK(so.IsRegister());
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     static_cast<int32_t>(MOV) << kOpcodeShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm() << kShiftRegisterShift |
+                     static_cast<int32_t>(opcode) << kShiftShift |
+                     B4 |
+                     static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitBranch(Condition cond, Label* label, bool link) {
+  if (label->IsBound()) {
+    EmitType5(cond, label->Position() - buffer_.Size(), link);
+  } else {
+    int position = buffer_.Size();
+    // Use the offset field of the branch instruction for linking the sites.
+    EmitType5(cond, label->position_, link);
+    label->LinkTo(position);
+  }
+}
+
+
+void Arm32Assembler::clz(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 | B22 | B21 | (0xf << 16) |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     (0xf << 8) | B4 | static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     B25 | B24 | ((imm16 >> 12) << 16) |
+                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::movt(Register rd, uint16_t imm16, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     B25 | B24 | B22 | ((imm16 >> 12) << 16) |
+                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode,
+                               Register rd, Register rn,
+                               Register rm, Register rs) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(rs, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = opcode |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << kRnShift) |
+      (static_cast<int32_t>(rd) << kRdShift) |
+      (static_cast<int32_t>(rs) << kRsShift) |
+      B7 | B4 |
+      (static_cast<int32_t>(rm) << kRmShift);
+  Emit(encoding);
+}
+
+void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 |
+                     B23 |
+                     L   |
+                     (static_cast<int32_t>(rn) << kLdExRnShift) |
+                     (static_cast<int32_t>(rt) << kLdExRtShift) |
+                     B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::strex(Register rd,
+                           Register rt,
+                           Register rn,
+                           Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 |
+                     B23 |
+                     (static_cast<int32_t>(rn) << kStrExRnShift) |
+                     (static_cast<int32_t>(rd) << kStrExRdShift) |
+                     B11 | B10 | B9 | B8 | B7 | B4 |
+                     (static_cast<int32_t>(rt) << kStrExRtShift);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::clrex(Condition cond) {
+  CHECK_EQ(cond, AL);   // This cannot be conditional on ARM.
+  int32_t encoding = (kSpecialCondition << kConditionShift) |
+                     B26 | B24 | B22 | B21 | B20 | (0xff << 12) | B4 | 0xf;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::nop(Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B25 | B24 | B21 | (0xf << 12);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovsr(SRegister sn, Register rt, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrs(Register rt, SRegister sn, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B20 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovsrr(SRegister sm, Register rt, Register rt2,
+                             Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrrs(Register rt, Register rt2, SRegister sm,
+                             Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
+                             Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrrd(Register rt, Register rt2, DRegister dm,
+                             Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vldrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vstrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vldrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vstrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vpushs(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, false, cond);
+}
+
+
+void Arm32Assembler::vpushd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, true, cond);
+}
+
+
+void Arm32Assembler::vpops(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, false, cond);
+}
+
+
+void Arm32Assembler::vpopd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, true, cond);
+}
+
+
+void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK_GT(nregs, 0);
+  uint32_t D;
+  uint32_t Vd;
+  if (dbl) {
+    // Encoded as D:Vd.
+    D = (reg >> 4) & 1;
+    Vd = reg & 0b1111;
+  } else {
+    // Encoded as Vd:D.
+    D = reg & 1;
+    Vd = (reg >> 1) & 0b1111;
+  }
+  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
+                    B11 | B9 |
+        (dbl ? B8 : 0) |
+        (push ? B24 : (B23 | B20)) |
+        static_cast<int32_t>(cond) << kConditionShift |
+        nregs << (dbl ? 1 : 0) |
+        D << 22 |
+        Vd << 12;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitVFPsss(Condition cond, int32_t opcode,
+                                SRegister sd, SRegister sn, SRegister sm) {
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     ((static_cast<int32_t>(sn) & 1)*B7) |
+                     ((static_cast<int32_t>(sm) & 1)*B5) |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitVFPddd(Condition cond, int32_t opcode,
+                                DRegister dd, DRegister dn, DRegister dm) {
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(dn, kNoDRegister);
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     ((static_cast<int32_t>(dn) >> 4)*B7) |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitVFPsd(Condition cond, int32_t opcode,
+                               SRegister sd, DRegister dm) {
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitVFPds(Condition cond, int32_t opcode,
+                             DRegister dd, SRegister sm) {
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     ((static_cast<int32_t>(sm) & 1)*B5) |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+}
+
+
+void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+}
+
+
+void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+}
+
+
+void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Ror if no shift is wanted; use Rrx instead.
+  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+}
+
+void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond) {
+  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+}
+
+
+void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+      B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 |
+      (static_cast<int32_t>(PC)*B12) |
+      B11 | B9 | B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::svc(uint32_t imm24) {
+  CHECK(IsUint(24, imm24)) << imm24;
+  int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::bkpt(uint16_t imm16) {
+  int32_t encoding = (AL << kConditionShift) | B24 | B21 |
+                     ((imm16 >> 4) << 8) | B6 | B5 | B4 | (imm16 & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::blx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 | B21 | (0xfff << 8) | B5 | B4 |
+                     (static_cast<int32_t>(rm) << kRmShift);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::bx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 | B21 | (0xfff << 8) | B4 |
+                     (static_cast<int32_t>(rm) << kRmShift);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::Push(Register rd, Condition cond) {
+  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
+}
+
+
+void Arm32Assembler::Pop(Register rd, Condition cond) {
+  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
+}
+
+
+void Arm32Assembler::PushList(RegList regs, Condition cond) {
+  stm(DB_W, SP, regs, cond);
+}
+
+
+void Arm32Assembler::PopList(RegList regs, Condition cond) {
+  ldm(IA_W, SP, regs, cond);
+}
+
+
+void Arm32Assembler::Mov(Register rd, Register rm, Condition cond) {
+  if (rd != rm) {
+    mov(rd, ShifterOperand(rm), cond);
+  }
+}
+
+
+void Arm32Assembler::Bind(Label* label) {
+  CHECK(!label->IsBound());
+  int bound_pc = buffer_.Size();
+  while (label->IsLinked()) {
+    int32_t position = label->Position();
+    int32_t next = buffer_.Load<int32_t>(position);
+    int32_t encoded = Arm32Assembler::EncodeBranchOffset(bound_pc - position, next);
+    buffer_.Store<int32_t>(position, encoded);
+    label->position_ = Arm32Assembler::DecodeBranchOffset(next);
+  }
+  label->BindTo(bound_pc);
+}
+
+
+int32_t Arm32Assembler::EncodeBranchOffset(int offset, int32_t inst) {
+  // The offset is off by 8 due to the way the ARM CPUs read PC.
+  offset -= 8;
+  CHECK_ALIGNED(offset, 4);
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
+
+  // Properly preserve only the bits supported in the instruction.
+  offset >>= 2;
+  offset &= kBranchOffsetMask;
+  return (inst & ~kBranchOffsetMask) | offset;
+}
+
+
+int Arm32Assembler::DecodeBranchOffset(int32_t inst) {
+  // Sign-extend, left-shift by 2, then add 8.
+  return ((((inst & kBranchOffsetMask) << 8) >> 6) + 8);
+}
+
+
+void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+
+void Arm32Assembler::AddConstant(Register rd, Register rn, int32_t value,
+                                 Condition cond) {
+  if (value == 0) {
+    if (rd != rn) {
+      mov(rd, ShifterOperand(rn), cond);
+    }
+    return;
+  }
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negatives ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    add(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+    sub(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      add(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      sub(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      add(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+void Arm32Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                         Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    adds(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+    subs(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      adds(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      subs(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      adds(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    mov(rd, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+    mvn(rd, shifter_op, cond);
+  } else {
+    movw(rd, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(rd, value_high, cond);
+    }
+  }
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetArm.
+void Arm32Assembler::LoadFromOffset(LoadOperandType type,
+                                    Register reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldLoadOffsetArm(type, offset)) {
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetArm(type, offset));
+  switch (type) {
+    case kLoadSignedByte:
+      ldrsb(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedByte:
+      ldrb(reg, Address(base, offset), cond);
+      break;
+    case kLoadSignedHalfword:
+      ldrsh(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedHalfword:
+      ldrh(reg, Address(base, offset), cond);
+      break;
+    case kLoadWord:
+      ldr(reg, Address(base, offset), cond);
+      break;
+    case kLoadWordPair:
+      ldrd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset.
+void Arm32Assembler::LoadSFromOffset(SRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldLoadOffsetArm(kLoadSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetArm(kLoadSWord, offset));
+  vldrs(reg, Address(base, offset), cond);
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset.
+void Arm32Assembler::LoadDFromOffset(DRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldLoadOffsetArm(kLoadDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetArm(kLoadDWord, offset));
+  vldrd(reg, Address(base, offset), cond);
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetArm.
+void Arm32Assembler::StoreToOffset(StoreOperandType type,
+                                   Register reg,
+                                   Register base,
+                                   int32_t offset,
+                                   Condition cond) {
+  if (!Address::CanHoldStoreOffsetArm(type, offset)) {
+    CHECK(reg != IP);
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetArm(type, offset));
+  switch (type) {
+    case kStoreByte:
+      strb(reg, Address(base, offset), cond);
+      break;
+    case kStoreHalfword:
+      strh(reg, Address(base, offset), cond);
+      break;
+    case kStoreWord:
+      str(reg, Address(base, offset), cond);
+      break;
+    case kStoreWordPair:
+      strd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreToOffset.
+void Arm32Assembler::StoreSToOffset(SRegister reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldStoreOffsetArm(kStoreSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetArm(kStoreSWord, offset));
+  vstrs(reg, Address(base, offset), cond);
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreToOffset.
+void Arm32Assembler::StoreDToOffset(DRegister reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldStoreOffsetArm(kStoreDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetArm(kStoreDWord, offset));
+  vstrd(reg, Address(base, offset), cond);
+}
+
+
+void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+#if ANDROID_SMP != 0
+  int32_t encoding = 0xf57ff05f;  // dmb
+  Emit(encoding);
+#endif
+}
+
+
+void Arm32Assembler::cbz(Register rn, Label* target) {
+  LOG(FATAL) << "cbz is not supported on ARM32";
+}
+
+
+void Arm32Assembler::cbnz(Register rn, Label* target) {
+  LOG(FATAL) << "cbnz is not supported on ARM32";
+}
+
+
+void Arm32Assembler::CompareAndBranchIfZero(Register r, Label* label) {
+  cmp(r, ShifterOperand(0));
+  b(label, EQ);
+}
+
+
+void Arm32Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
+  cmp(r, ShifterOperand(0));
+  b(label, NE);
+}
+
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
new file mode 100644
index 0000000..7a0fce2
--- /dev/null
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
+#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
+
+#include <vector>
+
+#include "base/logging.h"
+#include "constants_arm.h"
+#include "utils/arm/managed_register_arm.h"
+#include "utils/arm/assembler_arm.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+class Arm32Assembler FINAL : public ArmAssembler {
+ public:
+  Arm32Assembler() {
+  }
+  virtual ~Arm32Assembler() {}
+
+  bool IsThumb() const OVERRIDE {
+    return false;
+  }
+
+  // Data-processing instructions.
+  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  // Miscellaneous data-processing instructions.
+  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+
+  // Multiply instructions.
+  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void mla(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void mls(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+             Condition cond = AL) OVERRIDE;
+
+  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+
+  // Load/store instructions.
+  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+  void stm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+
+  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
+  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
+
+  // Miscellaneous instructions.
+  void clrex(Condition cond = AL) OVERRIDE;
+  void nop(Condition cond = AL) OVERRIDE;
+
+  // Note that gdb sets breakpoints using the undefined instruction 0xe7f001f0.
+  void bkpt(uint16_t imm16) OVERRIDE;
+  void svc(uint32_t imm24) OVERRIDE;
+
+  void cbz(Register rn, Label* target) OVERRIDE;
+  void cbnz(Register rn, Label* target) OVERRIDE;
+
+  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
+  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
+  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
+  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  // Returns false if the immediate cannot be encoded.
+  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
+  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
+
+  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+
+  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
+  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
+  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
+
+  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+
+  // Branch instructions.
+  void b(Label* label, Condition cond = AL);
+  void bl(Label* label, Condition cond = AL);
+  void blx(Register rm, Condition cond = AL) OVERRIDE;
+  void bx(Register rm, Condition cond = AL) OVERRIDE;
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Rrx(Register rd, Register rm, Condition cond = AL);
+
+  void Push(Register rd, Condition cond = AL) OVERRIDE;
+  void Pop(Register rd, Condition cond = AL) OVERRIDE;
+
+  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
+  void PopList(RegList regs, Condition cond = AL) OVERRIDE;
+
+  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+
+  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
+  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
+
+
+  // Macros.
+  // Add signed constant value to rd. May clobber IP.
+  void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void AddConstant(Register rd, Register rn, int32_t value,
+                   Condition cond = AL) OVERRIDE;
+  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                           Condition cond = AL) OVERRIDE;
+  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
+                            Condition cond = AL) {}
+
+  // Load and Store. May clobber IP.
+  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {}
+  void LoadDImmediate(DRegister dd, double value,
+                      Register scratch, Condition cond = AL) {}
+  void MarkExceptionHandler(Label* label) OVERRIDE;
+  void LoadFromOffset(LoadOperandType type,
+                      Register reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void StoreToOffset(StoreOperandType type,
+                     Register reg,
+                     Register base,
+                     int32_t offset,
+                     Condition cond = AL) OVERRIDE;
+  void LoadSFromOffset(SRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreSToOffset(SRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void LoadDFromOffset(DRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreDToOffset(DRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+
+
+  static bool IsInstructionForExceptionHandling(uword pc);
+
+  // Emit data (e.g. encoded instruction or immediate) to the
+  // instruction stream.
+  void Emit(int32_t value);
+  void Bind(Label* label) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+ private:
+  void EmitType01(Condition cond,
+                  int type,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  void EmitType5(Condition cond, int offset, bool link);
+
+  void EmitMemOp(Condition cond,
+                 bool load,
+                 bool byte,
+                 Register rd,
+                 const Address& ad);
+
+  void EmitMemOpAddressMode3(Condition cond,
+                             int32_t mode,
+                             Register rd,
+                             const Address& ad);
+
+  void EmitMultiMemOp(Condition cond,
+                      BlockAddressMode am,
+                      bool load,
+                      Register base,
+                      RegList regs);
+
+  void EmitShiftImmediate(Condition cond,
+                          Shift opcode,
+                          Register rd,
+                          Register rm,
+                          const ShifterOperand& so);
+
+  void EmitShiftRegister(Condition cond,
+                         Shift opcode,
+                         Register rd,
+                         Register rm,
+                         const ShifterOperand& so);
+
+  void EmitMulOp(Condition cond,
+                 int32_t opcode,
+                 Register rd,
+                 Register rn,
+                 Register rm,
+                 Register rs);
+
+  void EmitVFPsss(Condition cond,
+                  int32_t opcode,
+                  SRegister sd,
+                  SRegister sn,
+                  SRegister sm);
+
+  void EmitVFPddd(Condition cond,
+                  int32_t opcode,
+                  DRegister dd,
+                  DRegister dn,
+                  DRegister dm);
+
+  void EmitVFPsd(Condition cond,
+                 int32_t opcode,
+                 SRegister sd,
+                 DRegister dm);
+
+  void EmitVFPds(Condition cond,
+                 int32_t opcode,
+                 DRegister dd,
+                 SRegister sm);
+
+  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
+
+  void EmitBranch(Condition cond, Label* label, bool link);
+  static int32_t EncodeBranchOffset(int offset, int32_t inst);
+  static int DecodeBranchOffset(int32_t inst);
+  int32_t EncodeTstOffset(int offset, int32_t inst);
+  int DecodeTstOffset(int32_t inst);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
new file mode 100644
index 0000000..703d68e
--- /dev/null
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -0,0 +1,2363 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_thumb2.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+void Thumb2Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, AND, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, EOR, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, SUB, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, RSB, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, RSB, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::add(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, ADD, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, ADD, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, SUB, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, ADC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, SBC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, RSC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
+  EmitDataProcessing(cond, TST, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
+  EmitDataProcessing(cond, TEQ, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, CMP, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, CMN, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::orr(Register rd, Register rn,
+                          const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, ORR, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::orrs(Register rd, Register rn,
+                           const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, ORR, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MOV, 0, R0, rd, so);
+}
+
+
+void Thumb2Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MOV, 1, R0, rd, so);
+}
+
+
+void Thumb2Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitDataProcessing(cond, BIC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MVN, 0, R0, rd, so);
+}
+
+
+void Thumb2Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MVN, 1, R0, rd, so);
+}
+
+
+void Thumb2Assembler::mul(Register rd, Register rn, Register rm, Condition cond) {
+  if (rd == rm && !IsHighRegister(rd) && !IsHighRegister(rn) && !force_32bit_) {
+    // 16 bit.
+    int16_t encoding = B14 | B9 | B8 | B6 |
+        rn << 3 | rd;
+    Emit16(encoding);
+  } else {
+    // 32 bit.
+    uint32_t op1 = 0b000;
+    uint32_t op2 = 0b00;
+    int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+        op1 << 20 |
+        B15 | B14 | B13 | B12 |
+        op2 << 4 |
+        static_cast<uint32_t>(rd) << 8 |
+        static_cast<uint32_t>(rn) << 16 |
+        static_cast<uint32_t>(rm);
+
+    Emit32(encoding);
+  }
+}
+
+
+void Thumb2Assembler::mla(Register rd, Register rn, Register rm, Register ra,
+                          Condition cond) {
+  uint32_t op1 = 0b000;
+  uint32_t op2 = 0b00;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(ra) << 12 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::mls(Register rd, Register rn, Register rm, Register ra,
+                          Condition cond) {
+  uint32_t op1 = 0b000;
+  uint32_t op2 = 0b01;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(ra) << 12 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
+                            Register rm, Condition cond) {
+  uint32_t op1 = 0b010;
+  uint32_t op2 = 0b0000;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd_lo) << 12 |
+      static_cast<uint32_t>(rd_hi) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) {
+  uint32_t op1 = 0b001;
+  uint32_t op2 = 0b1111;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B20 |
+      op1 << 20 |
+      op2 << 4 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) {
+  uint32_t op1 = 0b001;
+  uint32_t op2 = 0b1111;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B21 | B20 |
+      op1 << 20 |
+      op2 << 4 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::ldr(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::str(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, false, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, true, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::strb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, true, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, true, false, rd, ad);
+}
+
+
+void Thumb2Assembler::strh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, false, true, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrsb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, true, false, true, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, true, true, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  // This is different from other loads.  The encoding is like ARM.
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
+      static_cast<int32_t>(rd) << 12 |
+      (static_cast<int32_t>(rd) + 1) << 8 |
+      ad.encodingThumbLdrdStrd();
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  // This is different from other loads.  The encoding is like ARM.
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 |
+      static_cast<int32_t>(rd) << 12 |
+      (static_cast<int32_t>(rd) + 1) << 8 |
+      ad.encodingThumbLdrdStrd();
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::ldm(BlockAddressMode am,
+                          Register base,
+                          RegList regs,
+                          Condition cond) {
+  if (__builtin_popcount(regs) == 1) {
+    // Thumb doesn't support one reg in the list.
+    // Find the register number.
+    int reg = 0;
+    while (reg < 16) {
+      if ((regs & (1 << reg)) != 0) {
+         break;
+      }
+      ++reg;
+    }
+    CHECK_LT(reg, 16);
+    CHECK(am == DB_W);      // Only writeback is supported.
+    ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
+  } else {
+    EmitMultiMemOp(cond, am, true, base, regs);
+  }
+}
+
+
+void Thumb2Assembler::stm(BlockAddressMode am,
+                          Register base,
+                          RegList regs,
+                          Condition cond) {
+  if (__builtin_popcount(regs) == 1) {
+    // Thumb doesn't support one reg in the list.
+    // Find the register number.
+    int reg = 0;
+    while (reg < 16) {
+      if ((regs & (1 << reg)) != 0) {
+         break;
+      }
+      ++reg;
+    }
+    CHECK_LT(reg, 16);
+    CHECK(am == IA || am == IA_W);
+    Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
+    str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond);
+  } else {
+    EmitMultiMemOp(cond, am, false, base, regs);
+  }
+}
+
+
+bool Thumb2Assembler::vmovs(SRegister sd, float s_imm, Condition cond) {
+  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
+  if (((imm32 & ((1 << 19) - 1)) == 0) &&
+      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
+       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
+    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
+        ((imm32 >> 19) & ((1 << 6) -1));
+    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
+               sd, S0, S0);
+    return true;
+  }
+  return false;
+}
+
+
+bool Thumb2Assembler::vmovd(DRegister dd, double d_imm, Condition cond) {
+  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
+  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
+      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
+       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
+    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
+        ((imm64 >> 48) & ((1 << 6) -1));
+    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
+               dd, D0, D0);
+    return true;
+  }
+  return false;
+}
+
+
+void Thumb2Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vadds(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, 0, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, 0, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B6, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B6, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B23, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B23, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vabss(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
+}
+
+void Thumb2Assembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vcmpsz(SRegister sd, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
+}
+
+
+void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
+}
+
+void Thumb2Assembler::b(Label* label, Condition cond) {
+  EmitBranch(cond, label, false, false);
+}
+
+
+void Thumb2Assembler::bl(Label* label, Condition cond) {
+  CheckCondition(cond);
+  EmitBranch(cond, label, true, false);
+}
+
+
+void Thumb2Assembler::blx(Label* label) {
+  EmitBranch(AL, label, true, true);
+}
+
+
+void Thumb2Assembler::MarkExceptionHandler(Label* label) {
+  EmitDataProcessing(AL, TST, 1, PC, R0, ShifterOperand(0));
+  Label l;
+  b(&l);
+  EmitBranch(AL, label, false, false);
+  Bind(&l);
+}
+
+
+void Thumb2Assembler::Emit32(int32_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int16_t>(value >> 16);
+  buffer_.Emit<int16_t>(value & 0xffff);
+}
+
+
+void Thumb2Assembler::Emit16(int16_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int16_t>(value);
+}
+
+
+bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
+                                            Opcode opcode,
+                                            int set_cc,
+                                            Register rn,
+                                            Register rd,
+                                            const ShifterOperand& so) {
+  if (force_32bit_) {
+    return true;
+  }
+
+  bool can_contain_high_register = opcode == MOV || opcode == ADD || opcode == SUB;
+
+  if (IsHighRegister(rd) || IsHighRegister(rn)) {
+    if (can_contain_high_register) {
+      // There are high register instructions available for this opcode.
+      // However, there is no RRX available.
+      if (so.IsShift() && so.GetShift() == RRX) {
+        return true;
+      }
+
+      // Check special case for SP relative ADD and SUB immediate.
+      if ((opcode == ADD || opcode == SUB) && so.IsImmediate()) {
+        // If rn is SP and rd is a high register we need to use a 32 bit encoding.
+         if (rn == SP && rd != SP && IsHighRegister(rd)) {
+           return true;
+         }
+
+         uint32_t imm = so.GetImmediate();
+         // If the immediates are out of range use 32 bit.
+         if (rd == SP && rn == SP) {
+           if (imm > (1 << 9)) {    // 9 bit immediate.
+             return true;
+           }
+         } else if (opcode == ADD && rd != SP && rn == SP) {   // 10 bit immediate.
+           if (imm > (1 << 10)) {
+             return true;
+           }
+         } else if (opcode == SUB && rd != SP && rn == SP) {
+           // SUB rd, SP, #imm is always 32 bit.
+           return true;
+         }
+      }
+    }
+
+    // The ADD,SUB and MOV instructions that work with high registers don't have
+    // immediate variants.
+    if (so.IsImmediate()) {
+      return true;
+    }
+  }
+
+  if (so.IsRegister() && IsHighRegister(so.GetRegister()) && !can_contain_high_register) {
+    return true;
+  }
+
+  // Check for MOV with an ROR.
+  if (opcode == MOV && so.IsRegister() && so.IsShift() && so.GetShift() == ROR) {
+    if (so.GetImmediate() != 0) {
+      return true;
+    }
+  }
+
+  bool rn_is_valid = true;
+
+  // Check for single operand instructions and ADD/SUB.
+  switch (opcode) {
+    case CMP:
+    case MOV:
+    case TST:
+    case MVN:
+      rn_is_valid = false;      // There is no Rn for these instructions.
+      break;
+    case TEQ:
+      return true;
+      break;
+    case ADD:
+    case SUB:
+      break;
+    default:
+      if (so.IsRegister() && rd != rn) {
+        return true;
+      }
+  }
+
+  if (so.IsImmediate()) {
+    if (rn_is_valid && rn != rd) {
+      // The only thumb1 instruction with a register and an immediate are ADD and SUB.  The
+      // immediate must be 3 bits.
+      if (opcode != ADD && opcode != SUB) {
+        return true;
+      } else {
+        // Check that the immediate is 3 bits for ADD and SUB.
+        if (so.GetImmediate() >= 8) {
+          return true;
+        }
+      }
+    } else {
+      // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
+      if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
+        return true;
+      } else {
+        if (so.GetImmediate() > 255) {
+          return true;
+        }
+      }
+    }
+  }
+
+  // The instruction can be encoded in 16 bits.
+  return false;
+}
+
+
+void Thumb2Assembler::Emit32BitDataProcessing(Condition cond,
+                                              Opcode opcode,
+                                              int set_cc,
+                                              Register rn,
+                                              Register rd,
+                                              const ShifterOperand& so) {
+  uint8_t thumb_opcode = 0b11111111;
+  switch (opcode) {
+    case AND: thumb_opcode = 0b0000; break;
+    case EOR: thumb_opcode = 0b0100; break;
+    case SUB: thumb_opcode = 0b1101; break;
+    case RSB: thumb_opcode = 0b1110; break;
+    case ADD: thumb_opcode = 0b1000; break;
+    case ADC: thumb_opcode = 0b1010; break;
+    case SBC: thumb_opcode = 0b1011; break;
+    case RSC: break;
+    case TST: thumb_opcode = 0b0000; set_cc = true; rd = PC; break;
+    case TEQ: thumb_opcode = 0b0100; set_cc = true; rd = PC; break;
+    case CMP: thumb_opcode = 0b1101; set_cc = true; rd = PC; break;
+    case CMN: thumb_opcode = 0b1000; set_cc = true; rd = PC; break;
+    case ORR: thumb_opcode = 0b0010; break;
+    case MOV: thumb_opcode = 0b0010; rn = PC; break;
+    case BIC: thumb_opcode = 0b0001; break;
+    case MVN: thumb_opcode = 0b0011; rn = PC; break;
+    default:
+      break;
+  }
+
+  if (thumb_opcode == 0b11111111) {
+    LOG(FATAL) << "Invalid thumb2 opcode " << opcode;
+  }
+
+  int32_t encoding = 0;
+  if (so.IsImmediate()) {
+    // Check special cases.
+    if ((opcode == SUB || opcode == ADD) && rn == SP) {
+      // There are special ADD/SUB rd, SP, #imm12 instructions.
+      if (opcode == SUB) {
+        thumb_opcode = 0b0101;
+      } else {
+        thumb_opcode = 0;
+      }
+      uint32_t imm = so.GetImmediate();
+      CHECK_LT(imm, (1u << 12));
+
+      uint32_t i = (imm >> 11) & 1;
+      uint32_t imm3 = (imm >> 8) & 0b111;
+      uint32_t imm8 = imm & 0xff;
+
+      encoding = B31 | B30 | B29 | B28 | B25 |
+           B19 | B18 | B16 |
+           thumb_opcode << 21 |
+           rd << 8 |
+           i << 26 |
+           imm3 << 12 |
+           imm8;
+    } else {
+      // Modified immediate.
+      uint32_t imm = ModifiedImmediate(so.encodingThumb(2));
+      if (imm == kInvalidModifiedImmediate) {
+        LOG(FATAL) << "Immediate value cannot fit in thumb2 modified immediate";
+      }
+      encoding = B31 | B30 | B29 | B28 |
+          thumb_opcode << 21 |
+          set_cc << 20 |
+          rn << 16 |
+          rd << 8 |
+          imm;
+    }
+  } else if (so.IsRegister()) {
+     // Register (possibly shifted)
+     encoding = B31 | B30 | B29 | B27 | B25 |
+         thumb_opcode << 21 |
+         set_cc << 20 |
+         rn << 16 |
+         rd << 8 |
+         so.encodingThumb(2);
+  }
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::Emit16BitDataProcessing(Condition cond,
+                                              Opcode opcode,
+                                              int set_cc,
+                                              Register rn,
+                                              Register rd,
+                                              const ShifterOperand& so) {
+  if (opcode == ADD || opcode == SUB) {
+    Emit16BitAddSub(cond, opcode, set_cc, rn, rd, so);
+    return;
+  }
+  uint8_t thumb_opcode = 0b11111111;
+  // Thumb1.
+  uint8_t dp_opcode = 0b01;
+  uint8_t opcode_shift = 6;
+  uint8_t rd_shift = 0;
+  uint8_t rn_shift = 3;
+  uint8_t immediate_shift = 0;
+  bool use_immediate = false;
+  uint8_t immediate = 0;
+
+  if (opcode == MOV && so.IsRegister() && so.IsShift()) {
+    // Convert shifted mov operand2 into 16 bit opcodes.
+    dp_opcode = 0;
+    opcode_shift = 11;
+
+    use_immediate = true;
+    immediate = so.GetImmediate();
+    immediate_shift = 6;
+
+    rn = so.GetRegister();
+
+    switch (so.GetShift()) {
+    case LSL: thumb_opcode = 0b00; break;
+    case LSR: thumb_opcode = 0b01; break;
+    case ASR: thumb_opcode = 0b10; break;
+    case ROR:
+      // ROR doesn't allow immediates.
+      thumb_opcode = 0b111;
+      dp_opcode = 0b01;
+      opcode_shift = 6;
+      use_immediate = false;
+      break;
+    case RRX: break;
+    default:
+     break;
+    }
+  } else {
+    if (so.IsImmediate()) {
+      use_immediate = true;
+      immediate = so.GetImmediate();
+    }
+
+    switch (opcode) {
+      case AND: thumb_opcode = 0b0000; break;
+      case EOR: thumb_opcode = 0b0001; break;
+      case SUB: break;
+      case RSB: thumb_opcode = 0b1001; break;
+      case ADD: break;
+      case ADC: thumb_opcode = 0b0101; break;
+      case SBC: thumb_opcode = 0b0110; break;
+      case RSC: break;
+      case TST: thumb_opcode = 0b1000; rn = so.GetRegister(); break;
+      case TEQ: break;
+      case CMP:
+        if (use_immediate) {
+          // T2 encoding.
+           dp_opcode = 0;
+           opcode_shift = 11;
+           thumb_opcode = 0b101;
+           rd_shift = 8;
+           rn_shift = 8;
+        } else {
+          thumb_opcode = 0b1010;
+          rn = so.GetRegister();
+        }
+
+        break;
+      case CMN: thumb_opcode = 0b1011; rn = so.GetRegister(); break;
+      case ORR: thumb_opcode = 0b1100; break;
+      case MOV:
+        dp_opcode = 0;
+        if (use_immediate) {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 0b100;
+          rd_shift = 8;
+          rn_shift = 8;
+        } else {
+          rn = so.GetRegister();
+          if (IsHighRegister(rn) || IsHighRegister(rd)) {
+            // Special mov for high registers.
+            dp_opcode = 0b01;
+            opcode_shift = 7;
+            // Put the top bit of rd into the bottom bit of the opcode.
+            thumb_opcode = 0b0001100 | static_cast<uint32_t>(rd) >> 3;
+            rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111);
+          } else {
+            thumb_opcode = 0;
+          }
+        }
+        break;
+      case BIC: thumb_opcode = 0b1110; break;
+      case MVN: thumb_opcode = 0b1111; rn = so.GetRegister(); break;
+      default:
+        break;
+    }
+  }
+
+  if (thumb_opcode == 0b11111111) {
+    LOG(FATAL) << "Invalid thumb1 opcode " << opcode;
+  }
+
+  int16_t encoding = dp_opcode << 14 |
+      (thumb_opcode << opcode_shift) |
+      rd << rd_shift |
+      rn << rn_shift |
+      (use_immediate ? (immediate << immediate_shift) : 0);
+
+  Emit16(encoding);
+}
+
+
+// ADD and SUB are complex enough to warrant their own emitter.
+void Thumb2Assembler::Emit16BitAddSub(Condition cond,
+                                      Opcode opcode,
+                                      int set_cc,
+                                      Register rn,
+                                      Register rd,
+                                      const ShifterOperand& so) {
+  uint8_t dp_opcode = 0;
+  uint8_t opcode_shift = 6;
+  uint8_t rd_shift = 0;
+  uint8_t rn_shift = 3;
+  uint8_t immediate_shift = 0;
+  bool use_immediate = false;
+  uint8_t immediate = 0;
+  uint8_t thumb_opcode;;
+
+  if (so.IsImmediate()) {
+    use_immediate = true;
+    immediate = so.GetImmediate();
+  }
+
+  switch (opcode) {
+    case ADD:
+      if (so.IsRegister()) {
+        Register rm = so.GetRegister();
+        if (rn == rd) {
+          // Can use T2 encoding (allows 4 bit registers)
+          dp_opcode = 0b01;
+          opcode_shift = 10;
+          thumb_opcode = 0b0001;
+          // Make Rn also contain the top bit of rd.
+          rn = static_cast<Register>(static_cast<uint32_t>(rm) |
+                                     (static_cast<uint32_t>(rd) & 0b1000) << 1);
+          rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111);
+        } else {
+          // T1.
+          opcode_shift = 9;
+          thumb_opcode = 0b01100;
+          immediate = static_cast<uint32_t>(so.GetRegister());
+          use_immediate = true;
+          immediate_shift = 6;
+        }
+      } else {
+        // Immediate.
+        if (rd == SP && rn == SP) {
+          // ADD sp, sp, #imm
+          dp_opcode = 0b10;
+          thumb_opcode = 0b11;
+          opcode_shift = 12;
+          CHECK_LT(immediate, (1 << 9));
+          CHECK_EQ((immediate & 0b11), 0);
+
+          // Remove rd and rn from instruction by orring it with immed and clearing bits.
+          rn = R0;
+          rd = R0;
+          rd_shift = 0;
+          rn_shift = 0;
+          immediate >>= 2;
+        } else if (rd != SP && rn == SP) {
+          // ADD rd, SP, #imm
+          dp_opcode = 0b10;
+          thumb_opcode = 0b101;
+          opcode_shift = 11;
+          CHECK_LT(immediate, (1 << 10));
+          CHECK_EQ((immediate & 0b11), 0);
+
+          // Remove rn from instruction.
+          rn = R0;
+          rn_shift = 0;
+          rd_shift = 8;
+          immediate >>= 2;
+        } else if (rn != rd) {
+          // Must use T1.
+          opcode_shift = 9;
+          thumb_opcode = 0b01110;
+          immediate_shift = 6;
+        } else {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 0b110;
+          rd_shift = 8;
+          rn_shift = 8;
+        }
+      }
+      break;
+
+    case SUB:
+      if (so.IsRegister()) {
+         // T1.
+         opcode_shift = 9;
+         thumb_opcode = 0b01101;
+         immediate = static_cast<uint32_t>(so.GetRegister());
+         use_immediate = true;
+         immediate_shift = 6;
+       } else {
+         if (rd == SP && rn == SP) {
+           // SUB sp, sp, #imm
+           dp_opcode = 0b10;
+           thumb_opcode = 0b1100001;
+           opcode_shift = 7;
+           CHECK_LT(immediate, (1 << 9));
+           CHECK_EQ((immediate & 0b11), 0);
+
+           // Remove rd and rn from instruction by orring it with immed and clearing bits.
+           rn = R0;
+           rd = R0;
+           rd_shift = 0;
+           rn_shift = 0;
+           immediate >>= 2;
+         } else if (rn != rd) {
+           // Must use T1.
+           opcode_shift = 9;
+           thumb_opcode = 0b01111;
+           immediate_shift = 6;
+         } else {
+           // T2 encoding.
+           opcode_shift = 11;
+           thumb_opcode = 0b111;
+           rd_shift = 8;
+           rn_shift = 8;
+         }
+       }
+      break;
+    default:
+      LOG(FATAL) << "This opcode is not an ADD or SUB: " << opcode;
+      return;
+  }
+
+  int16_t encoding = dp_opcode << 14 |
+      (thumb_opcode << opcode_shift) |
+      rd << rd_shift |
+      rn << rn_shift |
+      (use_immediate ? (immediate << immediate_shift) : 0);
+
+  Emit16(encoding);
+}
+
+
+void Thumb2Assembler::EmitDataProcessing(Condition cond,
+                                         Opcode opcode,
+                                         int set_cc,
+                                         Register rn,
+                                         Register rd,
+                                         const ShifterOperand& so) {
+  CHECK_NE(rd, kNoRegister);
+  CheckCondition(cond);
+
+  if (Is32BitDataProcessing(cond, opcode, set_cc, rn, rd, so)) {
+    Emit32BitDataProcessing(cond, opcode, set_cc, rn, rd, so);
+  } else {
+    Emit16BitDataProcessing(cond, opcode, set_cc, rn, rd, so);
+  }
+}
+
+
// Write the final encoding of this branch into the buffer at its recorded
// location.  The layout depends on the branch size (16/32 bit) and type
// (B, B<cond>, BL, BLX, CBZ/CBNZ).
void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const {
  bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink;  // BL/BLX.
  bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX;       // Exchange to ARM state.
  int32_t offset = target_ - location_;

  if (size_ == k32Bit) {
    // Common top bits of the 32 bit branch encodings.
    int32_t encoding = B31 | B30 | B29 | B28 | B15;
    if (link) {
      // BL or BLX immediate.
      encoding |= B14;
      if (!x) {
        encoding |= B12;
      } else {
        // Bottom bit of offset must be 0.
        CHECK_EQ((offset & 1), 0);
      }
    } else {
      if (x) {
        LOG(FATAL) << "Invalid use of BX";
      } else {
        if (cond_ == AL) {
          // Can use the T4 encoding allowing a 24 bit offset.
          if (!x) {
            encoding |= B12;
          }
        } else {
          // Must be T3 encoding with a 20 bit offset.
          encoding |= cond_ << 22;
        }
      }
    }
    encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding);
    // 32 bit Thumb instructions are stored as two consecutive halfwords.
    buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16));
    buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff));
  } else {
    if (IsCompareAndBranch()) {
      offset -= 4;    // Account for PC read-ahead.
      // CBZ/CBNZ: offset split into i (bit 9) and imm5 (bits 3-7).
      uint16_t i = (offset >> 6) & 1;
      uint16_t imm5 = (offset >> 1) & 0b11111;
      int16_t encoding = B15 | B13 | B12 |
            (type_ ==  kCompareAndBranchNonZero ? B11 : 0) |
            static_cast<uint32_t>(rn_) |
            B8 |
            i << 9 |
            imm5 << 3;
      buffer->Store<int16_t>(location_, encoding);
    } else {
      offset -= 4;    // Account for PC offset.
      int16_t encoding;
      // 16 bit.
      if (cond_ == AL) {
        // Unconditional: 11 bit halfword offset.
        encoding = B15 | B14 | B13 |
            ((offset >> 1) & 0x7ff);
      } else {
        // Conditional: 8 bit halfword offset plus condition field.
        encoding = B15 | B14 | B12 |
            cond_ << 8 | ((offset >> 1) & 0xff);
      }
      buffer->Store<int16_t>(location_, encoding);
    }
  }
}
+
+
// Emit a 16 bit cbz/cbnz placeholder and register an unresolved branch for it.
// 'prev' is the previous head of the label's link chain and is stored in the
// instruction slot until the branch is resolved.  Returns the new branch id.
uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) {
  uint32_t location = buffer_.Size();

  // This is always unresolved as it must be a forward branch.
  Emit16(prev);      // Previous link.
  return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero,
      location, rn);
}
+
+
+// NOTE: this only support immediate offsets, not [rx,ry].
+// TODO: support [rx,ry] instructions.
+void Thumb2Assembler::EmitLoadStore(Condition cond,
+                                    bool load,
+                                    bool byte,
+                                    bool half,
+                                    bool is_signed,
+                                    Register rd,
+                                    const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CheckCondition(cond);
+  bool must_be_32bit = force_32bit_;
+  if (IsHighRegister(rd)) {
+    must_be_32bit = true;
+  }
+
+  Register rn = ad.GetRegister();
+  if (IsHighRegister(rn) && rn != SP) {
+    must_be_32bit = true;
+  }
+
+  if (is_signed || ad.GetOffset() < 0 || ad.GetMode() != Address::Offset) {
+    must_be_32bit = true;
+  }
+
+  int32_t offset = ad.GetOffset();
+
+  // The 16 bit SP relative instruction can only have a 10 bit offset.
+  if (rn == SP && offset > 1024) {
+    must_be_32bit = true;
+  }
+
+  if (byte) {
+    // 5 bit offset, no shift.
+    if (offset > 32) {
+      must_be_32bit = true;
+    }
+  } else if (half) {
+    // 6 bit offset, shifted by 1.
+    if (offset > 64) {
+      must_be_32bit = true;
+    }
+  } else {
+    // 7 bit offset, shifted by 2.
+    if (offset > 128) {
+       must_be_32bit = true;
+     }
+  }
+
+  if (must_be_32bit) {
+    int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+                  (load ? B20 : 0) |
+                  (is_signed ? B24 : 0) |
+                  static_cast<uint32_t>(rd) << 12 |
+                  ad.encodingThumb(2) |
+                  (byte ? 0 : half ? B21 : B22);
+    Emit32(encoding);
+  } else {
+    // 16 bit thumb1.
+    uint8_t opA = 0;
+    bool sp_relative = false;
+
+    if (byte) {
+      opA = 0b0111;
+    } else if (half) {
+      opA = 0b1000;
+    } else {
+      if (rn == SP) {
+        opA = 0b1001;
+        sp_relative = true;
+      } else {
+        opA = 0b0110;
+      }
+    }
+    int16_t encoding = opA << 12 |
+                (load ? B11 : 0);
+
+    CHECK_GE(offset, 0);
+    if (sp_relative) {
+      // SP relative, 10 bit offset.
+      CHECK_LT(offset, 1024);
+      CHECK_EQ((offset & 0b11), 0);
+      encoding |= rd << 8 | offset >> 2;
+    } else {
+      // No SP relative.  The offset is shifted right depending on
+      // the size of the load/store.
+      encoding |= static_cast<uint32_t>(rd);
+
+      if (byte) {
+        // 5 bit offset, no shift.
+        CHECK_LT(offset, 32);
+      } else if (half) {
+        // 6 bit offset, shifted by 1.
+        CHECK_LT(offset, 64);
+        CHECK_EQ((offset & 0b1), 0);
+        offset >>= 1;
+      } else {
+        // 7 bit offset, shifted by 2.
+        CHECK_LT(offset, 128);
+        CHECK_EQ((offset & 0b11), 0);
+        offset >>= 2;
+      }
+      encoding |= rn << 3 | offset  << 6;
+    }
+
+    Emit16(encoding);
+  }
+}
+
+
// Emit an LDM/STM (block transfer).  A 16 bit encoding is used only when all
// transferred registers are low and writeback is requested.
void Thumb2Assembler::EmitMultiMemOp(Condition cond,
                                     BlockAddressMode am,
                                     bool load,
                                     Register base,
                                     RegList regs) {
  CHECK_NE(base, kNoRegister);
  CheckCondition(cond);
  bool must_be_32bit = force_32bit_;

  // High registers in the list force the wide encoding.
  // NOTE(review): a high 'base' register is not checked here — verify that
  // callers never pass one together with an all-low register list.
  if ((regs & 0xff00) != 0) {
    must_be_32bit = true;
  }

  uint32_t w_bit = am == IA_W || am == DB_W || am == DA_W || am == IB_W;
  // 16 bit always uses writeback.
  if (!w_bit) {
    must_be_32bit = true;
  }

  if (must_be_32bit) {
    uint32_t op = 0;
    switch (am) {
      case IA:
      case IA_W:
        op = 0b01;
        break;
      case DB:
      case DB_W:
        op = 0b10;
        break;
      case DA:
      case IB:
      case DA_W:
      case IB_W:
        // Thumb2 only encodes increment-after and decrement-before.
        LOG(FATAL) << "LDM/STM mode not supported on thumb: " << am;
    }
    if (load) {
      // Cannot have SP in the list.
      CHECK_EQ((regs & (1 << SP)), 0);
    } else {
      // Cannot have PC or SP in the list.
      CHECK_EQ((regs & (1 << PC | 1 << SP)), 0);
    }
    int32_t encoding = B31 | B30 | B29 | B27 |
                    (op << 23) |
                    (load ? B20 : 0) |
                    base << 16 |
                    regs |
                    (w_bit << 21);
    Emit32(encoding);
  } else {
    int16_t encoding = B15 | B14 |
                    (load ? B11 : 0) |
                    base << 8 |
                    regs;
    Emit16(encoding);
  }
}
+
+
// Emit (or reserve space for) a branch to 'label', recording it in the branch
// table so it can be resolved — and resized if needed — when the label binds.
void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) {
  uint32_t pc = buffer_.Size();
  Branch::Type branch_type;
  if (cond == AL) {
    if (link) {
      if (x) {
        branch_type = Branch::kUnconditionalLinkX;      // BLX.
      } else {
        branch_type = Branch::kUnconditionalLink;       // BL.
      }
    } else {
      branch_type = Branch::kUnconditional;             // B.
    }
  } else {
    branch_type = Branch::kConditional;                 // B<cond>.
  }

  if (label->IsBound()) {
    Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond);  // Resolved branch.

    // The branch is to a bound label which means that it's a backwards branch.  We know the
    // current size of it so we can emit the appropriate space.  Note that if it's a 16 bit
    // branch the size may change if it so happens that other branches change size that change
    // the distance to the target and that distance puts this branch over the limit for 16 bits.
    if (size == Branch::k16Bit) {
      Emit16(0);          // Space for a 16 bit branch.
    } else {
      Emit32(0);            // Space for a 32 bit branch.
    }
  } else {
    // Branch is to an unbound label.  Emit space for it.
    uint16_t branch_id = AddBranch(branch_type, pc, cond);    // Unresolved branch.
    if (force_32bit_) {
      Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
      Emit16(0);                   // another 16 bits.
    } else {
      Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
    }
    label->LinkTo(branch_id);           // Link to the branch ID.
  }
}
+
+
+void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+      B25 | B23 | B21 | B20 |
+      static_cast<uint32_t>(rm) << 16 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      B7 |
+      static_cast<uint32_t>(rm);
+  Emit32(encoding);
+}
+
+
// Write imm16 into the low halfword of rd (MOVW).
void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
  CheckCondition(cond);
  bool must_be_32bit = force_32bit_;
  // The narrow encoding only takes a low register and an 8 bit immediate.
  if (IsHighRegister(rd)|| imm16 >= 256u) {
    must_be_32bit = true;
  }

  if (must_be_32bit) {
    // Use encoding T3.
    // The immediate is split into i:imm4:imm3:imm8 fields.
    uint32_t imm4 = (imm16 >> 12) & 0b1111;
    uint32_t i = (imm16 >> 11) & 0b1;
    uint32_t imm3 = (imm16 >> 8) & 0b111;
    uint32_t imm8 = imm16 & 0xff;
    int32_t encoding = B31 | B30 | B29 | B28 |
                    B25 | B22 |
                    static_cast<uint32_t>(rd) << 8 |
                    i << 26 |
                    imm4 << 16 |
                    imm3 << 12 |
                    imm8;
    Emit32(encoding);
  } else {
    // NOTE(review): this is the 16 bit MOV (immediate) T1 encoding, which
    // also sets the flags (MOVS) — confirm that callers accept this.
    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
                imm16;
    Emit16(encoding);
  }
}
+
+
+void Thumb2Assembler::movt(Register rd, uint16_t imm16, Condition cond) {
+  CheckCondition(cond);
+  // Always 32 bits.
+  uint32_t imm4 = (imm16 >> 12) & 0b1111;
+  uint32_t i = (imm16 >> 11) & 0b1;
+  uint32_t imm3 = (imm16 >> 8) & 0b111;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B23 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CheckCondition(cond);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CheckCondition(cond);
+  CHECK_LT(imm, (1u << 10));
+
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rt) << 12 |
+      0xf << 8 |
+      imm >> 2;
+  Emit32(encoding);
+}
+
+
// Load-exclusive with a zero offset.
void Thumb2Assembler::ldrex(Register rt, Register rn, Condition cond) {
  ldrex(rt, rn, 0, cond);
}
+
+
// Store-exclusive: store rt to [rn + imm]; rd receives the success status.
void Thumb2Assembler::strex(Register rd,
                            Register rt,
                            Register rn,
                            uint16_t imm,
                            Condition cond) {
  CHECK_NE(rn, kNoRegister);
  CHECK_NE(rd, kNoRegister);
  CHECK_NE(rt, kNoRegister);
  CheckCondition(cond);
  CHECK_LT(imm, (1u << 10));      // 8 bit field scaled by 4.

  // NOTE(review): imm is assumed 4-byte aligned; the low two bits are
  // silently dropped by 'imm >> 2'.
  int32_t encoding = B31 | B30 | B29 | B27 | B22 |
      static_cast<uint32_t>(rn) << 16 |
      static_cast<uint32_t>(rt) << 12 |
      static_cast<uint32_t>(rd) << 8 |
      imm >> 2;
  Emit32(encoding);
}
+
+
// Store-exclusive with a zero offset.
void Thumb2Assembler::strex(Register rd,
                            Register rt,
                            Register rn,
                            Condition cond) {
  strex(rd, rt, rn, 0, cond);
}
+
+
+void Thumb2Assembler::clrex(Condition cond) {
+  CheckCondition(cond);
+  int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 |
+      B21 | B20 |
+      0xf << 16 |
+      B15 |
+      0xf << 8 |
+      B5 |
+      0xf;
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::nop(Condition cond) {
+  CheckCondition(cond);
+  int16_t encoding = B15 | B13 | B12 |
+      B11 | B10 | B9 | B8;
+  Emit16(encoding);
+}
+
+
// Move an ARM core register into a single-precision register: sn = rt.
void Thumb2Assembler::vmovsr(SRegister sn, Register rt, Condition cond) {
  CHECK_NE(sn, kNoSRegister);
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);    // SP and PC are not valid core-register operands here.
  CHECK_NE(rt, PC);
  CheckCondition(cond);
  // NOTE(review): the condition is emitted in the ARM-style cond field; for
  // Thumb this produces the expected 0xee... prefix only when cond == AL —
  // verify behavior for other conditions.
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 |
                     ((static_cast<int32_t>(sn) >> 1)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
  Emit32(encoding);
}
+
+
// Move a single-precision register into an ARM core register: rt = sn.
void Thumb2Assembler::vmovrs(Register rt, SRegister sn, Condition cond) {
  CHECK_NE(sn, kNoSRegister);
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);    // SP and PC are not valid core-register operands here.
  CHECK_NE(rt, PC);
  CheckCondition(cond);
  // B20 distinguishes the to-core-register direction from vmovsr.
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B20 |
                     ((static_cast<int32_t>(sn) >> 1)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
  Emit32(encoding);
}
+
+
// Move two core registers into a consecutive pair of S registers:
// {sm, sm+1} = {rt, rt2}.  sm must not be the last S register (S31).
void Thumb2Assembler::vmovsrr(SRegister sm, Register rt, Register rt2,
                              Condition cond) {
  CHECK_NE(sm, kNoSRegister);
  CHECK_NE(sm, S31);   // The pair sm, sm+1 must both exist.
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);
  CHECK_NE(rt, PC);
  CHECK_NE(rt2, kNoRegister);
  CHECK_NE(rt2, SP);
  CHECK_NE(rt2, PC);
  CheckCondition(cond);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B22 |
                     (static_cast<int32_t>(rt2)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
                     (static_cast<int32_t>(sm) >> 1);
  Emit32(encoding);
}
+
+
// Move a consecutive pair of S registers into two core registers:
// {rt, rt2} = {sm, sm+1}.
void Thumb2Assembler::vmovrrs(Register rt, Register rt2, SRegister sm,
                              Condition cond) {
  CHECK_NE(sm, kNoSRegister);
  CHECK_NE(sm, S31);   // The pair sm, sm+1 must both exist.
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);
  CHECK_NE(rt, PC);
  CHECK_NE(rt2, kNoRegister);
  CHECK_NE(rt2, SP);
  CHECK_NE(rt2, PC);
  CHECK_NE(rt, rt2);   // Destinations must be distinct.
  CheckCondition(cond);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B22 | B20 |
                     (static_cast<int32_t>(rt2)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
                     (static_cast<int32_t>(sm) >> 1);
  Emit32(encoding);
}
+
+
// Move two core registers into a double-precision register: dm = rt2:rt.
void Thumb2Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
                              Condition cond) {
  CHECK_NE(dm, kNoDRegister);
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);
  CHECK_NE(rt, PC);
  CHECK_NE(rt2, kNoRegister);
  CHECK_NE(rt2, SP);
  CHECK_NE(rt2, PC);
  CheckCondition(cond);
  // The D register index is encoded as M (bit 5) : Vm (low nibble).
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B22 |
                     (static_cast<int32_t>(rt2)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit32(encoding);
}
+
+
// Move a double-precision register into two core registers: rt2:rt = dm.
void Thumb2Assembler::vmovrrd(Register rt, Register rt2, DRegister dm,
                              Condition cond) {
  CHECK_NE(dm, kNoDRegister);
  CHECK_NE(rt, kNoRegister);
  CHECK_NE(rt, SP);
  CHECK_NE(rt, PC);
  CHECK_NE(rt2, kNoRegister);
  CHECK_NE(rt2, SP);
  CHECK_NE(rt2, PC);
  CHECK_NE(rt, rt2);   // Destinations must be distinct.
  CheckCondition(cond);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B22 | B20 |
                     (static_cast<int32_t>(rt2)*B16) |
                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit32(encoding);
}
+
+
+void Thumb2Assembler::vldrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(sd, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::vstrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(sd, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::vldrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(dd, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::vstrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(dd, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit32(encoding);
+}
+
+
// Push 'nregs' consecutive S registers starting at 'reg' onto the stack.
void Thumb2Assembler::vpushs(SRegister reg, int nregs, Condition cond) {
  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, false, cond);
}
+
+
// Push 'nregs' consecutive D registers starting at 'reg' onto the stack.
void Thumb2Assembler::vpushd(DRegister reg, int nregs, Condition cond) {
  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, true, cond);
}
+
+
// Pop 'nregs' consecutive S registers starting at 'reg' from the stack.
void Thumb2Assembler::vpops(SRegister reg, int nregs, Condition cond) {
  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, false, cond);
}
+
+
// Pop 'nregs' consecutive D registers starting at 'reg' from the stack.
void Thumb2Assembler::vpopd(DRegister reg, int nregs, Condition cond) {
  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, true, cond);
}
+
+
// Emit a VPUSH/VPOP of 'nregs' consecutive FP registers starting at 'reg'.
// 'dbl' selects D registers (64 bit) over S registers (32 bit).
void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
  CheckCondition(cond);

  uint32_t D;
  uint32_t Vd;
  if (dbl) {
    // Encoded as D:Vd.
    D = (reg >> 4) & 1;
    Vd = reg & 0b1111;
  } else {
    // Encoded as Vd:D.
    D = reg & 1;
    Vd = (reg >> 1) & 0b1111;
  }
  // VPUSH uses pre-decrement (B24); VPOP uses post-increment with load
  // (B23 | B20).  For doubles the register-count field holds word count,
  // hence the extra shift by 1.
  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
                    B11 | B9 |
        (dbl ? B8 : 0) |
        (push ? B24 : (B23 | B20)) |
        0b1110 << 28 |
        nregs << (dbl ? 1 : 0) |
        D << 22 |
        Vd << 12;
  Emit32(encoding);
}
+
+
// Emit a three-operand single-precision VFP instruction: sd = sn <op> sm.
void Thumb2Assembler::EmitVFPsss(Condition cond, int32_t opcode,
                                 SRegister sd, SRegister sn, SRegister sm) {
  CHECK_NE(sd, kNoSRegister);
  CHECK_NE(sn, kNoSRegister);
  CHECK_NE(sm, kNoSRegister);
  CheckCondition(cond);
  // Each S register splits into a 4 bit index field plus a low bit carried
  // in a separate position (D/N/M bits).
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(sd) & 1)*B22) |
                     ((static_cast<int32_t>(sn) >> 1)*B16) |
                     ((static_cast<int32_t>(sd) >> 1)*B12) |
                     ((static_cast<int32_t>(sn) & 1)*B7) |
                     ((static_cast<int32_t>(sm) & 1)*B5) |
                     (static_cast<int32_t>(sm) >> 1);
  Emit32(encoding);
}
+
+
// Emit a three-operand double-precision VFP instruction: dd = dn <op> dm.
void Thumb2Assembler::EmitVFPddd(Condition cond, int32_t opcode,
                                 DRegister dd, DRegister dn, DRegister dm) {
  CHECK_NE(dd, kNoDRegister);
  CHECK_NE(dn, kNoDRegister);
  CHECK_NE(dm, kNoDRegister);
  CheckCondition(cond);
  // Each D register splits into a 4 bit index field plus a high bit carried
  // in a separate position (D/N/M bits).
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
                     ((static_cast<int32_t>(dd) >> 4)*B22) |
                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
                     ((static_cast<int32_t>(dn) >> 4)*B7) |
                     ((static_cast<int32_t>(dm) >> 4)*B5) |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit32(encoding);
}
+
+
// Emit a two-operand VFP instruction with single destination and double
// source: sd = <op> dm.
void Thumb2Assembler::EmitVFPsd(Condition cond, int32_t opcode,
                                SRegister sd, DRegister dm) {
  CHECK_NE(sd, kNoSRegister);
  CHECK_NE(dm, kNoDRegister);
  CheckCondition(cond);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(sd) & 1)*B22) |
                     ((static_cast<int32_t>(sd) >> 1)*B12) |
                     ((static_cast<int32_t>(dm) >> 4)*B5) |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit32(encoding);
}
+
+
// Emit a two-operand VFP instruction with double destination and single
// source: dd = <op> sm.
void Thumb2Assembler::EmitVFPds(Condition cond, int32_t opcode,
                                DRegister dd, SRegister sm) {
  CHECK_NE(dd, kNoDRegister);
  CHECK_NE(sm, kNoSRegister);
  CheckCondition(cond);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(dd) >> 4)*B22) |
                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
                     ((static_cast<int32_t>(sm) & 1)*B5) |
                     (static_cast<int32_t>(sm) >> 1);
  Emit32(encoding);
}
+
+
void Thumb2Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR.
  CheckCondition(cond);
  // Not yet implemented for Thumb2; aborts at runtime if reached.
  UNIMPLEMENTED(FATAL) << "Unimplemented thumb instruction";
}
+
+
+void Thumb2Assembler::svc(uint32_t imm8) {
+  CHECK(IsUint(8, imm8)) << imm8;
+  int16_t encoding = B15 | B14 | B12 |
+       B11 | B10 | B9 | B8 |
+       imm8;
+  Emit16(encoding);
+}
+
+
+void Thumb2Assembler::bkpt(uint16_t imm8) {
+  CHECK(IsUint(8, imm8)) << imm8;
+  int16_t encoding = B15 | B13 | B12 |
+      B11 | B10 | B9 |
+      imm8;
+  Emit16(encoding);
+}
+
+// Convert the given IT state to a mask bit given bit 0 of the first
+// condition and a shift position.
+static uint8_t ToItMask(ItState s, uint8_t firstcond0, uint8_t shift) {
+  switch (s) {
+  case kItOmitted: return 1 << shift;
+  case kItThen: return firstcond0 << shift;
+  case kItElse: return !firstcond0 << shift;
+  }
+  return 0;
+}
+
+
// Set the IT condition in the given position for the given state.  This is used
// to check that conditional instructions match the preceding IT statement.
void Thumb2Assembler::SetItCondition(ItState s, Condition cond, uint8_t index) {
  switch (s) {
  case kItOmitted: it_conditions_[index] = AL; break;   // Slot unused: unconditional.
  case kItThen: it_conditions_[index] = cond; break;    // Same condition.
  case kItElse:
    // Inverted condition: a condition and its opposite differ only in bit 0.
    it_conditions_[index] = static_cast<Condition>(static_cast<uint8_t>(cond) ^ 1);
    break;
  }
}
+
+
// Emit an IT (If-Then) instruction and record the condition that each of the
// following (up to four) instructions must carry, so CheckCondition can
// validate them.
void Thumb2Assembler::it(Condition firstcond, ItState i1, ItState i2, ItState i3) {
  CheckCondition(AL);       // Not allowed in IT block.
  uint8_t firstcond0 = static_cast<uint8_t>(firstcond) & 1;

  // All conditions to AL.
  for (uint8_t i = 0; i < 4; ++i) {
    it_conditions_[i] = AL;
  }

  SetItCondition(kItThen, firstcond, 0);
  uint8_t mask = ToItMask(i1, firstcond0, 3);
  SetItCondition(i1, firstcond, 1);

  // Each omitted state terminates the block: later states are ignored.
  if (i1 != kItOmitted) {
    mask |= ToItMask(i2, firstcond0, 2);
    SetItCondition(i2, firstcond, 2);
    if (i2 != kItOmitted) {
      mask |= ToItMask(i3, firstcond0, 1);
      SetItCondition(i3, firstcond, 3);
      if (i3 != kItOmitted) {
        // All four slots used; the mask terminates with a 1 in bit 0.
        mask |= 0b0001;
      }
    }
  }

  // Start at first condition.
  it_cond_index_ = 0;
  next_condition_ = it_conditions_[0];
  uint16_t encoding = B15 | B13 | B12 |
        B11 | B10 | B9 | B8 |
        firstcond << 4 |
        mask;
  Emit16(encoding);
}
+
+
// Compare and branch on zero (CBZ).  The hardware only supports forward
// branches, so the target label must be unbound.
void Thumb2Assembler::cbz(Register rn, Label* label) {
  CheckCondition(AL);   // CBZ must execute unconditionally.
  if (label->IsBound()) {
    LOG(FATAL) << "cbz can only be used to branch forwards";
  } else {
    // Thread the label's previous link through the emitted halfword.
    uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), false);
    label->LinkTo(branchid);
  }
}
+
+
// Compare and branch on non-zero (CBNZ).  The hardware only supports forward
// branches, so the target label must be unbound.
void Thumb2Assembler::cbnz(Register rn, Label* label) {
  CheckCondition(AL);   // CBNZ must execute unconditionally.
  if (label->IsBound()) {
    LOG(FATAL) << "cbnz can only be used to branch forwards";
  } else {
    // Thread the label's previous link through the emitted halfword.
    uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), true);
    label->LinkTo(branchid);
  }
}
+
+
+void Thumb2Assembler::blx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  int16_t encoding = B14 | B10 | B9 | B8 | B7 | static_cast<int16_t>(rm) << 3;
+  Emit16(encoding);
+}
+
+
+void Thumb2Assembler::bx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  int16_t encoding = B14 | B10 | B9 | B8 | static_cast<int16_t>(rm) << 3;
+  Emit16(encoding);
+}
+
+
// Push a single register by storing it with pre-decrement of SP.
void Thumb2Assembler::Push(Register rd, Condition cond) {
  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
}
+
+
// Pop a single register by loading it with post-increment of SP.
void Thumb2Assembler::Pop(Register rd, Condition cond) {
  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
}
+
+
// Push a register list via STM with decrement-before and writeback on SP.
void Thumb2Assembler::PushList(RegList regs, Condition cond) {
  stm(DB_W, SP, regs, cond);
}
+
+
// Pop a register list via LDM with increment-after and writeback on SP.
void Thumb2Assembler::PopList(RegList regs, Condition cond) {
  ldm(IA_W, SP, regs, cond);
}
+
+
+void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) {
+  if (cond != AL || rd != rm) {
+    mov(rd, ShifterOperand(rm), cond);
+  }
+}
+
+
// A branch has changed size.  Make a hole for it.
void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) {
  // Move the contents of the buffer using: Move(newposition, oldposition)
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);   // Grow the buffer if needed.
  buffer_.Move(location + delta, location);            // Shift everything at 'location' up by delta.
}
+
+
+void Thumb2Assembler::Bind(Label* label) {
+  CHECK(!label->IsBound());
+  uint32_t bound_pc = buffer_.Size();
+  std::vector<Branch*> changed_branches;
+
+  while (label->IsLinked()) {
+    uint16_t position = label->Position();                  // Branch id for linked branch.
+    Branch* branch = GetBranch(position);                   // Get the branch at this id.
+    bool changed = branch->Resolve(bound_pc);               // Branch can be resolved now.
+    uint32_t branch_location = branch->GetLocation();
+    uint16_t next = buffer_.Load<uint16_t>(branch_location);       // Get next in chain.
+    if (changed) {
+      MakeHoleForBranch(branch->GetLocation(), 2);
+      if (branch->IsCompareAndBranch()) {
+        // A cbz/cbnz instruction has changed size.  There is no valid encoding for
+        // a 32 bit cbz/cbnz so we need to change this to an instruction pair:
+        // cmp rn, #0
+        // b<eq|ne> target
+        bool n = branch->GetType() == Branch::kCompareAndBranchNonZero;
+        Condition cond = n ? NE : EQ;
+        branch->Move(2);      // Move the branch forward by 2 bytes.
+        branch->ResetTypeAndCondition(Branch::kConditional, cond);
+        branch->ResetSize(Branch::k16Bit);
+
+        // Now add a compare instruction in the place the branch was.
+        int16_t cmp = B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8;
+        buffer_.Store<int16_t>(branch_location, cmp);
+
+        // Since have moved made a hole in the code we need to reload the
+        // current pc.
+        bound_pc = buffer_.Size();
+
+        // Now resolve the newly added branch.
+        changed = branch->Resolve(bound_pc);
+        if (changed) {
+          MakeHoleForBranch(branch->GetLocation(), 2);
+          changed_branches.push_back(branch);
+        }
+      } else {
+        changed_branches.push_back(branch);
+      }
+    }
+    label->position_ = next;                                // Move to next.
+  }
+  label->BindTo(bound_pc);
+
+  // Now relocate any changed branches.  Do this until there are no more changes.
+  std::vector<Branch*> branches_to_process = changed_branches;
+  while (branches_to_process.size() != 0) {
+    changed_branches.clear();
+    for (auto& changed_branch : branches_to_process) {
+      for (auto& branch : branches_) {
+        bool changed = branch->Relocate(changed_branch->GetLocation(), 2);
+        if (changed) {
+          changed_branches.push_back(branch);
+        }
+      }
+      branches_to_process = changed_branches;
+    }
+  }
+}
+
+
+void Thumb2Assembler::EmitBranches() {
+  for (auto& branch : branches_) {
+    branch->Emit(&buffer_);
+  }
+}
+
+
// Logical shift left by an immediate: rd = rm << shift_imm.
void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                          Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
}
+
+
// Logical shift right by an immediate: rd = rm >> shift_imm (unsigned).
void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                          Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
}
+
+
// Arithmetic shift right by an immediate: rd = rm >> shift_imm (signed).
void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                          Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
}
+
+
// Rotate right by an immediate: rd = rm rotated by shift_imm.
void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                          Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
}
+
+
// Rotate right by one with extend (RRX): encoded as ROR with a zero shift.
void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond) {
  mov(rd, ShifterOperand(rm, ROR, 0), cond);
}
+
+
// Insert the (byte) branch offset into the given 32 bit branch instruction
// encoding.  The field layout depends on bit 12 of the instruction:
// T4/BL-style (24 bit offset) when set, T3 conditional (20 bit) otherwise.
int32_t Thumb2Assembler::EncodeBranchOffset(int32_t offset, int32_t inst) {
  // The offset is off by 4 due to the way the ARM CPUs read PC.
  offset -= 4;
  offset >>= 1;   // The encodings store a halfword offset.

  uint32_t value = 0;
  // There are two different encodings depending on the value of bit 12.  In one case
  // intermediate values are calculated using the sign bit.
  if ((inst & B12) == B12) {
    // 25 bits of offset.
    uint32_t signbit = (offset >> 31) & 0x1;
    uint32_t i1 = (offset >> 22) & 0x1;
    uint32_t i2 = (offset >> 21) & 0x1;
    uint32_t imm10 = (offset >> 11) & 0x03ff;
    uint32_t imm11 = offset & 0x07ff;
    // J1/J2 are the I1/I2 bits xor'ed with the sign, then inverted.
    uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
    uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
    value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
                      imm11;
    // Remove the offset from the current encoding.
    inst &= ~(0x3ff << 16 | 0x7ff);
  } else {
    uint32_t signbit = (offset >> 31) & 0x1;
    uint32_t imm6 = (offset >> 11) & 0x03f;
    uint32_t imm11 = offset & 0x07ff;
    // NOTE(review): these J1/J2 source bit positions (19/17) do not mirror
    // the positions DecodeBranchOffset() reconstructs for the T3 layout
    // (J2 -> bit 19, J1 -> bit 18) — verify against the ARM ARM B.T3
    // encoding.
    uint32_t j1 = (offset >> 19) & 1;
    uint32_t j2 = (offset >> 17) & 1;
    value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm6 << 16) |
        imm11;
    // Remove the offset from the current encoding.
    inst &= ~(0x3f << 16 | 0x7ff);
  }
  // Mask out offset bits in current instruction.
  inst &= ~(B26 | B13 | B11);
  inst |= value;
  return inst;
}
+
+
+// Recover the byte branch offset encoded in a 32-bit Thumb2 branch
+// instruction 'instr'.  Inverse of EncodeBranchOffset: bit 12 selects
+// between the 25-bit (I1/I2 derived from J1/J2 and S) and the 21-bit
+// (J1/J2 stored directly) immediate layouts.  The +4 at the end restores
+// the PC-read adjustment applied by EncodeBranchOffset.
+int Thumb2Assembler::DecodeBranchOffset(int32_t instr) {
+  int32_t imm32;
+  if ((instr & B12) == B12) {
+    uint32_t S = (instr >> 26) & 1;
+    uint32_t J2 = (instr >> 11) & 1;
+    uint32_t J1 = (instr >> 13) & 1;
+    uint32_t imm10 = (instr >> 16) & 0x3FF;
+    uint32_t imm11 = instr & 0x7FF;
+
+    // I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S) per the ARM ARM.
+    uint32_t I1 = ~(J1 ^ S) & 1;
+    uint32_t I2 = ~(J2 ^ S) & 1;
+    imm32 = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
+    imm32 = (imm32 << 8) >> 8;  // sign extend 24 bit immediate.
+  } else {
+    uint32_t S = (instr >> 26) & 1;
+    uint32_t J2 = (instr >> 11) & 1;
+    uint32_t J1 = (instr >> 13) & 1;
+    uint32_t imm6 = (instr >> 16) & 0x3F;
+    uint32_t imm11 = instr & 0x7FF;
+
+    imm32 = (S << 20) | (J2 << 19) | (J1 << 18) | (imm6 << 12) | (imm11 << 1);
+    imm32 = (imm32 << 11) >> 11;  // sign extend 21 bit immediate.
+  }
+  // Undo the PC-read adjustment applied during encoding.
+  imm32 += 4;
+  return imm32;
+}
+
+
+// rd := rd + value.  Convenience overload; see the three-register form
+// below for the instruction-selection strategy.  May clobber IP.
+void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+
+// rd := rn + value for an arbitrary 32-bit signed constant, choosing the
+// shortest available sequence:
+//   1. add/sub with an encodable immediate;
+//   2. mvn into IP plus add/sub (hence CHECK(rn != IP): IP is the scratch);
+//   3. movw(+movt) of the full constant into IP plus add.
+void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value,
+                                  Condition cond) {
+  if (value == 0) {
+    if (rd != rn) {
+      mov(rd, ShifterOperand(rn), cond);
+    }
+    return;
+  }
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negatives ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+    add(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, rn, SUB, -value, &shifter_op)) {
+    sub(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      add(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      sub(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      // Load the full 32-bit constant into IP.  movt is skipped when the
+      // high halfword is zero (movw already zero-extends).
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      add(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+// rd := rn + value, setting the condition flags (adds/subs).  Same
+// instruction-selection strategy as AddConstant above; IP is used as a
+// scratch register on the slow paths (hence CHECK(rn != IP)).
+void Thumb2Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                          Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+    adds(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, rn, SUB, -value, &shifter_op)) {
+    // FIX: the encodability check must use the opcode actually emitted
+    // (SUB, matching AddConstant above); this previously queried ADD.
+    subs(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      adds(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      subs(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      // Load the full 32-bit constant into IP; movt is skipped when the
+      // high halfword is zero.
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      adds(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+// Load a 32-bit constant into rd: a single mov/mvn when the (possibly
+// inverted) immediate is encodable, otherwise movw plus an optional movt
+// for the high halfword.
+void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, R0, MOV, value, &shifter_op)) {
+    mov(rd, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, R0, MVN, ~value, &shifter_op)) {
+    mvn(rd, shifter_op, cond);
+  } else {
+    movw(rd, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(rd, value_high, cond);  // movw already zeroed the high halfword.
+    }
+  }
+}
+
+// Load reg from [base + offset] for the given operand type.  If the offset
+// does not fit the addressing mode, the effective address is first
+// materialized in IP (so base must not be IP) and a zero-offset access is
+// used instead.
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetThumb.
+void Thumb2Assembler::LoadFromOffset(LoadOperandType type,
+                                     Register reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
+    CHECK(base != IP);  // IP is about to be clobbered as the address scratch.
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  switch (type) {
+    case kLoadSignedByte:
+      ldrsb(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedByte:
+      ldrb(reg, Address(base, offset), cond);
+      break;
+    case kLoadSignedHalfword:
+      ldrsh(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedHalfword:
+      ldrh(reg, Address(base, offset), cond);
+      break;
+    case kLoadWord:
+      ldr(reg, Address(base, offset), cond);
+      break;
+    case kLoadWordPair:
+      ldrd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Load single-precision VFP register reg from [base + offset], spilling the
+// address into IP first when the offset is not encodable (base must not be IP).
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
+void Thumb2Assembler::LoadSFromOffset(SRegister reg,
+                                      Register base,
+                                      int32_t offset,
+                                      Condition cond) {
+  if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  vldrs(reg, Address(base, offset), cond);
+}
+
+
+// Load double-precision VFP register reg from [base + offset], spilling the
+// address into IP first when the offset is not encodable (base must not be IP).
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
+void Thumb2Assembler::LoadDFromOffset(DRegister reg,
+                                      Register base,
+                                      int32_t offset,
+                                      Condition cond) {
+  if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  vldrd(reg, Address(base, offset), cond);
+}
+
+
+// Store reg to [base + offset] for the given operand type.  If the offset
+// is not encodable, the effective address is materialized in IP first —
+// so neither reg (the data would be clobbered) nor base may be IP.
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetThumb.
+void Thumb2Assembler::StoreToOffset(StoreOperandType type,
+                                    Register reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
+    CHECK(reg != IP);
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  switch (type) {
+    case kStoreByte:
+      strb(reg, Address(base, offset), cond);
+      break;
+    case kStoreHalfword:
+      strh(reg, Address(base, offset), cond);
+      break;
+    case kStoreWord:
+      str(reg, Address(base, offset), cond);
+      break;
+    case kStoreWordPair:
+      strd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Store single-precision VFP register reg to [base + offset], spilling the
+// address into IP first when the offset is not encodable (base must not be IP).
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetThumb, as expected by JIT::GuardedStoreToOffset.
+void Thumb2Assembler::StoreSToOffset(SRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  vstrs(reg, Address(base, offset), cond);
+}
+
+
+// Store double-precision VFP register reg to [base + offset], spilling the
+// address into IP first when the offset is not encodable (base must not be IP).
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetThumb, as expected by JIT::GuardedStoreSToOffset.
+// NOTE(review): "GuardedStoreSToOffset" above looks copy-pasted from
+// StoreSToOffset — presumably the D variant is meant; confirm the caller name.
+void Thumb2Assembler::StoreDToOffset(DRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  vstrd(reg, Address(base, offset), cond);
+}
+
+
+// Emit a data memory barrier.  The scratch register is only validated (must
+// be R12), not actually used by this implementation.  On non-SMP builds
+// (ANDROID_SMP == 0) the barrier is elided entirely.
+void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+#if ANDROID_SMP != 0
+  int32_t encoding = 0xf3bf8f5f;  // dmb in T1 encoding (option field 0xf, i.e. full-system).
+  Emit32(encoding);
+#endif
+}
+
+
+// Branch to label if r is zero, using the Thumb2 cbz instruction.
+void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) {
+  cbz(r, label);
+}
+
+
+// Branch to label if r is non-zero, using the Thumb2 cbnz instruction.
+void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
+  cbnz(r, label);
+}
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
new file mode 100644
index 0000000..60b9384
--- /dev/null
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -0,0 +1,685 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
+#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
+
+#include <vector>
+
+#include "base/logging.h"
+#include "constants_arm.h"
+#include "utils/arm/managed_register_arm.h"
+#include "utils/arm/assembler_arm.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+
+class Thumb2Assembler FINAL : public ArmAssembler {
+ public:
+  Thumb2Assembler() : force_32bit_(false), it_cond_index_(kNoItCondition), next_condition_(AL) {
+  }
+
+  virtual ~Thumb2Assembler() {
+    for (auto& branch : branches_) {
+      delete branch;
+    }
+  }
+
+  bool IsThumb() const OVERRIDE {
+    return true;
+  }
+
+  bool IsForced32Bit() const {
+    return force_32bit_;
+  }
+
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    EmitBranches();
+    Assembler::FinalizeInstructions(region);
+  }
+
+  // Data-processing instructions.
+  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  // Miscellaneous data-processing instructions.
+  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+
+  // Multiply instructions.
+  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void mla(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void mls(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+             Condition cond = AL) OVERRIDE;
+
+  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+
+  // Load/store instructions.
+  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+  void stm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+
+  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
+  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
+
+  void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
+  void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);
+
+
+  // Miscellaneous instructions.
+  void clrex(Condition cond = AL) OVERRIDE;
+  void nop(Condition cond = AL) OVERRIDE;
+
+  void bkpt(uint16_t imm16) OVERRIDE;
+  void svc(uint32_t imm24) OVERRIDE;
+
+  // If-then
+  void it(Condition firstcond, ItState i1 = kItOmitted,
+        ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;
+
+  void cbz(Register rn, Label* target) OVERRIDE;
+  void cbnz(Register rn, Label* target) OVERRIDE;
+
+  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
+  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
+  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
+  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  // Returns false if the immediate cannot be encoded.
+  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
+  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
+
+  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+
+  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
+  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
+  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
+
+  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+
+  // Branch instructions.
+  void b(Label* label, Condition cond = AL);
+  void bl(Label* label, Condition cond = AL);
+  void blx(Label* label);
+  void blx(Register rm, Condition cond = AL) OVERRIDE;
+  void bx(Register rm, Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Rrx(Register rd, Register rm, Condition cond = AL);
+
+  void Push(Register rd, Condition cond = AL) OVERRIDE;
+  void Pop(Register rd, Condition cond = AL) OVERRIDE;
+
+  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
+  void PopList(RegList regs, Condition cond = AL) OVERRIDE;
+
+  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+
+  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
+  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
+
+  // Macros.
+  // Add signed constant value to rd. May clobber IP.
+  void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void AddConstant(Register rd, Register rn, int32_t value,
+                   Condition cond = AL) OVERRIDE;
+  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                           Condition cond = AL) OVERRIDE;
+  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
+                            Condition cond = AL) {}
+
+  // Load and Store. May clobber IP.
+  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {}
+  void LoadDImmediate(DRegister dd, double value,
+                      Register scratch, Condition cond = AL) {}
+  void MarkExceptionHandler(Label* label) OVERRIDE;
+  void LoadFromOffset(LoadOperandType type,
+                      Register reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void StoreToOffset(StoreOperandType type,
+                     Register reg,
+                     Register base,
+                     int32_t offset,
+                     Condition cond = AL) OVERRIDE;
+  void LoadSFromOffset(SRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreSToOffset(SRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void LoadDFromOffset(DRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreDToOffset(DRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+
+
+  static bool IsInstructionForExceptionHandling(uword pc);
+
+  // Emit data (e.g. encoded instruction or immediate) to the
+  // instruction stream.
+  void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
+  void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
+  void Bind(Label* label) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+  // Force the assembler to generate 32 bit instructions.
+  void Force32Bit() {
+    force_32bit_ = true;
+  }
+
+ private:
+  // Emit a single 32 or 16 bit data processing instruction.
+  void EmitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Must the instruction be 32 bits or can it possibly be encoded
+  // in 16 bits?
+  bool Is32BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Emit a 32 bit data processing instruction.
+  void Emit32BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Emit a 16 bit data processing instruction.
+  void Emit16BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  void Emit16BitAddSub(Condition cond,
+                       Opcode opcode,
+                       int set_cc,
+                       Register rn,
+                       Register rd,
+                       const ShifterOperand& so);
+
+  uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);
+
+  void EmitLoadStore(Condition cond,
+                 bool load,
+                 bool byte,
+                 bool half,
+                 bool is_signed,
+                 Register rd,
+                 const Address& ad);
+
+  void EmitMemOpAddressMode3(Condition cond,
+                             int32_t mode,
+                             Register rd,
+                             const Address& ad);
+
+  void EmitMultiMemOp(Condition cond,
+                      BlockAddressMode am,
+                      bool load,
+                      Register base,
+                      RegList regs);
+
+  void EmitMulOp(Condition cond,
+                 int32_t opcode,
+                 Register rd,
+                 Register rn,
+                 Register rm,
+                 Register rs);
+
+  void EmitVFPsss(Condition cond,
+                  int32_t opcode,
+                  SRegister sd,
+                  SRegister sn,
+                  SRegister sm);
+
+  void EmitVFPddd(Condition cond,
+                  int32_t opcode,
+                  DRegister dd,
+                  DRegister dn,
+                  DRegister dm);
+
+  void EmitVFPsd(Condition cond,
+                 int32_t opcode,
+                 SRegister sd,
+                 DRegister dm);
+
+  void EmitVFPds(Condition cond,
+                 int32_t opcode,
+                 DRegister dd,
+                 SRegister sm);
+
+  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
+
+  void EmitBranch(Condition cond, Label* label, bool link, bool x);
+  static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
+  static int DecodeBranchOffset(int32_t inst);
+  int32_t EncodeTstOffset(int offset, int32_t inst);
+  int DecodeTstOffset(int32_t inst);
+
+  bool IsLowRegister(Register r) {
+    return r < R8;
+  }
+
+  bool IsHighRegister(Register r) {
+     return r >= R8;
+  }
+
+  bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
+
+  // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
+  Condition it_conditions_[4];
+  uint8_t it_cond_index_;
+  Condition next_condition_;
+
+  void SetItCondition(ItState s, Condition cond, uint8_t index);
+
+  void CheckCondition(Condition cond) {
+    CHECK_EQ(cond, next_condition_);
+
+    // Move to the next condition if there is one.
+    if (it_cond_index_ < 3) {
+      ++it_cond_index_;
+      next_condition_ = it_conditions_[it_cond_index_];
+    } else {
+      next_condition_ = AL;
+    }
+  }
+
+  void CheckConditionLastIt(Condition cond) {
+    if (it_cond_index_ < 3) {
+      // Check that the next condition is AL.  This means that the
+      // current condition is the last in the IT block.
+      CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
+    }
+    CheckCondition(cond);
+  }
+
+  // Branches.
+  //
+  // The thumb2 architecture allows branches to be either 16 or 32 bit instructions.  This
+  // depends on both the type of branch and the offset to which it is branching.  When
+  // generating code for branches we don't know the size before hand (if the branch is
+  // going forward, because we haven't seen the target address yet), so we need to assume
+  // that it is going to be one of 16 or 32 bits.  When we know the target (the label is 'bound')
+  // we can determine the actual size of the branch.  However, if we had guessed wrong before
+  // we knew the target there will be no room in the instruction sequence for the new
+  // instruction (assume that we never decrease the size of a branch).
+  //
+  // To handle this, we keep a record of every branch in the program.  The actual instruction
+  // encoding for these is delayed until we know the final size of every branch.  When we
+  // bind a label to a branch (we then know the target address) we determine if the branch
+  // has changed size.  If it has we need to move all the instructions in the buffer after
+  // the branch point forward by the change in size of the branch.  This will create a gap
+  // in the code big enough for the new branch encoding.  However, since we have moved
+  // a chunk of code we need to relocate the branches in that code to their new address.
+  //
+  // Creating a hole in the code for the new branch encoding might cause another branch that was
+  // 16 bits to become 32 bits, so we need to find this in another pass.
+  //
+  // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset
+  // range.  We do this by converting it to two instructions:
+  //     cmp Rn, #0
+  //     b<cond> target
+  // But we also need to handle the case where the conditional branch is out of range and
+  // becomes a 32 bit conditional branch.
+  //
+  // All branches have a 'branch id' which is a 16 bit unsigned number used to identify
+  // the branch.  Unresolved labels use the branch id to link to the next unresolved branch.
+
+  class Branch {
+   public:
+    // Branch type.
+    enum Type {
+      kUnconditional,             // B.
+      kConditional,               // B<cond>.
+      kCompareAndBranchZero,      // cbz.
+      kCompareAndBranchNonZero,   // cbnz.
+      kUnconditionalLink,         // BL.
+      kUnconditionalLinkX,        // BLX.
+      kUnconditionalX             // BX.
+    };
+
+    // Calculated size of branch instruction based on type and offset.
+    enum Size {
+      k16Bit,
+      k32Bit
+    };
+
+    // Unresolved branch possibly with a condition.
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) :
+        assembler_(assembler), type_(type), location_(location),
+        target_(kUnresolved),
+        cond_(cond), rn_(R0) {
+      CHECK(!IsCompareAndBranch());
+      size_ = CalculateSize();
+    }
+
+    // Unresolved compare-and-branch instruction with a register.
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) :
+        assembler_(assembler), type_(type), location_(location),
+        target_(kUnresolved), cond_(AL), rn_(rn) {
+      CHECK(IsCompareAndBranch());
+      size_ = CalculateSize();
+    }
+
+    // Resolved branch (can't be compare-and-branch) with a target and possibly a condition.
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target,
+           Condition cond = AL) :
+           assembler_(assembler), type_(type), location_(location),
+           target_(target), cond_(cond), rn_(R0) {
+      CHECK(!IsCompareAndBranch());
+      // Resolved branch.
+      size_ = CalculateSize();
+    }
+
+    bool IsCompareAndBranch() const {
+      return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero;
+    }
+
+    // Resolve a branch when the target is known.  If this causes the
+    // size of the branch to change return true.  Otherwise return false.
+    bool Resolve(uint32_t target) {
+      target_ = target;
+      Size newsize = CalculateSize();
+      if (size_ != newsize) {
+        size_ = newsize;
+        return true;
+      }
+      return false;
+    }
+
+    // Move a cbz/cbnz branch.  This is always forward.
+    void Move(int32_t delta) {
+      CHECK(IsCompareAndBranch());
+      CHECK_GT(delta, 0);
+      location_ += delta;
+      target_ += delta;
+    }
+
+  // Relocate a branch by a given delta.  This changes the location and
+    // target if they need to be changed.  It also recalculates the
+    // size of the branch instruction.  It returns true if the branch
+    // has changed size.
+    bool Relocate(uint32_t oldlocation, int32_t delta) {
+      if (location_ > oldlocation) {
+        location_ += delta;
+      }
+      if (target_ != kUnresolved) {
+        if (target_ > oldlocation) {
+          target_ += delta;
+        }
+      } else {
+        return false;       // Don't know the size yet.
+      }
+
+      // Calculate the new size.
+      Size newsize = CalculateSize();
+      if (size_ != newsize) {
+        size_ = newsize;
+        return true;
+      }
+      return false;
+    }
+
+    Size GetSize() const {
+      return size_;
+    }
+
+    Type GetType() const {
+      return type_;
+    }
+
+    uint32_t GetLocation() const {
+      return location_;
+    }
+
+    // Emit the branch instruction into the assembler buffer.  This does the
+    // encoding into the thumb instruction.
+    void Emit(AssemblerBuffer* buffer) const;
+
+    // Reset the type and condition to those given.  This is used for
+    // cbz/cbnz instructions when they are converted to cmp/b<cond>
+    void ResetTypeAndCondition(Type type, Condition cond) {
+      CHECK(IsCompareAndBranch());
+      CHECK(cond == EQ || cond == NE);
+      type_ = type;
+      cond_ = cond;
+    }
+
+    Register GetRegister() const {
+      return rn_;
+    }
+
+    void ResetSize(Size size) {
+      size_ = size;
+    }
+
+   private:
+    // Calculate the size of the branch instruction based on its type and offset.
+    Size CalculateSize() const {
+      if (target_ == kUnresolved) {
+        if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
+          return k32Bit;
+        }
+        return k16Bit;
+      }
+      int32_t delta = target_ - location_ - 4;
+      if (delta < 0) {
+        delta = -delta;
+      }
+      switch (type_) {
+        case kUnconditional:
+          if (assembler_->IsForced32Bit() || delta >= (1 << 11)) {
+            return k32Bit;
+          } else {
+            return k16Bit;
+          }
+        case kConditional:
+          if (assembler_->IsForced32Bit() || delta >= (1 << 8)) {
+            return k32Bit;
+          } else {
+            return k16Bit;
+          }
+        case kCompareAndBranchZero:
+        case kCompareAndBranchNonZero:
+          if (delta >= (1 << 7)) {
+            return k32Bit;      // Will cause this branch to become invalid.
+          }
+          return k16Bit;
+
+        case kUnconditionalX:
+        case kUnconditionalLinkX:
+          return k16Bit;
+        case kUnconditionalLink:
+          return k32Bit;
+      }
+      LOG(FATAL) << "Cannot reach";
+      return k16Bit;
+    }
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.
+    const Thumb2Assembler* assembler_;
+    Type type_;
+    uint32_t location_;     // Offset into assembler buffer in bytes.
+    uint32_t target_;       // Offset into assembler buffer in bytes.
+    Size size_;
+    Condition cond_;
+    const Register rn_;
+  };
+
+  std::vector<Branch*> branches_;
+
+  // Add a resolved branch and return its size.
+  Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target,
+                         Condition cond = AL) {
+    branches_.push_back(new Branch(this, type, location, target, cond));
+    return branches_[branches_.size()-1]->GetSize();
+  }
+
+  // Add a compare and branch (with a register) and return its id.
+  uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) {
+    branches_.push_back(new Branch(this, type, location, rn));
+    return branches_.size() - 1;
+  }
+
+  // Add an unresolved branch and return its id.
+  uint16_t AddBranch(Branch::Type type, uint32_t location, Condition cond = AL) {
+    branches_.push_back(new Branch(this, type, location, cond));
+    return branches_.size() - 1;
+  }
+
+  Branch* GetBranch(uint16_t branchid) {
+    if (branchid >= branches_.size()) {
+      return nullptr;
+    }
+    return branches_[branchid];
+  }
+
+  void EmitBranches();
+  void MakeHoleForBranch(uint32_t location, uint32_t size);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index 058f945..3e4cd43 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -155,7 +155,8 @@
   LSR = 1,  // Logical shift right
   ASR = 2,  // Arithmetic shift right
   ROR = 3,  // Rotate right
-  kMaxShift = 4
+  RRX = 4,  // Rotate right with extend.
+  kMaxShift
 };
 
 
@@ -210,7 +211,6 @@
   kBranchOffsetMask = 0x00ffffff
 };
 
-
 // Size (in bytes) of registers.
 const int kRegisterSize = 4;
 
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
new file mode 100644
index 0000000..2d70b7d
--- /dev/null
+++ b/compiler/utils/array_ref.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_ARRAY_REF_H_
+
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace art {
+
+/**
+ * @brief A container that references an array.
+ *
+ * @details The template class ArrayRef provides a container that references
+ * an external array. This external array must remain alive while the ArrayRef
+ * object is in use. The external array may be a std::vector<>-backed storage
+ * or any other contiguous chunk of memory but that memory must remain valid,
+ * i.e. the std::vector<> must not be resized for example.
+ *
+ * Except for copy/assign and insert/erase/capacity functions, the interface
+ * is essentially the same as std::vector<>. Since we don't want to throw
+ * exceptions, at() is also excluded.
+ */
+template <typename T>
+class ArrayRef {
+ private:
+  struct tag { };
+
+ public:
+  typedef T value_type;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T* iterator;
+  typedef const T* const_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef ptrdiff_t difference_type;
+  typedef size_t size_type;
+
+  // Constructors.
+
+  constexpr ArrayRef()
+      : array_(nullptr), size_(0u) {
+  }
+
+  template <size_t size>
+  constexpr ArrayRef(T (&array)[size])
+    : array_(array), size_(size) {
+  }
+
+  template <typename U, size_t size>
+  constexpr ArrayRef(U (&array)[size],
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+    : array_(array), size_(size) {
+  }
+
+  constexpr ArrayRef(T* array, size_t size)
+      : array_(array), size_(size) {
+  }
+
+  template <typename U>
+  constexpr ArrayRef(U* array, size_t size,
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(array), size_(size) {
+  }
+
+  explicit ArrayRef(std::vector<T>& v)
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  template <typename U>
+  ArrayRef(const std::vector<U>& v,
+           typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  // Assignment operators.
+
+  ArrayRef& operator=(const ArrayRef& other) {
+    array_ = other.array_;
+    size_ = other.size_;
+    return *this;
+  }
+
+  template <typename U>
+  typename std::enable_if<std::is_same<T, const U>::value, ArrayRef>::type&
+  operator=(const ArrayRef<U>& other) {
+    return *this = ArrayRef(other);
+  }
+
+  // Destructor.
+  ~ArrayRef() = default;
+
+  // Iterators.
+  iterator begin() { return array_; }
+  const_iterator begin() const { return array_; }
+  const_iterator cbegin() const { return array_; }
+  iterator end() { return array_ + size_; }
+  const_iterator end() const { return array_ + size_; }
+  const_iterator cend() const { return array_ + size_; }
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+  const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
+
+  // Size.
+  size_type size() const { return size_; }
+  bool empty() const { return size() == 0u; }
+
+  // Element access. NOTE: Not providing at().
+
+  reference operator[](size_type n) {
+    DCHECK_LT(n, size_);
+    return array_[n];
+  }
+
+  const_reference operator[](size_type n) const {
+    DCHECK_LT(n, size_);
+    return array_[n];
+  }
+
+  reference front() {
+    DCHECK_NE(size_, 0u);
+    return array_[0];
+  }
+
+  const_reference front() const {
+    DCHECK_NE(size_, 0u);
+    return array_[0];
+  }
+
+  reference back() {
+    DCHECK_NE(size_, 0u);
+    return array_[size_ - 1u];
+  }
+
+  const_reference back() const {
+    DCHECK_NE(size_, 0u);
+    return array_[size_ - 1u];
+  }
+
+  value_type* data() { return array_; }
+  const value_type* data() const { return array_; }
+
+ private:
+  T* array_;
+  size_t size_;
+};
+
+}  // namespace art
+
+
+#endif  // ART_COMPILER_UTILS_ARRAY_REF_H_
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 26bdceb..68b784a 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -19,7 +19,8 @@
 #include <algorithm>
 #include <vector>
 
-#include "arm/assembler_arm.h"
+#include "arm/assembler_arm32.h"
+#include "arm/assembler_thumb2.h"
 #include "arm64/assembler_arm64.h"
 #include "mips/assembler_mips.h"
 #include "x86/assembler_x86.h"
@@ -106,8 +107,9 @@
 Assembler* Assembler::Create(InstructionSet instruction_set) {
   switch (instruction_set) {
     case kArm:
+      return new arm::Arm32Assembler();
     case kThumb2:
-      return new arm::ArmAssembler();
+      return new arm::Thumb2Assembler();
     case kArm64:
       return new arm64::Arm64Assembler();
     case kMips:
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 19239e1..f72f5e5 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -38,6 +38,8 @@
 
 namespace arm {
   class ArmAssembler;
+  class Arm32Assembler;
+  class Thumb2Assembler;
 }
 namespace arm64 {
   class Arm64Assembler;
@@ -87,7 +89,7 @@
 
   int LinkPosition() const {
     CHECK(IsLinked());
-    return position_ - kWordSize;
+    return position_ - kPointerSize;
   }
 
   bool IsBound() const { return position_ < 0; }
@@ -114,6 +116,8 @@
   }
 
   friend class arm::ArmAssembler;
+  friend class arm::Arm32Assembler;
+  friend class arm::Thumb2Assembler;
   friend class mips::MipsAssembler;
   friend class x86::X86Assembler;
   friend class x86_64::X86_64Assembler;
@@ -189,6 +193,15 @@
     *reinterpret_cast<T*>(contents_ + position) = value;
   }
 
+  void Move(size_t newposition, size_t oldposition) {
+    CHECK(HasEnsuredCapacity());
+    // Move the contents of the buffer from oldposition to
+    // newposition by nbytes.
+    size_t nbytes = Size() - oldposition;
+    memmove(contents_ + newposition, contents_ + oldposition, nbytes);
+    cursor_ += newposition - oldposition;
+  }
+
   // Emit a fixup at the current location.
   void EmitFixup(AssemblerFixup* fixup) {
     fixup->set_previous(fixup_);
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
new file mode 100644
index 0000000..55fbed1
--- /dev/null
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -0,0 +1,1227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+
+#include "gtest/gtest.h"
+#include "utils/arm/assembler_thumb2.h"
+#include "base/hex_dump.h"
+#include "common_runtime_test.h"
+
+namespace art {
+namespace arm {
+
+// Include results file (generated manually)
+#include "assembler_thumb_test_expected.cc.inc"
+
+#ifndef HAVE_ANDROID_OS
+static constexpr bool kPrintResults = false;
+#endif
+
+void SetAndroidData() {
+  const char* data = getenv("ANDROID_DATA");
+  if (data == nullptr) {
+    setenv("ANDROID_DATA", "/tmp", 1);
+  }
+}
+
+std::string GetAndroidToolsDir() {
+  std::string root;
+  const char* android_build_top = getenv("ANDROID_BUILD_TOP");
+  if (android_build_top != nullptr) {
+    root += android_build_top;
+  } else {
+    // Not set by build server, so default to current directory
+    char* cwd = getcwd(nullptr, 0);
+    setenv("ANDROID_BUILD_TOP", cwd, 1);
+    root += cwd;
+    free(cwd);
+  }
+
+  // Look for "prebuilts"
+  std::string toolsdir = root;
+  struct stat st;
+  while (toolsdir != "") {
+    std::string prebuilts = toolsdir + "/prebuilts";
+    if (stat(prebuilts.c_str(), &st) == 0) {
+       // Found prebuilts.
+       toolsdir += "/prebuilts/gcc/linux-x86/arm";
+       break;
+    }
+    // Not present, move up one dir.
+    size_t slash = toolsdir.rfind('/');
+    if (slash == std::string::npos) {
+      toolsdir = "";
+    } else {
+      toolsdir = toolsdir.substr(0, slash);
+    }
+  }
+  bool statok = stat(toolsdir.c_str(), &st) == 0;
+  if (!statok) {
+    return "";      // Use path.
+  }
+
+  DIR* dir = opendir(toolsdir.c_str());
+  if (dir == nullptr) {
+    return "";      // Use path.
+  }
+
+  struct dirent* entry;
+  std::string founddir;
+  double maxversion  = 0;
+
+  // Find the latest version of the arm-eabi tools (biggest version number).
+  // Suffix on toolsdir will be something like "arm-eabi-4.8"
+  while ((entry = readdir(dir)) != nullptr) {
+    std::string subdir = toolsdir + std::string("/") + std::string(entry->d_name);
+    size_t eabi = subdir.find("arm-eabi-");
+    if (eabi != std::string::npos) {
+      std::string suffix = subdir.substr(eabi + sizeof("arm-eabi-") - 1);
+      double version = strtod(suffix.c_str(), nullptr);
+      if (version > maxversion) {
+        maxversion = version;
+        founddir = subdir;
+      }
+    }
+  }
+  closedir(dir);
+  bool found = founddir != "";
+  if (!found) {
+    return "";      // Use path.
+  }
+
+  return founddir + "/bin/";
+}
+
+void dump(std::vector<uint8_t>& code, const char* testname) {
+  // This will only work on the host.  There is no as, objcopy or objdump on the
+  // device.
+#ifndef HAVE_ANDROID_OS
+  static bool results_ok = false;
+  static std::string toolsdir;
+
+  if (!results_ok) {
+    setup_results();
+    toolsdir = GetAndroidToolsDir();
+    SetAndroidData();
+    results_ok = true;
+  }
+
+  ScratchFile file;
+
+  const char* filename = file.GetFilename().c_str();
+
+  std::ofstream out(filename);
+  if (out) {
+    out << ".section \".text\"\n";
+    out << ".syntax unified\n";
+    out << ".arch armv7-a\n";
+    out << ".thumb\n";
+    out << ".thumb_func\n";
+    out << ".type " << testname << ", #function\n";
+    out << ".global " << testname << "\n";
+    out << testname << ":\n";
+    out << ".fnstart\n";
+
+    for (uint32_t i = 0 ; i < code.size(); ++i) {
+      out << ".byte " << (static_cast<int>(code[i]) & 0xff) << "\n";
+    }
+    out << ".fnend\n";
+    out << ".size " << testname << ", .-" << testname << "\n";
+  }
+  out.close();
+
+  char cmd[256];
+
+  // Assemble the .S
+  snprintf(cmd, sizeof(cmd), "%sarm-eabi-as %s -o %s.o", toolsdir.c_str(), filename, filename);
+  system(cmd);
+
+  // Remove the $d symbols to prevent the disassembler dumping the instructions
+  // as .word
+  snprintf(cmd, sizeof(cmd), "%sarm-eabi-objcopy -N '$d' %s.o %s.oo", toolsdir.c_str(),
+    filename, filename);
+  system(cmd);
+
+  // Disassemble.
+
+  snprintf(cmd, sizeof(cmd), "%sarm-eabi-objdump -d %s.oo | grep '^  *[0-9a-f][0-9a-f]*:'",
+    toolsdir.c_str(), filename);
+  if (kPrintResults) {
+    // Print the results only, don't check. This is used to generate new output for inserting
+    // into the .inc file.
+    system(cmd);
+  } else {
+    // Check the results match the appropriate results in the .inc file.
+    FILE *fp = popen(cmd, "r");
+    ASSERT_TRUE(fp != nullptr);
+
+    std::map<std::string, const char**>::iterator results = test_results.find(testname);
+    ASSERT_NE(results, test_results.end());
+
+    uint32_t lineindex = 0;
+
+    while (!feof(fp)) {
+      char testline[256];
+      char *s = fgets(testline, sizeof(testline), fp);
+      if (s == nullptr) {
+        break;
+      }
+      ASSERT_EQ(strcmp(results->second[lineindex], testline), 0);
+      ++lineindex;
+    }
+    // Check that we are at the end.
+    ASSERT_TRUE(results->second[lineindex] == nullptr);
+    fclose(fp);
+  }
+
+  char buf[FILENAME_MAX];
+  snprintf(buf, sizeof(buf), "%s.o", filename);
+  unlink(buf);
+
+  snprintf(buf, sizeof(buf), "%s.oo", filename);
+  unlink(buf);
+#endif
+}
+
+#define __ assembler->
+
+TEST(Thumb2AssemblerTest, SimpleMov) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(R1));
+  __ mov(R8, ShifterOperand(R9));
+
+  __ mov(R0, ShifterOperand(1));
+  __ mov(R8, ShifterOperand(9));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SimpleMov");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, SimpleMov32) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+  assembler->Force32Bit();
+
+  __ mov(R0, ShifterOperand(R1));
+  __ mov(R8, ShifterOperand(R9));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SimpleMov32");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, SimpleMovAdd) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(R1));
+  __ add(R0, R1, ShifterOperand(R2));
+  __ add(R0, R1, ShifterOperand());
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SimpleMovAdd");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, DataProcessingRegister) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(R1));
+  __ mvn(R0, ShifterOperand(R1));
+
+  // 32 bit variants.
+  __ add(R0, R1, ShifterOperand(R2));
+  __ sub(R0, R1, ShifterOperand(R2));
+  __ and_(R0, R1, ShifterOperand(R2));
+  __ orr(R0, R1, ShifterOperand(R2));
+  __ eor(R0, R1, ShifterOperand(R2));
+  __ bic(R0, R1, ShifterOperand(R2));
+  __ adc(R0, R1, ShifterOperand(R2));
+  __ sbc(R0, R1, ShifterOperand(R2));
+  __ rsb(R0, R1, ShifterOperand(R2));
+
+  // 16 bit variants.
+  __ add(R0, R1, ShifterOperand());
+  __ sub(R0, R1, ShifterOperand());
+  __ and_(R0, R1, ShifterOperand());
+  __ orr(R0, R1, ShifterOperand());
+  __ eor(R0, R1, ShifterOperand());
+  __ bic(R0, R1, ShifterOperand());
+  __ adc(R0, R1, ShifterOperand());
+  __ sbc(R0, R1, ShifterOperand());
+  __ rsb(R0, R1, ShifterOperand());
+
+  __ tst(R0, ShifterOperand(R1));
+  __ teq(R0, ShifterOperand(R1));
+  __ cmp(R0, ShifterOperand(R1));
+  __ cmn(R0, ShifterOperand(R1));
+
+  __ movs(R0, ShifterOperand(R1));
+  __ mvns(R0, ShifterOperand(R1));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "DataProcessingRegister");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, DataProcessingImmediate) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(0x55));
+  __ mvn(R0, ShifterOperand(0x55));
+  __ add(R0, R1, ShifterOperand(0x55));
+  __ sub(R0, R1, ShifterOperand(0x55));
+  __ and_(R0, R1, ShifterOperand(0x55));
+  __ orr(R0, R1, ShifterOperand(0x55));
+  __ eor(R0, R1, ShifterOperand(0x55));
+  __ bic(R0, R1, ShifterOperand(0x55));
+  __ adc(R0, R1, ShifterOperand(0x55));
+  __ sbc(R0, R1, ShifterOperand(0x55));
+  __ rsb(R0, R1, ShifterOperand(0x55));
+
+  __ tst(R0, ShifterOperand(0x55));
+  __ teq(R0, ShifterOperand(0x55));
+  __ cmp(R0, ShifterOperand(0x55));
+  __ cmn(R0, ShifterOperand(0x55));
+
+  __ add(R0, R1, ShifterOperand(5));
+  __ sub(R0, R1, ShifterOperand(5));
+
+  __ movs(R0, ShifterOperand(0x55));
+  __ mvns(R0, ShifterOperand(0x55));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "DataProcessingImmediate");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(0x550055));
+  __ mvn(R0, ShifterOperand(0x550055));
+  __ add(R0, R1, ShifterOperand(0x550055));
+  __ sub(R0, R1, ShifterOperand(0x550055));
+  __ and_(R0, R1, ShifterOperand(0x550055));
+  __ orr(R0, R1, ShifterOperand(0x550055));
+  __ eor(R0, R1, ShifterOperand(0x550055));
+  __ bic(R0, R1, ShifterOperand(0x550055));
+  __ adc(R0, R1, ShifterOperand(0x550055));
+  __ sbc(R0, R1, ShifterOperand(0x550055));
+  __ rsb(R0, R1, ShifterOperand(0x550055));
+
+  __ tst(R0, ShifterOperand(0x550055));
+  __ teq(R0, ShifterOperand(0x550055));
+  __ cmp(R0, ShifterOperand(0x550055));
+  __ cmn(R0, ShifterOperand(0x550055));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "DataProcessingModifiedImmediate");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R0, ShifterOperand(0x550055));
+  __ mov(R0, ShifterOperand(0x55005500));
+  __ mov(R0, ShifterOperand(0x55555555));
+  __ mov(R0, ShifterOperand(0xd5000000));       // rotated to first position
+  __ mov(R0, ShifterOperand(0x6a000000));       // rotated to second position
+  __ mov(R0, ShifterOperand(0x350));            // rotated to 2nd last position
+  __ mov(R0, ShifterOperand(0x1a8));            // rotated to last position
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "DataProcessingModifiedImmediates");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mov(R3, ShifterOperand(R4, LSL, 4));
+  __ mov(R3, ShifterOperand(R4, LSR, 5));
+  __ mov(R3, ShifterOperand(R4, ASR, 6));
+  __ mov(R3, ShifterOperand(R4, ROR, 7));
+  __ mov(R3, ShifterOperand(R4, ROR));
+
+  // 32 bit variants.
+  __ mov(R8, ShifterOperand(R4, LSL, 4));
+  __ mov(R8, ShifterOperand(R4, LSR, 5));
+  __ mov(R8, ShifterOperand(R4, ASR, 6));
+  __ mov(R8, ShifterOperand(R4, ROR, 7));
+  __ mov(R8, ShifterOperand(R4, RRX));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "DataProcessingShiftedRegister");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, BasicLoad) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R3, Address(R4, 24));
+  __ ldrb(R3, Address(R4, 24));
+  __ ldrh(R3, Address(R4, 24));
+  __ ldrsb(R3, Address(R4, 24));
+  __ ldrsh(R3, Address(R4, 24));
+
+  __ ldr(R3, Address(SP, 24));
+
+  // 32 bit variants
+  __ ldr(R8, Address(R4, 24));
+  __ ldrb(R8, Address(R4, 24));
+  __ ldrh(R8, Address(R4, 24));
+  __ ldrsb(R8, Address(R4, 24));
+  __ ldrsh(R8, Address(R4, 24));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "BasicLoad");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, BasicStore) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ str(R3, Address(R4, 24));
+  __ strb(R3, Address(R4, 24));
+  __ strh(R3, Address(R4, 24));
+
+  __ str(R3, Address(SP, 24));
+
+  // 32 bit variants.
+  __ str(R8, Address(R4, 24));
+  __ strb(R8, Address(R4, 24));
+  __ strh(R8, Address(R4, 24));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "BasicStore");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, ComplexLoad) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R3, Address(R4, 24, Address::Mode::Offset));
+  __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ ldr(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ ldr(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ ldr(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ ldr(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ ldrb(R3, Address(R4, 24, Address::Mode::Offset));
+  __ ldrb(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ ldrb(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ ldrb(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ ldrb(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ ldrb(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ ldrh(R3, Address(R4, 24, Address::Mode::Offset));
+  __ ldrh(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ ldrh(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ ldrh(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ ldrh(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ ldrh(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::Offset));
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ ldrsb(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::Offset));
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "ComplexLoad");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, ComplexStore) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ str(R3, Address(R4, 24, Address::Mode::Offset));
+  __ str(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ str(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ str(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ str(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ str(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ strb(R3, Address(R4, 24, Address::Mode::Offset));
+  __ strb(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ strb(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ strb(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ strb(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ strb(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  __ strh(R3, Address(R4, 24, Address::Mode::Offset));
+  __ strh(R3, Address(R4, 24, Address::Mode::PreIndex));
+  __ strh(R3, Address(R4, 24, Address::Mode::PostIndex));
+  __ strh(R3, Address(R4, 24, Address::Mode::NegOffset));
+  __ strh(R3, Address(R4, 24, Address::Mode::NegPreIndex));
+  __ strh(R3, Address(R4, 24, Address::Mode::NegPostIndex));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "ComplexStore");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, NegativeLoadStore) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R3, Address(R4, -24, Address::Mode::Offset));
+  __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ ldr(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ ldr(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ ldr(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ ldr(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ ldrb(R3, Address(R4, -24, Address::Mode::Offset));
+  __ ldrb(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ ldrb(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ ldrb(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ ldrb(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ ldrb(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ ldrh(R3, Address(R4, -24, Address::Mode::Offset));
+  __ ldrh(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ ldrh(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ ldrh(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ ldrh(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ ldrh(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::Offset));
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ ldrsb(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::Offset));
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ ldrsh(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ str(R3, Address(R4, -24, Address::Mode::Offset));
+  __ str(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ str(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ str(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ str(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ str(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ strb(R3, Address(R4, -24, Address::Mode::Offset));
+  __ strb(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ strb(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ strb(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ strb(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ strb(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  __ strh(R3, Address(R4, -24, Address::Mode::Offset));
+  __ strh(R3, Address(R4, -24, Address::Mode::PreIndex));
+  __ strh(R3, Address(R4, -24, Address::Mode::PostIndex));
+  __ strh(R3, Address(R4, -24, Address::Mode::NegOffset));
+  __ strh(R3, Address(R4, -24, Address::Mode::NegPreIndex));
+  __ strh(R3, Address(R4, -24, Address::Mode::NegPostIndex));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "NegativeLoadStore");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ strd(R2, Address(R0, 24, Address::Mode::Offset));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SimpleLoadStoreDual");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ strd(R2, Address(R0, 24, Address::Mode::Offset));
+  __ strd(R2, Address(R0, 24, Address::Mode::PreIndex));
+  __ strd(R2, Address(R0, 24, Address::Mode::PostIndex));
+  __ strd(R2, Address(R0, 24, Address::Mode::NegOffset));
+  __ strd(R2, Address(R0, 24, Address::Mode::NegPreIndex));
+  __ strd(R2, Address(R0, 24, Address::Mode::NegPostIndex));
+
+  __ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::PreIndex));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::PostIndex));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::NegOffset));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::NegPreIndex));
+  __ ldrd(R2, Address(R0, 24, Address::Mode::NegPostIndex));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "ComplexLoadStoreDual");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ strd(R2, Address(R0, -24, Address::Mode::Offset));
+  __ strd(R2, Address(R0, -24, Address::Mode::PreIndex));
+  __ strd(R2, Address(R0, -24, Address::Mode::PostIndex));
+  __ strd(R2, Address(R0, -24, Address::Mode::NegOffset));
+  __ strd(R2, Address(R0, -24, Address::Mode::NegPreIndex));
+  __ strd(R2, Address(R0, -24, Address::Mode::NegPostIndex));
+
+  __ ldrd(R2, Address(R0, -24, Address::Mode::Offset));
+  __ ldrd(R2, Address(R0, -24, Address::Mode::PreIndex));
+  __ ldrd(R2, Address(R0, -24, Address::Mode::PostIndex));
+  __ ldrd(R2, Address(R0, -24, Address::Mode::NegOffset));
+  __ ldrd(R2, Address(R0, -24, Address::Mode::NegPreIndex));
+  __ ldrd(R2, Address(R0, -24, Address::Mode::NegPostIndex));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "NegativeLoadStoreDual");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, SimpleBranch) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ mov(R0, ShifterOperand(2));
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(1));
+  __ b(&l1);
+  Label l2;
+  __ b(&l2);
+  __ mov(R1, ShifterOperand(2));
+  __ Bind(&l2);
+  __ mov(R0, ShifterOperand(3));
+
+  Label l3;
+  __ mov(R0, ShifterOperand(2));
+  __ Bind(&l3);
+  __ mov(R1, ShifterOperand(1));
+  __ b(&l3, EQ);
+
+  Label l4;
+  __ b(&l4, EQ);
+  __ mov(R1, ShifterOperand(2));
+  __ Bind(&l4);
+  __ mov(R0, ShifterOperand(3));
+
+  // 2 linked labels.
+  Label l5;
+  __ b(&l5);
+  __ mov(R1, ShifterOperand(4));
+  __ b(&l5);
+  __ mov(R1, ShifterOperand(5));
+  __ Bind(&l5);
+  __ mov(R0, ShifterOperand(6));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SimpleBranch");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LongBranch) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+  assembler->Force32Bit();
+  // 32 bit branches.
+  Label l1;
+  __ mov(R0, ShifterOperand(2));
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(1));
+  __ b(&l1);
+
+  Label l2;
+  __ b(&l2);
+  __ mov(R1, ShifterOperand(2));
+  __ Bind(&l2);
+  __ mov(R0, ShifterOperand(3));
+
+  Label l3;
+  __ mov(R0, ShifterOperand(2));
+  __ Bind(&l3);
+  __ mov(R1, ShifterOperand(1));
+  __ b(&l3, EQ);
+
+  Label l4;
+  __ b(&l4, EQ);
+  __ mov(R1, ShifterOperand(2));
+  __ Bind(&l4);
+  __ mov(R0, ShifterOperand(3));
+
+  // 2 linked labels.
+  Label l5;
+  __ b(&l5);
+  __ mov(R1, ShifterOperand(4));
+  __ b(&l5);
+  __ mov(R1, ShifterOperand(5));
+  __ Bind(&l5);
+  __ mov(R0, ShifterOperand(6));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LongBranch");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadMultiple) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit.
+  __ ldm(DB_W, R4, (1 << R0 | 1 << R3));
+
+  // 32 bit.
+  __ ldm(DB_W, R4, (1 << LR | 1 << R11));
+  __ ldm(DB, R4, (1 << LR | 1 << R11));
+
+  // Single reg is converted to ldr
+  __ ldm(DB_W, R4, (1 << R5));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadMultiple");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, StoreMultiple) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit.
+  __ stm(IA_W, R4, (1 << R0 | 1 << R3));
+
+  // 32 bit.
+  __ stm(IA_W, R4, (1 << LR | 1 << R11));
+  __ stm(IA, R4, (1 << LR | 1 << R11));
+
+  // Single reg is converted to str
+  __ stm(IA_W, R4, (1 << R5));
+  __ stm(IA, R4, (1 << R5));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "StoreMultiple");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, MovWMovT) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ movw(R4, 0);         // 16 bit.
+  __ movw(R4, 0x34);      // 16 bit.
+  __ movw(R9, 0x34);      // 32 bit due to high register.
+  __ movw(R3, 0x1234);    // 32 bit due to large value.
+  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+
+  // Always 32 bit.
+  __ movt(R0, 0);
+  __ movt(R0, 0x1234);
+  __ movt(R1, 0xffff);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "MovWMovT");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, SpecialAddSub) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ add(R2, SP, ShifterOperand(0x50));   // 16 bit.
+  __ add(SP, SP, ShifterOperand(0x50));   // 16 bit.
+  __ add(R8, SP, ShifterOperand(0x50));   // 32 bit.
+
+  __ add(R2, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+  __ add(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+
+  __ sub(SP, SP, ShifterOperand(0x50));     // 16 bit
+  __ sub(R0, SP, ShifterOperand(0x50));     // 32 bit
+  __ sub(R8, SP, ShifterOperand(0x50));     // 32 bit.
+
+  __ sub(SP, SP, ShifterOperand(0xf00));   // 32 bit due to imm size
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "SpecialAddSub");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, StoreToOffset) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ StoreToOffset(kStoreWord, R2, R4, 12);     // Simple
+  __ StoreToOffset(kStoreWord, R2, R4, 0x2000);     // Offset too big.
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "StoreToOffset");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, IfThen) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ it(EQ);
+  __ mov(R1, ShifterOperand(1), EQ);
+
+  __ it(EQ, kItThen);
+  __ mov(R1, ShifterOperand(1), EQ);
+  __ mov(R2, ShifterOperand(2), EQ);
+
+  __ it(EQ, kItElse);
+  __ mov(R1, ShifterOperand(1), EQ);
+  __ mov(R2, ShifterOperand(2), NE);
+
+  __ it(EQ, kItThen, kItElse);
+  __ mov(R1, ShifterOperand(1), EQ);
+  __ mov(R2, ShifterOperand(2), EQ);
+  __ mov(R3, ShifterOperand(3), NE);
+
+  __ it(EQ, kItElse, kItElse);
+  __ mov(R1, ShifterOperand(1), EQ);
+  __ mov(R2, ShifterOperand(2), NE);
+  __ mov(R3, ShifterOperand(3), NE);
+
+  __ it(EQ, kItThen, kItThen, kItElse);
+  __ mov(R1, ShifterOperand(1), EQ);
+  __ mov(R2, ShifterOperand(2), EQ);
+  __ mov(R3, ShifterOperand(3), EQ);
+  __ mov(R4, ShifterOperand(4), NE);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "IfThen");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, CbzCbnz) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ cbz(R2, &l1);
+  __ mov(R1, ShifterOperand(3));
+  __ mov(R2, ShifterOperand(3));
+  __ Bind(&l1);
+  __ mov(R2, ShifterOperand(4));
+
+  Label l2;
+  __ cbnz(R2, &l2);
+  __ mov(R8, ShifterOperand(3));
+  __ mov(R2, ShifterOperand(3));
+  __ Bind(&l2);
+  __ mov(R2, ShifterOperand(4));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "CbzCbnz");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Multiply) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ mul(R0, R1, R0);
+  __ mul(R0, R1, R2);
+  __ mul(R8, R9, R8);
+  __ mul(R8, R9, R10);
+
+  __ mla(R0, R1, R2, R3);
+  __ mla(R8, R9, R8, R9);
+
+  __ mls(R0, R1, R2, R3);
+  __ mls(R8, R9, R8, R9);
+
+  __ umull(R0, R1, R2, R3);
+  __ umull(R8, R9, R10, R11);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Multiply");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Divide) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ sdiv(R0, R1, R2);
+  __ sdiv(R8, R9, R10);
+
+  __ udiv(R0, R1, R2);
+  __ udiv(R8, R9, R10);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Divide");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, VMov) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ vmovs(S1, 1.0);
+  __ vmovd(D1, 1.0);
+
+  __ vmovs(S1, S2);
+  __ vmovd(D1, D2);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "VMov");
+  delete assembler;
+}
+
+
+TEST(Thumb2AssemblerTest, BasicFloatingPoint) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ vadds(S0, S1, S2);
+  __ vsubs(S0, S1, S2);
+  __ vmuls(S0, S1, S2);
+  __ vmlas(S0, S1, S2);
+  __ vmlss(S0, S1, S2);
+  __ vdivs(S0, S1, S2);
+  __ vabss(S0, S1);
+  __ vnegs(S0, S1);
+  __ vsqrts(S0, S1);
+
+  __ vaddd(D0, D1, D2);
+  __ vsubd(D0, D1, D2);
+  __ vmuld(D0, D1, D2);
+  __ vmlad(D0, D1, D2);
+  __ vmlsd(D0, D1, D2);
+  __ vdivd(D0, D1, D2);
+  __ vabsd(D0, D1);
+  __ vnegd(D0, D1);
+  __ vsqrtd(D0, D1);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "BasicFloatingPoint");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, FloatingPointConversions) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ vcvtsd(S2, D2);
+  __ vcvtds(D2, S2);
+
+  __ vcvtis(S1, S2);
+  __ vcvtsi(S1, S2);
+
+  __ vcvtid(S1, D2);
+  __ vcvtdi(D1, S2);
+
+  __ vcvtus(S1, S2);
+  __ vcvtsu(S1, S2);
+
+  __ vcvtud(S1, D2);
+  __ vcvtdu(D1, S2);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "FloatingPointConversions");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, FloatingPointComparisons) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ vcmps(S0, S1);
+  __ vcmpd(D0, D1);
+
+  __ vcmpsz(S2);
+  __ vcmpdz(D2);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "FloatingPointComparisons");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Calls) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ blx(LR);
+  __ bx(LR);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Calls");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Breakpoint) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ bkpt(0);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Breakpoint");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, StrR1) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ str(R1, Address(SP, 68));
+  __ str(R1, Address(SP, 1068));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "StrR1");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, VPushPop) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ vpushs(S2, 4);
+  __ vpushd(D2, 4);
+
+  __ vpops(S2, 4);
+  __ vpopd(D2, 4);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "VPushPop");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Max16BitBranch) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ b(&l1);
+  for (int i = 0 ; i < (1 << 11) ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Max16BitBranch");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, Branch32) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ b(&l1);
+  for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Branch32");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, CompareAndBranchMax) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ cbz(R4, &l1);
+  for (int i = 0 ; i < (1 << 7) ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "CompareAndBranchMax");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ cbz(R4, &l1);
+  for (int i = 0 ; i < (1 << 7) + 2 ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "CompareAndBranchRelocation16");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  __ cbz(R4, &l1);
+  for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "CompareAndBranchRelocation32");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, MixedBranch32) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  Label l1;
+  Label l2;
+  __ b(&l1);      // Forwards.
+  __ Bind(&l2);
+
+  // Space to force relocation.
+  for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
+    __ mov(R3, ShifterOperand(i & 0xff));
+  }
+  __ b(&l2);      // Backwards.
+  __ Bind(&l1);
+  __ mov(R1, ShifterOperand(R2));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "MixedBranch32");
+  delete assembler;
+}
+
+#undef __
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
new file mode 100644
index 0000000..c5f2226
--- /dev/null
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -0,0 +1,4788 @@
+const char* SimpleMovResults[] = {
+  "   0:	0008      	movs	r0, r1\n",
+  "   2:	46c8      	mov	r8, r9\n",
+  "   4:	2001      	movs	r0, #1\n",
+  "   6:	f04f 0809 	mov.w	r8, #9\n",
+  nullptr
+};
+const char* SimpleMov32Results[] = {
+  "   0:	ea4f 0001 	mov.w	r0, r1\n",
+  "   4:	ea4f 0809 	mov.w	r8, r9\n",
+  nullptr
+};
+const char* SimpleMovAddResults[] = {
+  "   0:	0008      	movs	r0, r1\n",
+  "   2:	1888      	adds	r0, r1, r2\n",
+  "   4:	1c08      	adds	r0, r1, #0\n",
+  nullptr
+};
+const char* DataProcessingRegisterResults[] = {
+  "   0:	0008      	movs	r0, r1\n",
+  "   2:	43c8      	mvns	r0, r1\n",
+  "   4:	1888      	adds	r0, r1, r2\n",
+  "   6:	1a88      	subs	r0, r1, r2\n",
+  "   8:	ea01 0002 	and.w	r0, r1, r2\n",
+  "   c:	ea41 0002 	orr.w	r0, r1, r2\n",
+  "  10:	ea81 0002 	eor.w	r0, r1, r2\n",
+  "  14:	ea21 0002 	bic.w	r0, r1, r2\n",
+  "  18:	eb41 0002 	adc.w	r0, r1, r2\n",
+  "  1c:	eb61 0002 	sbc.w	r0, r1, r2\n",
+  "  20:	ebc1 0002 	rsb	r0, r1, r2\n",
+  "  24:	1c08      	adds	r0, r1, #0\n",
+  "  26:	1e08      	subs	r0, r1, #0\n",
+  "  28:	4008      	ands	r0, r1\n",
+  "  2a:	4308      	orrs	r0, r1\n",
+  "  2c:	4048      	eors	r0, r1\n",
+  "  2e:	4388      	bics	r0, r1\n",
+  "  30:	4148      	adcs	r0, r1\n",
+  "  32:	4188      	sbcs	r0, r1\n",
+  "  34:	4248      	negs	r0, r1\n",
+  "  36:	4208      	tst	r0, r1\n",
+  "  38:	ea90 0f01 	teq	r0, r1\n",
+  "  3c:	4288      	cmp	r0, r1\n",
+  "  3e:	42c8      	cmn	r0, r1\n",
+  "  40:	0008      	movs	r0, r1\n",
+  "  42:	43c8      	mvns	r0, r1\n",
+  nullptr
+};
+const char* DataProcessingImmediateResults[] = {
+  "   0:	2055      	movs	r0, #85	; 0x55\n",
+  "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
+  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
+  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
+  "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
+  "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
+  "  16:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
+  "  1a:	f021 0055 	bic.w	r0, r1, #85	; 0x55\n",
+  "  1e:	f141 0055 	adc.w	r0, r1, #85	; 0x55\n",
+  "  22:	f161 0055 	sbc.w	r0, r1, #85	; 0x55\n",
+  "  26:	f1c1 0055 	rsb	r0, r1, #85	; 0x55\n",
+  "  2a:	f010 0f55 	tst.w	r0, #85	; 0x55\n",
+  "  2e:	f090 0f55 	teq	r0, #85	; 0x55\n",
+  "  32:	2855      	cmp	r0, #85	; 0x55\n",
+  "  34:	f110 0f55 	cmn.w	r0, #85	; 0x55\n",
+  "  38:	1d48      	adds	r0, r1, #5\n",
+  "  3a:	1f48      	subs	r0, r1, #5\n",
+  "  3c:	2055      	movs	r0, #85	; 0x55\n",
+  "  3e:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
+  nullptr
+};
+const char* DataProcessingModifiedImmediateResults[] = {
+  "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
+  "   4:	f06f 1055 	mvn.w	r0, #5570645	; 0x550055\n",
+  "   8:	f101 1055 	add.w	r0, r1, #5570645	; 0x550055\n",
+  "   c:	f1a1 1055 	sub.w	r0, r1, #5570645	; 0x550055\n",
+  "  10:	f001 1055 	and.w	r0, r1, #5570645	; 0x550055\n",
+  "  14:	f041 1055 	orr.w	r0, r1, #5570645	; 0x550055\n",
+  "  18:	f081 1055 	eor.w	r0, r1, #5570645	; 0x550055\n",
+  "  1c:	f021 1055 	bic.w	r0, r1, #5570645	; 0x550055\n",
+  "  20:	f141 1055 	adc.w	r0, r1, #5570645	; 0x550055\n",
+  "  24:	f161 1055 	sbc.w	r0, r1, #5570645	; 0x550055\n",
+  "  28:	f1c1 1055 	rsb	r0, r1, #5570645	; 0x550055\n",
+  "  2c:	f010 1f55 	tst.w	r0, #5570645	; 0x550055\n",
+  "  30:	f090 1f55 	teq	r0, #5570645	; 0x550055\n",
+  "  34:	f1b0 1f55 	cmp.w	r0, #5570645	; 0x550055\n",
+  "  38:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
+  nullptr
+};
+const char* DataProcessingModifiedImmediatesResults[] = {
+  "   0:	f04f 1055 	mov.w	r0, #5570645	; 0x550055\n",
+  "   4:	f04f 2055 	mov.w	r0, #1426085120	; 0x55005500\n",
+  "   8:	f04f 3055 	mov.w	r0, #1431655765	; 0x55555555\n",
+  "   c:	f04f 4055 	mov.w	r0, #3573547008	; 0xd5000000\n",
+  "  10:	f04f 40d4 	mov.w	r0, #1778384896	; 0x6a000000\n",
+  "  14:	f44f 7054 	mov.w	r0, #848	; 0x350\n",
+  "  18:	f44f 70d4 	mov.w	r0, #424	; 0x1a8\n",
+  nullptr
+};
+const char* DataProcessingShiftedRegisterResults[] = {
+  "   0:	0123      	lsls	r3, r4, #4\n",
+  "   2:	0963      	lsrs	r3, r4, #5\n",
+  "   4:	11a3      	asrs	r3, r4, #6\n",
+  "   6:	ea4f 13f4 	mov.w	r3, r4, ror #7\n",
+  "   a:	41e3      	rors	r3, r4\n",
+  "   c:	0128      	lsls	r0, r5, #4\n",
+  "   e:	0968      	lsrs	r0, r5, #5\n",
+  "  10:	11a8      	asrs	r0, r5, #6\n",
+  "  12:	ea4f 18f4 	mov.w	r8, r4, ror #7\n",
+  "  16:	ea4f 0834 	mov.w	r8, r4, rrx\n",
+  nullptr
+};
+const char* BasicLoadResults[] = {
+  "   0:	69a3      	ldr	r3, [r4, #24]\n",
+  "   2:	7e23      	ldrb	r3, [r4, #24]\n",
+  "   4:	8b23      	ldrh	r3, [r4, #24]\n",
+  "   6:	f994 3018 	ldrsb.w	r3, [r4, #24]\n",
+  "   a:	f9b4 3018 	ldrsh.w	r3, [r4, #24]\n",
+  "   e:	9b06      	ldr	r3, [sp, #24]\n",
+  "  10:	f8d4 8018 	ldr.w	r8, [r4, #24]\n",
+  "  14:	f894 8018 	ldrb.w	r8, [r4, #24]\n",
+  "  18:	f8b4 8018 	ldrh.w	r8, [r4, #24]\n",
+  "  1c:	f994 8018 	ldrsb.w	r8, [r4, #24]\n",
+  "  20:	f9b4 8018 	ldrsh.w	r8, [r4, #24]\n",
+  nullptr
+};
+const char* BasicStoreResults[] = {
+  "   0:	61a3      	str	r3, [r4, #24]\n",
+  "   2:	7623      	strb	r3, [r4, #24]\n",
+  "   4:	8323      	strh	r3, [r4, #24]\n",
+  "   6:	9306      	str	r3, [sp, #24]\n",
+  "   8:	f8c4 8018 	str.w	r8, [r4, #24]\n",
+  "   c:	f884 8018 	strb.w	r8, [r4, #24]\n",
+  "  10:	f8a4 8018 	strh.w	r8, [r4, #24]\n",
+  nullptr
+};
+const char* ComplexLoadResults[] = {
+  "   0:	69a3      	ldr	r3, [r4, #24]\n",
+  "   2:	f854 3f18 	ldr.w	r3, [r4, #24]!\n",
+  "   6:	f854 3b18 	ldr.w	r3, [r4], #24\n",
+  "   a:	f854 3c18 	ldr.w	r3, [r4, #-24]\n",
+  "   e:	f854 3d18 	ldr.w	r3, [r4, #-24]!\n",
+  "  12:	f854 3918 	ldr.w	r3, [r4], #-24\n",
+  "  16:	7e23      	ldrb	r3, [r4, #24]\n",
+  "  18:	f814 3f18 	ldrb.w	r3, [r4, #24]!\n",
+  "  1c:	f814 3b18 	ldrb.w	r3, [r4], #24\n",
+  "  20:	f814 3c18 	ldrb.w	r3, [r4, #-24]\n",
+  "  24:	f814 3d18 	ldrb.w	r3, [r4, #-24]!\n",
+  "  28:	f814 3918 	ldrb.w	r3, [r4], #-24\n",
+  "  2c:	8b23      	ldrh	r3, [r4, #24]\n",
+  "  2e:	f834 3f18 	ldrh.w	r3, [r4, #24]!\n",
+  "  32:	f834 3b18 	ldrh.w	r3, [r4], #24\n",
+  "  36:	f834 3c18 	ldrh.w	r3, [r4, #-24]\n",
+  "  3a:	f834 3d18 	ldrh.w	r3, [r4, #-24]!\n",
+  "  3e:	f834 3918 	ldrh.w	r3, [r4], #-24\n",
+  "  42:	f994 3018 	ldrsb.w	r3, [r4, #24]\n",
+  "  46:	f914 3f18 	ldrsb.w	r3, [r4, #24]!\n",
+  "  4a:	f914 3b18 	ldrsb.w	r3, [r4], #24\n",
+  "  4e:	f914 3c18 	ldrsb.w	r3, [r4, #-24]\n",
+  "  52:	f914 3d18 	ldrsb.w	r3, [r4, #-24]!\n",
+  "  56:	f914 3918 	ldrsb.w	r3, [r4], #-24\n",
+  "  5a:	f9b4 3018 	ldrsh.w	r3, [r4, #24]\n",
+  "  5e:	f934 3f18 	ldrsh.w	r3, [r4, #24]!\n",
+  "  62:	f934 3b18 	ldrsh.w	r3, [r4], #24\n",
+  "  66:	f934 3c18 	ldrsh.w	r3, [r4, #-24]\n",
+  "  6a:	f934 3d18 	ldrsh.w	r3, [r4, #-24]!\n",
+  "  6e:	f934 3918 	ldrsh.w	r3, [r4], #-24\n",
+  nullptr
+};
+const char* ComplexStoreResults[] = {
+  "   0:	61a3      	str	r3, [r4, #24]\n",
+  "   2:	f844 3f18 	str.w	r3, [r4, #24]!\n",
+  "   6:	f844 3b18 	str.w	r3, [r4], #24\n",
+  "   a:	f844 3c18 	str.w	r3, [r4, #-24]\n",
+  "   e:	f844 3d18 	str.w	r3, [r4, #-24]!\n",
+  "  12:	f844 3918 	str.w	r3, [r4], #-24\n",
+  "  16:	7623      	strb	r3, [r4, #24]\n",
+  "  18:	f804 3f18 	strb.w	r3, [r4, #24]!\n",
+  "  1c:	f804 3b18 	strb.w	r3, [r4], #24\n",
+  "  20:	f804 3c18 	strb.w	r3, [r4, #-24]\n",
+  "  24:	f804 3d18 	strb.w	r3, [r4, #-24]!\n",
+  "  28:	f804 3918 	strb.w	r3, [r4], #-24\n",
+  "  2c:	8323      	strh	r3, [r4, #24]\n",
+  "  2e:	f824 3f18 	strh.w	r3, [r4, #24]!\n",
+  "  32:	f824 3b18 	strh.w	r3, [r4], #24\n",
+  "  36:	f824 3c18 	strh.w	r3, [r4, #-24]\n",
+  "  3a:	f824 3d18 	strh.w	r3, [r4, #-24]!\n",
+  "  3e:	f824 3918 	strh.w	r3, [r4], #-24\n",
+  nullptr
+};
+const char* NegativeLoadStoreResults[] = {
+  "   0:	f854 3c18 	ldr.w	r3, [r4, #-24]\n",
+  "   4:	f854 3d18 	ldr.w	r3, [r4, #-24]!\n",
+  "   8:	f854 3918 	ldr.w	r3, [r4], #-24\n",
+  "   c:	f854 3e18 	ldrt	r3, [r4, #24]\n",
+  "  10:	f854 3f18 	ldr.w	r3, [r4, #24]!\n",
+  "  14:	f854 3b18 	ldr.w	r3, [r4], #24\n",
+  "  18:	f814 3c18 	ldrb.w	r3, [r4, #-24]\n",
+  "  1c:	f814 3d18 	ldrb.w	r3, [r4, #-24]!\n",
+  "  20:	f814 3918 	ldrb.w	r3, [r4], #-24\n",
+  "  24:	f814 3e18 	ldrbt	r3, [r4, #24]\n",
+  "  28:	f814 3f18 	ldrb.w	r3, [r4, #24]!\n",
+  "  2c:	f814 3b18 	ldrb.w	r3, [r4], #24\n",
+  "  30:	f834 3c18 	ldrh.w	r3, [r4, #-24]\n",
+  "  34:	f834 3d18 	ldrh.w	r3, [r4, #-24]!\n",
+  "  38:	f834 3918 	ldrh.w	r3, [r4], #-24\n",
+  "  3c:	f834 3e18 	ldrht	r3, [r4, #24]\n",
+  "  40:	f834 3f18 	ldrh.w	r3, [r4, #24]!\n",
+  "  44:	f834 3b18 	ldrh.w	r3, [r4], #24\n",
+  "  48:	f914 3c18 	ldrsb.w	r3, [r4, #-24]\n",
+  "  4c:	f914 3d18 	ldrsb.w	r3, [r4, #-24]!\n",
+  "  50:	f914 3918 	ldrsb.w	r3, [r4], #-24\n",
+  "  54:	f914 3e18 	ldrsbt	r3, [r4, #24]\n",
+  "  58:	f914 3f18 	ldrsb.w	r3, [r4, #24]!\n",
+  "  5c:	f914 3b18 	ldrsb.w	r3, [r4], #24\n",
+  "  60:	f934 3c18 	ldrsh.w	r3, [r4, #-24]\n",
+  "  64:	f934 3d18 	ldrsh.w	r3, [r4, #-24]!\n",
+  "  68:	f934 3918 	ldrsh.w	r3, [r4], #-24\n",
+  "  6c:	f934 3e18 	ldrsht	r3, [r4, #24]\n",
+  "  70:	f934 3f18 	ldrsh.w	r3, [r4, #24]!\n",
+  "  74:	f934 3b18 	ldrsh.w	r3, [r4], #24\n",
+  "  78:	f844 3c18 	str.w	r3, [r4, #-24]\n",
+  "  7c:	f844 3d18 	str.w	r3, [r4, #-24]!\n",
+  "  80:	f844 3918 	str.w	r3, [r4], #-24\n",
+  "  84:	f844 3e18 	strt	r3, [r4, #24]\n",
+  "  88:	f844 3f18 	str.w	r3, [r4, #24]!\n",
+  "  8c:	f844 3b18 	str.w	r3, [r4], #24\n",
+  "  90:	f804 3c18 	strb.w	r3, [r4, #-24]\n",
+  "  94:	f804 3d18 	strb.w	r3, [r4, #-24]!\n",
+  "  98:	f804 3918 	strb.w	r3, [r4], #-24\n",
+  "  9c:	f804 3e18 	strbt	r3, [r4, #24]\n",
+  "  a0:	f804 3f18 	strb.w	r3, [r4, #24]!\n",
+  "  a4:	f804 3b18 	strb.w	r3, [r4], #24\n",
+  "  a8:	f824 3c18 	strh.w	r3, [r4, #-24]\n",
+  "  ac:	f824 3d18 	strh.w	r3, [r4, #-24]!\n",
+  "  b0:	f824 3918 	strh.w	r3, [r4], #-24\n",
+  "  b4:	f824 3e18 	strht	r3, [r4, #24]\n",
+  "  b8:	f824 3f18 	strh.w	r3, [r4, #24]!\n",
+  "  bc:	f824 3b18 	strh.w	r3, [r4], #24\n",
+  nullptr
+};
+const char* SimpleLoadStoreDualResults[] = {
+  "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
+  "   4:	e9d0 2306 	ldrd	r2, r3, [r0, #24]\n",
+  nullptr
+};
+const char* ComplexLoadStoreDualResults[] = {
+  "   0:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
+  "   4:	e9e0 2306 	strd	r2, r3, [r0, #24]!\n",
+  "   8:	e8e0 2306 	strd	r2, r3, [r0], #24\n",
+  "   c:	e940 2306 	strd	r2, r3, [r0, #-24]\n",
+  "  10:	e960 2306 	strd	r2, r3, [r0, #-24]!\n",
+  "  14:	e860 2306 	strd	r2, r3, [r0], #-24\n",
+  "  18:	e9d0 2306 	ldrd	r2, r3, [r0, #24]\n",
+  "  1c:	e9f0 2306 	ldrd	r2, r3, [r0, #24]!\n",
+  "  20:	e8f0 2306 	ldrd	r2, r3, [r0], #24\n",
+  "  24:	e950 2306 	ldrd	r2, r3, [r0, #-24]\n",
+  "  28:	e970 2306 	ldrd	r2, r3, [r0, #-24]!\n",
+  "  2c:	e870 2306 	ldrd	r2, r3, [r0], #-24\n",
+  nullptr
+};
+const char* NegativeLoadStoreDualResults[] = {
+  "   0:	e940 2306 	strd	r2, r3, [r0, #-24]\n",
+  "   4:	e960 2306 	strd	r2, r3, [r0, #-24]!\n",
+  "   8:	e860 2306 	strd	r2, r3, [r0], #-24\n",
+  "   c:	e9c0 2306 	strd	r2, r3, [r0, #24]\n",
+  "  10:	e9e0 2306 	strd	r2, r3, [r0, #24]!\n",
+  "  14:	e8e0 2306 	strd	r2, r3, [r0], #24\n",
+  "  18:	e950 2306 	ldrd	r2, r3, [r0, #-24]\n",
+  "  1c:	e970 2306 	ldrd	r2, r3, [r0, #-24]!\n",
+  "  20:	e870 2306 	ldrd	r2, r3, [r0], #-24\n",
+  "  24:	e9d0 2306 	ldrd	r2, r3, [r0, #24]\n",
+  "  28:	e9f0 2306 	ldrd	r2, r3, [r0, #24]!\n",
+  "  2c:	e8f0 2306 	ldrd	r2, r3, [r0], #24\n",
+  nullptr
+};
+const char* SimpleBranchResults[] = {
+  "   0:	2002      	movs	r0, #2\n",
+  "   2:	2101      	movs	r1, #1\n",
+  "   4:	e7fd      	b.n	2 <SimpleBranch+0x2>\n",
+  "   6:	e000      	b.n	a <SimpleBranch+0xa>\n",
+  "   8:	2102      	movs	r1, #2\n",
+  "   a:	2003      	movs	r0, #3\n",
+  "   c:	2002      	movs	r0, #2\n",
+  "   e:	2101      	movs	r1, #1\n",
+  "  10:	d0fd      	beq.n	e <SimpleBranch+0xe>\n",
+  "  12:	d000      	beq.n	16 <SimpleBranch+0x16>\n",
+  "  14:	2102      	movs	r1, #2\n",
+  "  16:	2003      	movs	r0, #3\n",
+  "  18:	e002      	b.n	20 <SimpleBranch+0x20>\n",
+  "  1a:	2104      	movs	r1, #4\n",
+  "  1c:	e000      	b.n	20 <SimpleBranch+0x20>\n",
+  "  1e:	2105      	movs	r1, #5\n",
+  "  20:	2006      	movs	r0, #6\n",
+  nullptr
+};
+const char* LongBranchResults[] = {
+  "   0:	f04f 0002 	mov.w	r0, #2\n",
+  "   4:	f04f 0101 	mov.w	r1, #1\n",
+  "   8:	f7ff bffc 	b.w	4 <LongBranch+0x4>\n",
+  "   c:	f000 b802 	b.w	14 <LongBranch+0x14>\n",
+  "  10:	f04f 0102 	mov.w	r1, #2\n",
+  "  14:	f04f 0003 	mov.w	r0, #3\n",
+  "  18:	f04f 0002 	mov.w	r0, #2\n",
+  "  1c:	f04f 0101 	mov.w	r1, #1\n",
+  "  20:	f43f affc 	beq.w	1c <LongBranch+0x1c>\n",
+  "  24:	f000 8002 	beq.w	2c <LongBranch+0x2c>\n",
+  "  28:	f04f 0102 	mov.w	r1, #2\n",
+  "  2c:	f04f 0003 	mov.w	r0, #3\n",
+  "  30:	f000 b806 	b.w	40 <LongBranch+0x40>\n",
+  "  34:	f04f 0104 	mov.w	r1, #4\n",
+  "  38:	f000 b802 	b.w	40 <LongBranch+0x40>\n",
+  "  3c:	f04f 0105 	mov.w	r1, #5\n",
+  "  40:	f04f 0006 	mov.w	r0, #6\n",
+  nullptr
+};
+const char* LoadMultipleResults[] = {
+  "   0:	cc09      	ldmia	r4!, {r0, r3}\n",
+  "   2:	e934 4800 	ldmdb	r4!, {fp, lr}\n",
+  "   6:	e914 4800 	ldmdb	r4, {fp, lr}\n",
+  "   a:	f854 5b04 	ldr.w	r5, [r4], #4\n",
+  nullptr
+};
+const char* StoreMultipleResults[] = {
+  "   0:	c409      	stmia	r4!, {r0, r3}\n",
+  "   2:	e8a4 4800 	stmia.w	r4!, {fp, lr}\n",
+  "   6:	e884 4800 	stmia.w	r4, {fp, lr}\n",
+  "   a:	f844 5c04 	str.w	r5, [r4, #-4]\n",
+  "   e:	f844 5d04 	str.w	r5, [r4, #-4]!\n",
+  nullptr
+};
+const char* MovWMovTResults[] = {
+  "   0:	2400      	movs	r4, #0\n",
+  "   2:	2434      	movs	r4, #52	; 0x34\n",
+  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  10:	f2c0 0000 	movt	r0, #0\n",
+  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  nullptr
+};
+const char* SpecialAddSubResults[] = {
+  "   0:	f20d 0250 	addw	r2, sp, #80	; 0x50\n",
+  "   4:	f20d 0d50 	addw	sp, sp, #80	; 0x50\n",
+  "   8:	f20d 0850 	addw	r8, sp, #80	; 0x50\n",
+  "   c:	f60d 7200 	addw	r2, sp, #3840	; 0xf00\n",
+  "  10:	f60d 7d00 	addw	sp, sp, #3840	; 0xf00\n",
+  "  14:	f2ad 0d50 	subw	sp, sp, #80	; 0x50\n",
+  "  18:	f2ad 0050 	subw	r0, sp, #80	; 0x50\n",
+  "  1c:	f2ad 0850 	subw	r8, sp, #80	; 0x50\n",
+  "  20:	f6ad 7d00 	subw	sp, sp, #3840	; 0xf00\n",
+  nullptr
+};
+const char* StoreToOffsetResults[] = {
+  "   0:	60e2      	str	r2, [r4, #12]\n",
+  "   2:	f44f 5c00 	mov.w	ip, #8192	; 0x2000\n",
+  "   6:	44a4      	add	ip, r4\n",
+  "   8:	f8cc 2000 	str.w	r2, [ip]\n",
+  nullptr
+};
+const char* IfThenResults[] = {
+  "   0:	bf08      	it	eq\n",
+  "   2:	2101      	moveq	r1, #1\n",
+  "   4:	bf04      	itt	eq\n",
+  "   6:	2101      	moveq	r1, #1\n",
+  "   8:	2202      	moveq	r2, #2\n",
+  "   a:	bf0c      	ite	eq\n",
+  "   c:	2101      	moveq	r1, #1\n",
+  "   e:	2202      	movne	r2, #2\n",
+  "  10:	bf06      	itte	eq\n",
+  "  12:	2101      	moveq	r1, #1\n",
+  "  14:	2202      	moveq	r2, #2\n",
+  "  16:	2303      	movne	r3, #3\n",
+  "  18:	bf0e      	itee	eq\n",
+  "  1a:	2101      	moveq	r1, #1\n",
+  "  1c:	2202      	movne	r2, #2\n",
+  "  1e:	2303      	movne	r3, #3\n",
+  "  20:	bf03      	ittte	eq\n",
+  "  22:	2101      	moveq	r1, #1\n",
+  "  24:	2202      	moveq	r2, #2\n",
+  "  26:	2303      	moveq	r3, #3\n",
+  "  28:	2404      	movne	r4, #4\n",
+  nullptr
+};
+const char* CbzCbnzResults[] = {
+  "   0:	b10a      	cbz	r2, 6 <CbzCbnz+0x6>\n",
+  "   2:	2103      	movs	r1, #3\n",
+  "   4:	2203      	movs	r2, #3\n",
+  "   6:	2204      	movs	r2, #4\n",
+  "   8:	b912      	cbnz	r2, 10 <CbzCbnz+0x10>\n",
+  "   a:	f04f 0803 	mov.w	r8, #3\n",
+  "   e:	2203      	movs	r2, #3\n",
+  "  10:	2204      	movs	r2, #4\n",
+  nullptr
+};
+const char* MultiplyResults[] = {
+  "   0:	4348      	muls	r0, r1\n",
+  "   2:	fb01 f002 	mul.w	r0, r1, r2\n",
+  "   6:	fb09 f808 	mul.w	r8, r9, r8\n",
+  "   a:	fb09 f80a 	mul.w	r8, r9, sl\n",
+  "   e:	fb01 3002 	mla	r0, r1, r2, r3\n",
+  "  12:	fb09 9808 	mla	r8, r9, r8, r9\n",
+  "  16:	fb01 3012 	mls	r0, r1, r2, r3\n",
+  "  1a:	fb09 9818 	mls	r8, r9, r8, r9\n",
+  "  1e:	fba2 0103 	umull	r0, r1, r2, r3\n",
+  "  22:	fbaa 890b 	umull	r8, r9, sl, fp\n",
+  nullptr
+};
+const char* DivideResults[] = {
+  "   0:	fb91 f0f2 	sdiv	r0, r1, r2\n",
+  "   4:	fb99 f8fa 	sdiv	r8, r9, sl\n",
+  "   8:	fbb1 f0f2 	udiv	r0, r1, r2\n",
+  "   c:	fbb9 f8fa 	udiv	r8, r9, sl\n",
+  nullptr
+};
+const char* VMovResults[] = {
+  "   0:	eef7 0a00 	vmov.f32	s1, #112	; 0x70\n",
+  "   4:	eeb7 1b00 	vmov.f64	d1, #112	; 0x70\n",
+  "   8:	eef0 0a41 	vmov.f32	s1, s2\n",
+  "   c:	eeb0 1b42 	vmov.f64	d1, d2\n",
+  nullptr
+};
+const char* BasicFloatingPointResults[] = {
+  "   0:	ee30 0a81 	vadd.f32	s0, s1, s2\n",
+  "   4:	ee30 0ac1 	vsub.f32	s0, s1, s2\n",
+  "   8:	ee20 0a81 	vmul.f32	s0, s1, s2\n",
+  "   c:	ee00 0a81 	vmla.f32	s0, s1, s2\n",
+  "  10:	ee00 0ac1 	vmls.f32	s0, s1, s2\n",
+  "  14:	ee80 0a81 	vdiv.f32	s0, s1, s2\n",
+  "  18:	eeb0 0ae0 	vabs.f32	s0, s1\n",
+  "  1c:	eeb1 0a60 	vneg.f32	s0, s1\n",
+  "  20:	eeb1 0ae0 	vsqrt.f32	s0, s1\n",
+  "  24:	ee31 0b02 	vadd.f64	d0, d1, d2\n",
+  "  28:	ee31 0b42 	vsub.f64	d0, d1, d2\n",
+  "  2c:	ee21 0b02 	vmul.f64	d0, d1, d2\n",
+  "  30:	ee01 0b02 	vmla.f64	d0, d1, d2\n",
+  "  34:	ee01 0b42 	vmls.f64	d0, d1, d2\n",
+  "  38:	ee81 0b02 	vdiv.f64	d0, d1, d2\n",
+  "  3c:	eeb0 0bc1 	vabs.f64	d0, d1\n",
+  "  40:	eeb1 0b41 	vneg.f64	d0, d1\n",
+  "  44:	eeb1 0bc1 	vsqrt.f64	d0, d1\n",
+  nullptr
+};
+const char* FloatingPointConversionsResults[] = {
+  "   0:	eeb7 1bc2 	vcvt.f32.f64	s2, d2\n",
+  "   4:	eeb7 2ac1 	vcvt.f64.f32	d2, s2\n",
+  "   8:	eefd 0ac1 	vcvt.s32.f32	s1, s2\n",
+  "   c:	eef8 0ac1 	vcvt.f32.s32	s1, s2\n",
+  "  10:	eefd 0bc2 	vcvt.s32.f64	s1, d2\n",
+  "  14:	eeb8 1bc1 	vcvt.f64.s32	d1, s2\n",
+  "  18:	eefc 0ac1 	vcvt.u32.f32	s1, s2\n",
+  "  1c:	eef8 0a41 	vcvt.f32.u32	s1, s2\n",
+  "  20:	eefc 0bc2 	vcvt.u32.f64	s1, d2\n",
+  "  24:	eeb8 1b41 	vcvt.f64.u32	d1, s2\n",
+  nullptr
+};
+const char* FloatingPointComparisonsResults[] = {
+  "   0:	eeb4 0a60 	vcmp.f32	s0, s1\n",
+  "   4:	eeb4 0b41 	vcmp.f64	d0, d1\n",
+  "   8:	eeb5 1a40 	vcmp.f32	s2, #0.0\n",
+  "   c:	eeb5 2b40 	vcmp.f64	d2, #0.0\n",
+  nullptr
+};
+const char* CallsResults[] = {
+  "   0:	47f0      	blx	lr\n",
+  "   2:	4770      	bx	lr\n",
+  nullptr
+};
+const char* BreakpointResults[] = {
+  "   0:	be00      	bkpt	0x0000\n",
+  nullptr
+};
+const char* StrR1Results[] = {
+  "   0:	9111      	str	r1, [sp, #68]	; 0x44\n",
+  "   2:	f8cd 142c 	str.w	r1, [sp, #1068]	; 0x42c\n",
+  nullptr
+};
+const char* VPushPopResults[] = {
+  "   0:	ed2d 1a04 	vpush	{s2-s5}\n",
+  "   4:	ed2d 2b08 	vpush	{d2-d5}\n",
+  "   8:	ecbd 1a04 	vpop	{s2-s5}\n",
+  "   c:	ecbd 2b08 	vpop	{d2-d5}\n",
+  nullptr
+};
+const char* Max16BitBranchResults[] = {
+  "   0:	e3ff      	b.n	802 <Max16BitBranch+0x802>\n",
+  "   2:	2300      	movs	r3, #0\n",
+  "   4:	2302      	movs	r3, #2\n",
+  "   6:	2304      	movs	r3, #4\n",
+  "   8:	2306      	movs	r3, #6\n",
+  "   a:	2308      	movs	r3, #8\n",
+  "   c:	230a      	movs	r3, #10\n",
+  "   e:	230c      	movs	r3, #12\n",
+  "  10:	230e      	movs	r3, #14\n",
+  "  12:	2310      	movs	r3, #16\n",
+  "  14:	2312      	movs	r3, #18\n",
+  "  16:	2314      	movs	r3, #20\n",
+  "  18:	2316      	movs	r3, #22\n",
+  "  1a:	2318      	movs	r3, #24\n",
+  "  1c:	231a      	movs	r3, #26\n",
+  "  1e:	231c      	movs	r3, #28\n",
+  "  20:	231e      	movs	r3, #30\n",
+  "  22:	2320      	movs	r3, #32\n",
+  "  24:	2322      	movs	r3, #34	; 0x22\n",
+  "  26:	2324      	movs	r3, #36	; 0x24\n",
+  "  28:	2326      	movs	r3, #38	; 0x26\n",
+  "  2a:	2328      	movs	r3, #40	; 0x28\n",
+  "  2c:	232a      	movs	r3, #42	; 0x2a\n",
+  "  2e:	232c      	movs	r3, #44	; 0x2c\n",
+  "  30:	232e      	movs	r3, #46	; 0x2e\n",
+  "  32:	2330      	movs	r3, #48	; 0x30\n",
+  "  34:	2332      	movs	r3, #50	; 0x32\n",
+  "  36:	2334      	movs	r3, #52	; 0x34\n",
+  "  38:	2336      	movs	r3, #54	; 0x36\n",
+  "  3a:	2338      	movs	r3, #56	; 0x38\n",
+  "  3c:	233a      	movs	r3, #58	; 0x3a\n",
+  "  3e:	233c      	movs	r3, #60	; 0x3c\n",
+  "  40:	233e      	movs	r3, #62	; 0x3e\n",
+  "  42:	2340      	movs	r3, #64	; 0x40\n",
+  "  44:	2342      	movs	r3, #66	; 0x42\n",
+  "  46:	2344      	movs	r3, #68	; 0x44\n",
+  "  48:	2346      	movs	r3, #70	; 0x46\n",
+  "  4a:	2348      	movs	r3, #72	; 0x48\n",
+  "  4c:	234a      	movs	r3, #74	; 0x4a\n",
+  "  4e:	234c      	movs	r3, #76	; 0x4c\n",
+  "  50:	234e      	movs	r3, #78	; 0x4e\n",
+  "  52:	2350      	movs	r3, #80	; 0x50\n",
+  "  54:	2352      	movs	r3, #82	; 0x52\n",
+  "  56:	2354      	movs	r3, #84	; 0x54\n",
+  "  58:	2356      	movs	r3, #86	; 0x56\n",
+  "  5a:	2358      	movs	r3, #88	; 0x58\n",
+  "  5c:	235a      	movs	r3, #90	; 0x5a\n",
+  "  5e:	235c      	movs	r3, #92	; 0x5c\n",
+  "  60:	235e      	movs	r3, #94	; 0x5e\n",
+  "  62:	2360      	movs	r3, #96	; 0x60\n",
+  "  64:	2362      	movs	r3, #98	; 0x62\n",
+  "  66:	2364      	movs	r3, #100	; 0x64\n",
+  "  68:	2366      	movs	r3, #102	; 0x66\n",
+  "  6a:	2368      	movs	r3, #104	; 0x68\n",
+  "  6c:	236a      	movs	r3, #106	; 0x6a\n",
+  "  6e:	236c      	movs	r3, #108	; 0x6c\n",
+  "  70:	236e      	movs	r3, #110	; 0x6e\n",
+  "  72:	2370      	movs	r3, #112	; 0x70\n",
+  "  74:	2372      	movs	r3, #114	; 0x72\n",
+  "  76:	2374      	movs	r3, #116	; 0x74\n",
+  "  78:	2376      	movs	r3, #118	; 0x76\n",
+  "  7a:	2378      	movs	r3, #120	; 0x78\n",
+  "  7c:	237a      	movs	r3, #122	; 0x7a\n",
+  "  7e:	237c      	movs	r3, #124	; 0x7c\n",
+  "  80:	237e      	movs	r3, #126	; 0x7e\n",
+  "  82:	2380      	movs	r3, #128	; 0x80\n",
+  "  84:	2382      	movs	r3, #130	; 0x82\n",
+  "  86:	2384      	movs	r3, #132	; 0x84\n",
+  "  88:	2386      	movs	r3, #134	; 0x86\n",
+  "  8a:	2388      	movs	r3, #136	; 0x88\n",
+  "  8c:	238a      	movs	r3, #138	; 0x8a\n",
+  "  8e:	238c      	movs	r3, #140	; 0x8c\n",
+  "  90:	238e      	movs	r3, #142	; 0x8e\n",
+  "  92:	2390      	movs	r3, #144	; 0x90\n",
+  "  94:	2392      	movs	r3, #146	; 0x92\n",
+  "  96:	2394      	movs	r3, #148	; 0x94\n",
+  "  98:	2396      	movs	r3, #150	; 0x96\n",
+  "  9a:	2398      	movs	r3, #152	; 0x98\n",
+  "  9c:	239a      	movs	r3, #154	; 0x9a\n",
+  "  9e:	239c      	movs	r3, #156	; 0x9c\n",
+  "  a0:	239e      	movs	r3, #158	; 0x9e\n",
+  "  a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  "  a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  "  a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  "  a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  "  aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  "  ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  "  ae:	23ac      	movs	r3, #172	; 0xac\n",
+  "  b0:	23ae      	movs	r3, #174	; 0xae\n",
+  "  b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  "  b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  "  b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  "  b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  "  ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  "  bc:	23ba      	movs	r3, #186	; 0xba\n",
+  "  be:	23bc      	movs	r3, #188	; 0xbc\n",
+  "  c0:	23be      	movs	r3, #190	; 0xbe\n",
+  "  c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  "  c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  "  c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  "  c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  "  ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  "  cc:	23ca      	movs	r3, #202	; 0xca\n",
+  "  ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  "  d0:	23ce      	movs	r3, #206	; 0xce\n",
+  "  d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  "  d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  "  d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  "  d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  "  da:	23d8      	movs	r3, #216	; 0xd8\n",
+  "  dc:	23da      	movs	r3, #218	; 0xda\n",
+  "  de:	23dc      	movs	r3, #220	; 0xdc\n",
+  "  e0:	23de      	movs	r3, #222	; 0xde\n",
+  "  e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  "  e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  "  e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  "  e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  "  ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  "  ec:	23ea      	movs	r3, #234	; 0xea\n",
+  "  ee:	23ec      	movs	r3, #236	; 0xec\n",
+  "  f0:	23ee      	movs	r3, #238	; 0xee\n",
+  "  f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  "  f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  "  f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  "  f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  "  fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  "  fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  "  fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 100:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 102:	2300      	movs	r3, #0\n",
+  " 104:	2302      	movs	r3, #2\n",
+  " 106:	2304      	movs	r3, #4\n",
+  " 108:	2306      	movs	r3, #6\n",
+  " 10a:	2308      	movs	r3, #8\n",
+  " 10c:	230a      	movs	r3, #10\n",
+  " 10e:	230c      	movs	r3, #12\n",
+  " 110:	230e      	movs	r3, #14\n",
+  " 112:	2310      	movs	r3, #16\n",
+  " 114:	2312      	movs	r3, #18\n",
+  " 116:	2314      	movs	r3, #20\n",
+  " 118:	2316      	movs	r3, #22\n",
+  " 11a:	2318      	movs	r3, #24\n",
+  " 11c:	231a      	movs	r3, #26\n",
+  " 11e:	231c      	movs	r3, #28\n",
+  " 120:	231e      	movs	r3, #30\n",
+  " 122:	2320      	movs	r3, #32\n",
+  " 124:	2322      	movs	r3, #34	; 0x22\n",
+  " 126:	2324      	movs	r3, #36	; 0x24\n",
+  " 128:	2326      	movs	r3, #38	; 0x26\n",
+  " 12a:	2328      	movs	r3, #40	; 0x28\n",
+  " 12c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 12e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 130:	232e      	movs	r3, #46	; 0x2e\n",
+  " 132:	2330      	movs	r3, #48	; 0x30\n",
+  " 134:	2332      	movs	r3, #50	; 0x32\n",
+  " 136:	2334      	movs	r3, #52	; 0x34\n",
+  " 138:	2336      	movs	r3, #54	; 0x36\n",
+  " 13a:	2338      	movs	r3, #56	; 0x38\n",
+  " 13c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 13e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 140:	233e      	movs	r3, #62	; 0x3e\n",
+  " 142:	2340      	movs	r3, #64	; 0x40\n",
+  " 144:	2342      	movs	r3, #66	; 0x42\n",
+  " 146:	2344      	movs	r3, #68	; 0x44\n",
+  " 148:	2346      	movs	r3, #70	; 0x46\n",
+  " 14a:	2348      	movs	r3, #72	; 0x48\n",
+  " 14c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 14e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 150:	234e      	movs	r3, #78	; 0x4e\n",
+  " 152:	2350      	movs	r3, #80	; 0x50\n",
+  " 154:	2352      	movs	r3, #82	; 0x52\n",
+  " 156:	2354      	movs	r3, #84	; 0x54\n",
+  " 158:	2356      	movs	r3, #86	; 0x56\n",
+  " 15a:	2358      	movs	r3, #88	; 0x58\n",
+  " 15c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 15e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 160:	235e      	movs	r3, #94	; 0x5e\n",
+  " 162:	2360      	movs	r3, #96	; 0x60\n",
+  " 164:	2362      	movs	r3, #98	; 0x62\n",
+  " 166:	2364      	movs	r3, #100	; 0x64\n",
+  " 168:	2366      	movs	r3, #102	; 0x66\n",
+  " 16a:	2368      	movs	r3, #104	; 0x68\n",
+  " 16c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 16e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 170:	236e      	movs	r3, #110	; 0x6e\n",
+  " 172:	2370      	movs	r3, #112	; 0x70\n",
+  " 174:	2372      	movs	r3, #114	; 0x72\n",
+  " 176:	2374      	movs	r3, #116	; 0x74\n",
+  " 178:	2376      	movs	r3, #118	; 0x76\n",
+  " 17a:	2378      	movs	r3, #120	; 0x78\n",
+  " 17c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 17e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 180:	237e      	movs	r3, #126	; 0x7e\n",
+  " 182:	2380      	movs	r3, #128	; 0x80\n",
+  " 184:	2382      	movs	r3, #130	; 0x82\n",
+  " 186:	2384      	movs	r3, #132	; 0x84\n",
+  " 188:	2386      	movs	r3, #134	; 0x86\n",
+  " 18a:	2388      	movs	r3, #136	; 0x88\n",
+  " 18c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 18e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 190:	238e      	movs	r3, #142	; 0x8e\n",
+  " 192:	2390      	movs	r3, #144	; 0x90\n",
+  " 194:	2392      	movs	r3, #146	; 0x92\n",
+  " 196:	2394      	movs	r3, #148	; 0x94\n",
+  " 198:	2396      	movs	r3, #150	; 0x96\n",
+  " 19a:	2398      	movs	r3, #152	; 0x98\n",
+  " 19c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 19e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 1a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 1a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 1a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 1a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 1a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 1aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 1ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 1ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 1b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 1b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 1b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 1b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 1b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 1ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 1bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 1be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 1c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 1c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 1c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 1c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 1c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 1ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 1cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 1ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 1d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 1d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 1d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 1d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 1d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 1da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 1dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 1de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 1e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 1e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 1e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 1e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 1e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 1ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 1ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 1ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 1f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 1f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 1f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 1f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 1f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 1fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 1fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 1fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 200:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 202:	2300      	movs	r3, #0\n",
+  " 204:	2302      	movs	r3, #2\n",
+  " 206:	2304      	movs	r3, #4\n",
+  " 208:	2306      	movs	r3, #6\n",
+  " 20a:	2308      	movs	r3, #8\n",
+  " 20c:	230a      	movs	r3, #10\n",
+  " 20e:	230c      	movs	r3, #12\n",
+  " 210:	230e      	movs	r3, #14\n",
+  " 212:	2310      	movs	r3, #16\n",
+  " 214:	2312      	movs	r3, #18\n",
+  " 216:	2314      	movs	r3, #20\n",
+  " 218:	2316      	movs	r3, #22\n",
+  " 21a:	2318      	movs	r3, #24\n",
+  " 21c:	231a      	movs	r3, #26\n",
+  " 21e:	231c      	movs	r3, #28\n",
+  " 220:	231e      	movs	r3, #30\n",
+  " 222:	2320      	movs	r3, #32\n",
+  " 224:	2322      	movs	r3, #34	; 0x22\n",
+  " 226:	2324      	movs	r3, #36	; 0x24\n",
+  " 228:	2326      	movs	r3, #38	; 0x26\n",
+  " 22a:	2328      	movs	r3, #40	; 0x28\n",
+  " 22c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 22e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 230:	232e      	movs	r3, #46	; 0x2e\n",
+  " 232:	2330      	movs	r3, #48	; 0x30\n",
+  " 234:	2332      	movs	r3, #50	; 0x32\n",
+  " 236:	2334      	movs	r3, #52	; 0x34\n",
+  " 238:	2336      	movs	r3, #54	; 0x36\n",
+  " 23a:	2338      	movs	r3, #56	; 0x38\n",
+  " 23c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 23e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 240:	233e      	movs	r3, #62	; 0x3e\n",
+  " 242:	2340      	movs	r3, #64	; 0x40\n",
+  " 244:	2342      	movs	r3, #66	; 0x42\n",
+  " 246:	2344      	movs	r3, #68	; 0x44\n",
+  " 248:	2346      	movs	r3, #70	; 0x46\n",
+  " 24a:	2348      	movs	r3, #72	; 0x48\n",
+  " 24c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 24e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 250:	234e      	movs	r3, #78	; 0x4e\n",
+  " 252:	2350      	movs	r3, #80	; 0x50\n",
+  " 254:	2352      	movs	r3, #82	; 0x52\n",
+  " 256:	2354      	movs	r3, #84	; 0x54\n",
+  " 258:	2356      	movs	r3, #86	; 0x56\n",
+  " 25a:	2358      	movs	r3, #88	; 0x58\n",
+  " 25c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 25e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 260:	235e      	movs	r3, #94	; 0x5e\n",
+  " 262:	2360      	movs	r3, #96	; 0x60\n",
+  " 264:	2362      	movs	r3, #98	; 0x62\n",
+  " 266:	2364      	movs	r3, #100	; 0x64\n",
+  " 268:	2366      	movs	r3, #102	; 0x66\n",
+  " 26a:	2368      	movs	r3, #104	; 0x68\n",
+  " 26c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 26e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 270:	236e      	movs	r3, #110	; 0x6e\n",
+  " 272:	2370      	movs	r3, #112	; 0x70\n",
+  " 274:	2372      	movs	r3, #114	; 0x72\n",
+  " 276:	2374      	movs	r3, #116	; 0x74\n",
+  " 278:	2376      	movs	r3, #118	; 0x76\n",
+  " 27a:	2378      	movs	r3, #120	; 0x78\n",
+  " 27c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 27e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 280:	237e      	movs	r3, #126	; 0x7e\n",
+  " 282:	2380      	movs	r3, #128	; 0x80\n",
+  " 284:	2382      	movs	r3, #130	; 0x82\n",
+  " 286:	2384      	movs	r3, #132	; 0x84\n",
+  " 288:	2386      	movs	r3, #134	; 0x86\n",
+  " 28a:	2388      	movs	r3, #136	; 0x88\n",
+  " 28c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 28e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 290:	238e      	movs	r3, #142	; 0x8e\n",
+  " 292:	2390      	movs	r3, #144	; 0x90\n",
+  " 294:	2392      	movs	r3, #146	; 0x92\n",
+  " 296:	2394      	movs	r3, #148	; 0x94\n",
+  " 298:	2396      	movs	r3, #150	; 0x96\n",
+  " 29a:	2398      	movs	r3, #152	; 0x98\n",
+  " 29c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 29e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 2a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 2a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 2a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 2a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 2a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 2aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 2ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 2ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 2b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 2b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 2b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 2b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 2b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 2ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 2bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 2be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 2c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 2c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 2c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 2c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 2c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 2ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 2cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 2ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 2d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 2d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 2d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 2d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 2d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 2da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 2dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 2de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 2e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 2e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 2e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 2e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 2e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 2ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 2ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 2ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 2f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 2f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 2f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 2f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 2f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 2fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 2fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 2fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 300:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 302:	2300      	movs	r3, #0\n",
+  " 304:	2302      	movs	r3, #2\n",
+  " 306:	2304      	movs	r3, #4\n",
+  " 308:	2306      	movs	r3, #6\n",
+  " 30a:	2308      	movs	r3, #8\n",
+  " 30c:	230a      	movs	r3, #10\n",
+  " 30e:	230c      	movs	r3, #12\n",
+  " 310:	230e      	movs	r3, #14\n",
+  " 312:	2310      	movs	r3, #16\n",
+  " 314:	2312      	movs	r3, #18\n",
+  " 316:	2314      	movs	r3, #20\n",
+  " 318:	2316      	movs	r3, #22\n",
+  " 31a:	2318      	movs	r3, #24\n",
+  " 31c:	231a      	movs	r3, #26\n",
+  " 31e:	231c      	movs	r3, #28\n",
+  " 320:	231e      	movs	r3, #30\n",
+  " 322:	2320      	movs	r3, #32\n",
+  " 324:	2322      	movs	r3, #34	; 0x22\n",
+  " 326:	2324      	movs	r3, #36	; 0x24\n",
+  " 328:	2326      	movs	r3, #38	; 0x26\n",
+  " 32a:	2328      	movs	r3, #40	; 0x28\n",
+  " 32c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 32e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 330:	232e      	movs	r3, #46	; 0x2e\n",
+  " 332:	2330      	movs	r3, #48	; 0x30\n",
+  " 334:	2332      	movs	r3, #50	; 0x32\n",
+  " 336:	2334      	movs	r3, #52	; 0x34\n",
+  " 338:	2336      	movs	r3, #54	; 0x36\n",
+  " 33a:	2338      	movs	r3, #56	; 0x38\n",
+  " 33c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 33e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 340:	233e      	movs	r3, #62	; 0x3e\n",
+  " 342:	2340      	movs	r3, #64	; 0x40\n",
+  " 344:	2342      	movs	r3, #66	; 0x42\n",
+  " 346:	2344      	movs	r3, #68	; 0x44\n",
+  " 348:	2346      	movs	r3, #70	; 0x46\n",
+  " 34a:	2348      	movs	r3, #72	; 0x48\n",
+  " 34c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 34e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 350:	234e      	movs	r3, #78	; 0x4e\n",
+  " 352:	2350      	movs	r3, #80	; 0x50\n",
+  " 354:	2352      	movs	r3, #82	; 0x52\n",
+  " 356:	2354      	movs	r3, #84	; 0x54\n",
+  " 358:	2356      	movs	r3, #86	; 0x56\n",
+  " 35a:	2358      	movs	r3, #88	; 0x58\n",
+  " 35c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 35e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 360:	235e      	movs	r3, #94	; 0x5e\n",
+  " 362:	2360      	movs	r3, #96	; 0x60\n",
+  " 364:	2362      	movs	r3, #98	; 0x62\n",
+  " 366:	2364      	movs	r3, #100	; 0x64\n",
+  " 368:	2366      	movs	r3, #102	; 0x66\n",
+  " 36a:	2368      	movs	r3, #104	; 0x68\n",
+  " 36c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 36e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 370:	236e      	movs	r3, #110	; 0x6e\n",
+  " 372:	2370      	movs	r3, #112	; 0x70\n",
+  " 374:	2372      	movs	r3, #114	; 0x72\n",
+  " 376:	2374      	movs	r3, #116	; 0x74\n",
+  " 378:	2376      	movs	r3, #118	; 0x76\n",
+  " 37a:	2378      	movs	r3, #120	; 0x78\n",
+  " 37c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 37e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 380:	237e      	movs	r3, #126	; 0x7e\n",
+  " 382:	2380      	movs	r3, #128	; 0x80\n",
+  " 384:	2382      	movs	r3, #130	; 0x82\n",
+  " 386:	2384      	movs	r3, #132	; 0x84\n",
+  " 388:	2386      	movs	r3, #134	; 0x86\n",
+  " 38a:	2388      	movs	r3, #136	; 0x88\n",
+  " 38c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 38e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 390:	238e      	movs	r3, #142	; 0x8e\n",
+  " 392:	2390      	movs	r3, #144	; 0x90\n",
+  " 394:	2392      	movs	r3, #146	; 0x92\n",
+  " 396:	2394      	movs	r3, #148	; 0x94\n",
+  " 398:	2396      	movs	r3, #150	; 0x96\n",
+  " 39a:	2398      	movs	r3, #152	; 0x98\n",
+  " 39c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 39e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 3a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 3a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 3a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 3a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 3a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 3aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 3ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 3ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 3b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 3b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 3b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 3b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 3b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 3ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 3bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 3be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 3c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 3c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 3c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 3c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 3c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 3ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 3cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 3ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 3d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 3d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 3d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 3d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 3d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 3da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 3dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 3de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 3e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 3e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 3e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 3e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 3e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 3ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 3ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 3ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 3f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 3f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 3f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 3f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 3f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 3fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 3fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 3fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 400:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 402:	2300      	movs	r3, #0\n",
+  " 404:	2302      	movs	r3, #2\n",
+  " 406:	2304      	movs	r3, #4\n",
+  " 408:	2306      	movs	r3, #6\n",
+  " 40a:	2308      	movs	r3, #8\n",
+  " 40c:	230a      	movs	r3, #10\n",
+  " 40e:	230c      	movs	r3, #12\n",
+  " 410:	230e      	movs	r3, #14\n",
+  " 412:	2310      	movs	r3, #16\n",
+  " 414:	2312      	movs	r3, #18\n",
+  " 416:	2314      	movs	r3, #20\n",
+  " 418:	2316      	movs	r3, #22\n",
+  " 41a:	2318      	movs	r3, #24\n",
+  " 41c:	231a      	movs	r3, #26\n",
+  " 41e:	231c      	movs	r3, #28\n",
+  " 420:	231e      	movs	r3, #30\n",
+  " 422:	2320      	movs	r3, #32\n",
+  " 424:	2322      	movs	r3, #34	; 0x22\n",
+  " 426:	2324      	movs	r3, #36	; 0x24\n",
+  " 428:	2326      	movs	r3, #38	; 0x26\n",
+  " 42a:	2328      	movs	r3, #40	; 0x28\n",
+  " 42c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 42e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 430:	232e      	movs	r3, #46	; 0x2e\n",
+  " 432:	2330      	movs	r3, #48	; 0x30\n",
+  " 434:	2332      	movs	r3, #50	; 0x32\n",
+  " 436:	2334      	movs	r3, #52	; 0x34\n",
+  " 438:	2336      	movs	r3, #54	; 0x36\n",
+  " 43a:	2338      	movs	r3, #56	; 0x38\n",
+  " 43c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 43e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 440:	233e      	movs	r3, #62	; 0x3e\n",
+  " 442:	2340      	movs	r3, #64	; 0x40\n",
+  " 444:	2342      	movs	r3, #66	; 0x42\n",
+  " 446:	2344      	movs	r3, #68	; 0x44\n",
+  " 448:	2346      	movs	r3, #70	; 0x46\n",
+  " 44a:	2348      	movs	r3, #72	; 0x48\n",
+  " 44c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 44e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 450:	234e      	movs	r3, #78	; 0x4e\n",
+  " 452:	2350      	movs	r3, #80	; 0x50\n",
+  " 454:	2352      	movs	r3, #82	; 0x52\n",
+  " 456:	2354      	movs	r3, #84	; 0x54\n",
+  " 458:	2356      	movs	r3, #86	; 0x56\n",
+  " 45a:	2358      	movs	r3, #88	; 0x58\n",
+  " 45c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 45e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 460:	235e      	movs	r3, #94	; 0x5e\n",
+  " 462:	2360      	movs	r3, #96	; 0x60\n",
+  " 464:	2362      	movs	r3, #98	; 0x62\n",
+  " 466:	2364      	movs	r3, #100	; 0x64\n",
+  " 468:	2366      	movs	r3, #102	; 0x66\n",
+  " 46a:	2368      	movs	r3, #104	; 0x68\n",
+  " 46c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 46e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 470:	236e      	movs	r3, #110	; 0x6e\n",
+  " 472:	2370      	movs	r3, #112	; 0x70\n",
+  " 474:	2372      	movs	r3, #114	; 0x72\n",
+  " 476:	2374      	movs	r3, #116	; 0x74\n",
+  " 478:	2376      	movs	r3, #118	; 0x76\n",
+  " 47a:	2378      	movs	r3, #120	; 0x78\n",
+  " 47c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 47e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 480:	237e      	movs	r3, #126	; 0x7e\n",
+  " 482:	2380      	movs	r3, #128	; 0x80\n",
+  " 484:	2382      	movs	r3, #130	; 0x82\n",
+  " 486:	2384      	movs	r3, #132	; 0x84\n",
+  " 488:	2386      	movs	r3, #134	; 0x86\n",
+  " 48a:	2388      	movs	r3, #136	; 0x88\n",
+  " 48c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 48e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 490:	238e      	movs	r3, #142	; 0x8e\n",
+  " 492:	2390      	movs	r3, #144	; 0x90\n",
+  " 494:	2392      	movs	r3, #146	; 0x92\n",
+  " 496:	2394      	movs	r3, #148	; 0x94\n",
+  " 498:	2396      	movs	r3, #150	; 0x96\n",
+  " 49a:	2398      	movs	r3, #152	; 0x98\n",
+  " 49c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 49e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 4a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 4a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 4a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 4a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 4a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 4aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 4ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 4ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 4b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 4b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 4b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 4b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 4b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 4ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 4bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 4be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 4c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 4c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 4c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 4c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 4c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 4ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 4cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 4ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 4d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 4d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 4d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 4d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 4d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 4da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 4dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 4de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 4e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 4e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 4e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 4e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 4e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 4ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 4ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 4ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 4f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 4f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 4f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 4f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 4f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 4fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 4fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 4fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 500:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 502:	2300      	movs	r3, #0\n",
+  " 504:	2302      	movs	r3, #2\n",
+  " 506:	2304      	movs	r3, #4\n",
+  " 508:	2306      	movs	r3, #6\n",
+  " 50a:	2308      	movs	r3, #8\n",
+  " 50c:	230a      	movs	r3, #10\n",
+  " 50e:	230c      	movs	r3, #12\n",
+  " 510:	230e      	movs	r3, #14\n",
+  " 512:	2310      	movs	r3, #16\n",
+  " 514:	2312      	movs	r3, #18\n",
+  " 516:	2314      	movs	r3, #20\n",
+  " 518:	2316      	movs	r3, #22\n",
+  " 51a:	2318      	movs	r3, #24\n",
+  " 51c:	231a      	movs	r3, #26\n",
+  " 51e:	231c      	movs	r3, #28\n",
+  " 520:	231e      	movs	r3, #30\n",
+  " 522:	2320      	movs	r3, #32\n",
+  " 524:	2322      	movs	r3, #34	; 0x22\n",
+  " 526:	2324      	movs	r3, #36	; 0x24\n",
+  " 528:	2326      	movs	r3, #38	; 0x26\n",
+  " 52a:	2328      	movs	r3, #40	; 0x28\n",
+  " 52c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 52e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 530:	232e      	movs	r3, #46	; 0x2e\n",
+  " 532:	2330      	movs	r3, #48	; 0x30\n",
+  " 534:	2332      	movs	r3, #50	; 0x32\n",
+  " 536:	2334      	movs	r3, #52	; 0x34\n",
+  " 538:	2336      	movs	r3, #54	; 0x36\n",
+  " 53a:	2338      	movs	r3, #56	; 0x38\n",
+  " 53c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 53e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 540:	233e      	movs	r3, #62	; 0x3e\n",
+  " 542:	2340      	movs	r3, #64	; 0x40\n",
+  " 544:	2342      	movs	r3, #66	; 0x42\n",
+  " 546:	2344      	movs	r3, #68	; 0x44\n",
+  " 548:	2346      	movs	r3, #70	; 0x46\n",
+  " 54a:	2348      	movs	r3, #72	; 0x48\n",
+  " 54c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 54e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 550:	234e      	movs	r3, #78	; 0x4e\n",
+  " 552:	2350      	movs	r3, #80	; 0x50\n",
+  " 554:	2352      	movs	r3, #82	; 0x52\n",
+  " 556:	2354      	movs	r3, #84	; 0x54\n",
+  " 558:	2356      	movs	r3, #86	; 0x56\n",
+  " 55a:	2358      	movs	r3, #88	; 0x58\n",
+  " 55c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 55e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 560:	235e      	movs	r3, #94	; 0x5e\n",
+  " 562:	2360      	movs	r3, #96	; 0x60\n",
+  " 564:	2362      	movs	r3, #98	; 0x62\n",
+  " 566:	2364      	movs	r3, #100	; 0x64\n",
+  " 568:	2366      	movs	r3, #102	; 0x66\n",
+  " 56a:	2368      	movs	r3, #104	; 0x68\n",
+  " 56c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 56e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 570:	236e      	movs	r3, #110	; 0x6e\n",
+  " 572:	2370      	movs	r3, #112	; 0x70\n",
+  " 574:	2372      	movs	r3, #114	; 0x72\n",
+  " 576:	2374      	movs	r3, #116	; 0x74\n",
+  " 578:	2376      	movs	r3, #118	; 0x76\n",
+  " 57a:	2378      	movs	r3, #120	; 0x78\n",
+  " 57c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 57e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 580:	237e      	movs	r3, #126	; 0x7e\n",
+  " 582:	2380      	movs	r3, #128	; 0x80\n",
+  " 584:	2382      	movs	r3, #130	; 0x82\n",
+  " 586:	2384      	movs	r3, #132	; 0x84\n",
+  " 588:	2386      	movs	r3, #134	; 0x86\n",
+  " 58a:	2388      	movs	r3, #136	; 0x88\n",
+  " 58c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 58e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 590:	238e      	movs	r3, #142	; 0x8e\n",
+  " 592:	2390      	movs	r3, #144	; 0x90\n",
+  " 594:	2392      	movs	r3, #146	; 0x92\n",
+  " 596:	2394      	movs	r3, #148	; 0x94\n",
+  " 598:	2396      	movs	r3, #150	; 0x96\n",
+  " 59a:	2398      	movs	r3, #152	; 0x98\n",
+  " 59c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 59e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 5a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 5a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 5a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 5a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 5a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 5aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 5ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 5ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 5b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 5b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 5b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 5b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 5b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 5ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 5bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 5be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 5c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 5c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 5c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 5c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 5c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 5ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 5cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 5ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 5d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 5d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 5d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 5d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 5d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 5da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 5dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 5de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 5e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 5e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 5e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 5e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 5e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 5ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 5ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 5ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 5f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 5f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 5f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 5f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 5f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 5fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 5fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 5fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 600:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 602:	2300      	movs	r3, #0\n",
+  " 604:	2302      	movs	r3, #2\n",
+  " 606:	2304      	movs	r3, #4\n",
+  " 608:	2306      	movs	r3, #6\n",
+  " 60a:	2308      	movs	r3, #8\n",
+  " 60c:	230a      	movs	r3, #10\n",
+  " 60e:	230c      	movs	r3, #12\n",
+  " 610:	230e      	movs	r3, #14\n",
+  " 612:	2310      	movs	r3, #16\n",
+  " 614:	2312      	movs	r3, #18\n",
+  " 616:	2314      	movs	r3, #20\n",
+  " 618:	2316      	movs	r3, #22\n",
+  " 61a:	2318      	movs	r3, #24\n",
+  " 61c:	231a      	movs	r3, #26\n",
+  " 61e:	231c      	movs	r3, #28\n",
+  " 620:	231e      	movs	r3, #30\n",
+  " 622:	2320      	movs	r3, #32\n",
+  " 624:	2322      	movs	r3, #34	; 0x22\n",
+  " 626:	2324      	movs	r3, #36	; 0x24\n",
+  " 628:	2326      	movs	r3, #38	; 0x26\n",
+  " 62a:	2328      	movs	r3, #40	; 0x28\n",
+  " 62c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 62e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 630:	232e      	movs	r3, #46	; 0x2e\n",
+  " 632:	2330      	movs	r3, #48	; 0x30\n",
+  " 634:	2332      	movs	r3, #50	; 0x32\n",
+  " 636:	2334      	movs	r3, #52	; 0x34\n",
+  " 638:	2336      	movs	r3, #54	; 0x36\n",
+  " 63a:	2338      	movs	r3, #56	; 0x38\n",
+  " 63c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 63e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 640:	233e      	movs	r3, #62	; 0x3e\n",
+  " 642:	2340      	movs	r3, #64	; 0x40\n",
+  " 644:	2342      	movs	r3, #66	; 0x42\n",
+  " 646:	2344      	movs	r3, #68	; 0x44\n",
+  " 648:	2346      	movs	r3, #70	; 0x46\n",
+  " 64a:	2348      	movs	r3, #72	; 0x48\n",
+  " 64c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 64e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 650:	234e      	movs	r3, #78	; 0x4e\n",
+  " 652:	2350      	movs	r3, #80	; 0x50\n",
+  " 654:	2352      	movs	r3, #82	; 0x52\n",
+  " 656:	2354      	movs	r3, #84	; 0x54\n",
+  " 658:	2356      	movs	r3, #86	; 0x56\n",
+  " 65a:	2358      	movs	r3, #88	; 0x58\n",
+  " 65c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 65e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 660:	235e      	movs	r3, #94	; 0x5e\n",
+  " 662:	2360      	movs	r3, #96	; 0x60\n",
+  " 664:	2362      	movs	r3, #98	; 0x62\n",
+  " 666:	2364      	movs	r3, #100	; 0x64\n",
+  " 668:	2366      	movs	r3, #102	; 0x66\n",
+  " 66a:	2368      	movs	r3, #104	; 0x68\n",
+  " 66c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 66e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 670:	236e      	movs	r3, #110	; 0x6e\n",
+  " 672:	2370      	movs	r3, #112	; 0x70\n",
+  " 674:	2372      	movs	r3, #114	; 0x72\n",
+  " 676:	2374      	movs	r3, #116	; 0x74\n",
+  " 678:	2376      	movs	r3, #118	; 0x76\n",
+  " 67a:	2378      	movs	r3, #120	; 0x78\n",
+  " 67c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 67e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 680:	237e      	movs	r3, #126	; 0x7e\n",
+  " 682:	2380      	movs	r3, #128	; 0x80\n",
+  " 684:	2382      	movs	r3, #130	; 0x82\n",
+  " 686:	2384      	movs	r3, #132	; 0x84\n",
+  " 688:	2386      	movs	r3, #134	; 0x86\n",
+  " 68a:	2388      	movs	r3, #136	; 0x88\n",
+  " 68c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 68e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 690:	238e      	movs	r3, #142	; 0x8e\n",
+  " 692:	2390      	movs	r3, #144	; 0x90\n",
+  " 694:	2392      	movs	r3, #146	; 0x92\n",
+  " 696:	2394      	movs	r3, #148	; 0x94\n",
+  " 698:	2396      	movs	r3, #150	; 0x96\n",
+  " 69a:	2398      	movs	r3, #152	; 0x98\n",
+  " 69c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 69e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 6a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 6a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 6a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 6a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 6a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 6aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 6ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 6ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 6b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 6b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 6b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 6b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 6b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 6ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 6bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 6be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 6c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 6c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 6c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 6c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 6c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 6ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 6cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 6ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 6d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 6d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 6d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 6d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 6d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 6da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 6dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 6de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 6e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 6e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 6e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 6e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 6e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 6ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 6ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 6ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 6f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 6f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 6f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 6f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 6f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 6fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 6fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 6fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 700:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 702:	2300      	movs	r3, #0\n",
+  " 704:	2302      	movs	r3, #2\n",
+  " 706:	2304      	movs	r3, #4\n",
+  " 708:	2306      	movs	r3, #6\n",
+  " 70a:	2308      	movs	r3, #8\n",
+  " 70c:	230a      	movs	r3, #10\n",
+  " 70e:	230c      	movs	r3, #12\n",
+  " 710:	230e      	movs	r3, #14\n",
+  " 712:	2310      	movs	r3, #16\n",
+  " 714:	2312      	movs	r3, #18\n",
+  " 716:	2314      	movs	r3, #20\n",
+  " 718:	2316      	movs	r3, #22\n",
+  " 71a:	2318      	movs	r3, #24\n",
+  " 71c:	231a      	movs	r3, #26\n",
+  " 71e:	231c      	movs	r3, #28\n",
+  " 720:	231e      	movs	r3, #30\n",
+  " 722:	2320      	movs	r3, #32\n",
+  " 724:	2322      	movs	r3, #34	; 0x22\n",
+  " 726:	2324      	movs	r3, #36	; 0x24\n",
+  " 728:	2326      	movs	r3, #38	; 0x26\n",
+  " 72a:	2328      	movs	r3, #40	; 0x28\n",
+  " 72c:	232a      	movs	r3, #42	; 0x2a\n",
+  " 72e:	232c      	movs	r3, #44	; 0x2c\n",
+  " 730:	232e      	movs	r3, #46	; 0x2e\n",
+  " 732:	2330      	movs	r3, #48	; 0x30\n",
+  " 734:	2332      	movs	r3, #50	; 0x32\n",
+  " 736:	2334      	movs	r3, #52	; 0x34\n",
+  " 738:	2336      	movs	r3, #54	; 0x36\n",
+  " 73a:	2338      	movs	r3, #56	; 0x38\n",
+  " 73c:	233a      	movs	r3, #58	; 0x3a\n",
+  " 73e:	233c      	movs	r3, #60	; 0x3c\n",
+  " 740:	233e      	movs	r3, #62	; 0x3e\n",
+  " 742:	2340      	movs	r3, #64	; 0x40\n",
+  " 744:	2342      	movs	r3, #66	; 0x42\n",
+  " 746:	2344      	movs	r3, #68	; 0x44\n",
+  " 748:	2346      	movs	r3, #70	; 0x46\n",
+  " 74a:	2348      	movs	r3, #72	; 0x48\n",
+  " 74c:	234a      	movs	r3, #74	; 0x4a\n",
+  " 74e:	234c      	movs	r3, #76	; 0x4c\n",
+  " 750:	234e      	movs	r3, #78	; 0x4e\n",
+  " 752:	2350      	movs	r3, #80	; 0x50\n",
+  " 754:	2352      	movs	r3, #82	; 0x52\n",
+  " 756:	2354      	movs	r3, #84	; 0x54\n",
+  " 758:	2356      	movs	r3, #86	; 0x56\n",
+  " 75a:	2358      	movs	r3, #88	; 0x58\n",
+  " 75c:	235a      	movs	r3, #90	; 0x5a\n",
+  " 75e:	235c      	movs	r3, #92	; 0x5c\n",
+  " 760:	235e      	movs	r3, #94	; 0x5e\n",
+  " 762:	2360      	movs	r3, #96	; 0x60\n",
+  " 764:	2362      	movs	r3, #98	; 0x62\n",
+  " 766:	2364      	movs	r3, #100	; 0x64\n",
+  " 768:	2366      	movs	r3, #102	; 0x66\n",
+  " 76a:	2368      	movs	r3, #104	; 0x68\n",
+  " 76c:	236a      	movs	r3, #106	; 0x6a\n",
+  " 76e:	236c      	movs	r3, #108	; 0x6c\n",
+  " 770:	236e      	movs	r3, #110	; 0x6e\n",
+  " 772:	2370      	movs	r3, #112	; 0x70\n",
+  " 774:	2372      	movs	r3, #114	; 0x72\n",
+  " 776:	2374      	movs	r3, #116	; 0x74\n",
+  " 778:	2376      	movs	r3, #118	; 0x76\n",
+  " 77a:	2378      	movs	r3, #120	; 0x78\n",
+  " 77c:	237a      	movs	r3, #122	; 0x7a\n",
+  " 77e:	237c      	movs	r3, #124	; 0x7c\n",
+  " 780:	237e      	movs	r3, #126	; 0x7e\n",
+  " 782:	2380      	movs	r3, #128	; 0x80\n",
+  " 784:	2382      	movs	r3, #130	; 0x82\n",
+  " 786:	2384      	movs	r3, #132	; 0x84\n",
+  " 788:	2386      	movs	r3, #134	; 0x86\n",
+  " 78a:	2388      	movs	r3, #136	; 0x88\n",
+  " 78c:	238a      	movs	r3, #138	; 0x8a\n",
+  " 78e:	238c      	movs	r3, #140	; 0x8c\n",
+  " 790:	238e      	movs	r3, #142	; 0x8e\n",
+  " 792:	2390      	movs	r3, #144	; 0x90\n",
+  " 794:	2392      	movs	r3, #146	; 0x92\n",
+  " 796:	2394      	movs	r3, #148	; 0x94\n",
+  " 798:	2396      	movs	r3, #150	; 0x96\n",
+  " 79a:	2398      	movs	r3, #152	; 0x98\n",
+  " 79c:	239a      	movs	r3, #154	; 0x9a\n",
+  " 79e:	239c      	movs	r3, #156	; 0x9c\n",
+  " 7a0:	239e      	movs	r3, #158	; 0x9e\n",
+  " 7a2:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 7a4:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 7a6:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 7a8:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 7aa:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 7ac:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 7ae:	23ac      	movs	r3, #172	; 0xac\n",
+  " 7b0:	23ae      	movs	r3, #174	; 0xae\n",
+  " 7b2:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 7b4:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 7b6:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 7b8:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 7ba:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 7bc:	23ba      	movs	r3, #186	; 0xba\n",
+  " 7be:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 7c0:	23be      	movs	r3, #190	; 0xbe\n",
+  " 7c2:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 7c4:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 7c6:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 7c8:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 7ca:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 7cc:	23ca      	movs	r3, #202	; 0xca\n",
+  " 7ce:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 7d0:	23ce      	movs	r3, #206	; 0xce\n",
+  " 7d2:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 7d4:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 7d6:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 7d8:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 7da:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 7dc:	23da      	movs	r3, #218	; 0xda\n",
+  " 7de:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 7e0:	23de      	movs	r3, #222	; 0xde\n",
+  " 7e2:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 7e4:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 7e6:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 7e8:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 7ea:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 7ec:	23ea      	movs	r3, #234	; 0xea\n",
+  " 7ee:	23ec      	movs	r3, #236	; 0xec\n",
+  " 7f0:	23ee      	movs	r3, #238	; 0xee\n",
+  " 7f2:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 7f4:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 7f6:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 7f8:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 7fa:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 7fc:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 7fe:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 800:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 802:	0011      	movs	r1, r2\n",
+  nullptr
+};
+const char* Branch32Results[] = {
+  "   0:	f000 bc01 	b.w	806 <Branch32+0x806>\n",
+  "   4:	2300      	movs	r3, #0\n",
+  "   6:	2302      	movs	r3, #2\n",
+  "   8:	2304      	movs	r3, #4\n",
+  "   a:	2306      	movs	r3, #6\n",
+  "   c:	2308      	movs	r3, #8\n",
+  "   e:	230a      	movs	r3, #10\n",
+  "  10:	230c      	movs	r3, #12\n",
+  "  12:	230e      	movs	r3, #14\n",
+  "  14:	2310      	movs	r3, #16\n",
+  "  16:	2312      	movs	r3, #18\n",
+  "  18:	2314      	movs	r3, #20\n",
+  "  1a:	2316      	movs	r3, #22\n",
+  "  1c:	2318      	movs	r3, #24\n",
+  "  1e:	231a      	movs	r3, #26\n",
+  "  20:	231c      	movs	r3, #28\n",
+  "  22:	231e      	movs	r3, #30\n",
+  "  24:	2320      	movs	r3, #32\n",
+  "  26:	2322      	movs	r3, #34	; 0x22\n",
+  "  28:	2324      	movs	r3, #36	; 0x24\n",
+  "  2a:	2326      	movs	r3, #38	; 0x26\n",
+  "  2c:	2328      	movs	r3, #40	; 0x28\n",
+  "  2e:	232a      	movs	r3, #42	; 0x2a\n",
+  "  30:	232c      	movs	r3, #44	; 0x2c\n",
+  "  32:	232e      	movs	r3, #46	; 0x2e\n",
+  "  34:	2330      	movs	r3, #48	; 0x30\n",
+  "  36:	2332      	movs	r3, #50	; 0x32\n",
+  "  38:	2334      	movs	r3, #52	; 0x34\n",
+  "  3a:	2336      	movs	r3, #54	; 0x36\n",
+  "  3c:	2338      	movs	r3, #56	; 0x38\n",
+  "  3e:	233a      	movs	r3, #58	; 0x3a\n",
+  "  40:	233c      	movs	r3, #60	; 0x3c\n",
+  "  42:	233e      	movs	r3, #62	; 0x3e\n",
+  "  44:	2340      	movs	r3, #64	; 0x40\n",
+  "  46:	2342      	movs	r3, #66	; 0x42\n",
+  "  48:	2344      	movs	r3, #68	; 0x44\n",
+  "  4a:	2346      	movs	r3, #70	; 0x46\n",
+  "  4c:	2348      	movs	r3, #72	; 0x48\n",
+  "  4e:	234a      	movs	r3, #74	; 0x4a\n",
+  "  50:	234c      	movs	r3, #76	; 0x4c\n",
+  "  52:	234e      	movs	r3, #78	; 0x4e\n",
+  "  54:	2350      	movs	r3, #80	; 0x50\n",
+  "  56:	2352      	movs	r3, #82	; 0x52\n",
+  "  58:	2354      	movs	r3, #84	; 0x54\n",
+  "  5a:	2356      	movs	r3, #86	; 0x56\n",
+  "  5c:	2358      	movs	r3, #88	; 0x58\n",
+  "  5e:	235a      	movs	r3, #90	; 0x5a\n",
+  "  60:	235c      	movs	r3, #92	; 0x5c\n",
+  "  62:	235e      	movs	r3, #94	; 0x5e\n",
+  "  64:	2360      	movs	r3, #96	; 0x60\n",
+  "  66:	2362      	movs	r3, #98	; 0x62\n",
+  "  68:	2364      	movs	r3, #100	; 0x64\n",
+  "  6a:	2366      	movs	r3, #102	; 0x66\n",
+  "  6c:	2368      	movs	r3, #104	; 0x68\n",
+  "  6e:	236a      	movs	r3, #106	; 0x6a\n",
+  "  70:	236c      	movs	r3, #108	; 0x6c\n",
+  "  72:	236e      	movs	r3, #110	; 0x6e\n",
+  "  74:	2370      	movs	r3, #112	; 0x70\n",
+  "  76:	2372      	movs	r3, #114	; 0x72\n",
+  "  78:	2374      	movs	r3, #116	; 0x74\n",
+  "  7a:	2376      	movs	r3, #118	; 0x76\n",
+  "  7c:	2378      	movs	r3, #120	; 0x78\n",
+  "  7e:	237a      	movs	r3, #122	; 0x7a\n",
+  "  80:	237c      	movs	r3, #124	; 0x7c\n",
+  "  82:	237e      	movs	r3, #126	; 0x7e\n",
+  "  84:	2380      	movs	r3, #128	; 0x80\n",
+  "  86:	2382      	movs	r3, #130	; 0x82\n",
+  "  88:	2384      	movs	r3, #132	; 0x84\n",
+  "  8a:	2386      	movs	r3, #134	; 0x86\n",
+  "  8c:	2388      	movs	r3, #136	; 0x88\n",
+  "  8e:	238a      	movs	r3, #138	; 0x8a\n",
+  "  90:	238c      	movs	r3, #140	; 0x8c\n",
+  "  92:	238e      	movs	r3, #142	; 0x8e\n",
+  "  94:	2390      	movs	r3, #144	; 0x90\n",
+  "  96:	2392      	movs	r3, #146	; 0x92\n",
+  "  98:	2394      	movs	r3, #148	; 0x94\n",
+  "  9a:	2396      	movs	r3, #150	; 0x96\n",
+  "  9c:	2398      	movs	r3, #152	; 0x98\n",
+  "  9e:	239a      	movs	r3, #154	; 0x9a\n",
+  "  a0:	239c      	movs	r3, #156	; 0x9c\n",
+  "  a2:	239e      	movs	r3, #158	; 0x9e\n",
+  "  a4:	23a0      	movs	r3, #160	; 0xa0\n",
+  "  a6:	23a2      	movs	r3, #162	; 0xa2\n",
+  "  a8:	23a4      	movs	r3, #164	; 0xa4\n",
+  "  aa:	23a6      	movs	r3, #166	; 0xa6\n",
+  "  ac:	23a8      	movs	r3, #168	; 0xa8\n",
+  "  ae:	23aa      	movs	r3, #170	; 0xaa\n",
+  "  b0:	23ac      	movs	r3, #172	; 0xac\n",
+  "  b2:	23ae      	movs	r3, #174	; 0xae\n",
+  "  b4:	23b0      	movs	r3, #176	; 0xb0\n",
+  "  b6:	23b2      	movs	r3, #178	; 0xb2\n",
+  "  b8:	23b4      	movs	r3, #180	; 0xb4\n",
+  "  ba:	23b6      	movs	r3, #182	; 0xb6\n",
+  "  bc:	23b8      	movs	r3, #184	; 0xb8\n",
+  "  be:	23ba      	movs	r3, #186	; 0xba\n",
+  "  c0:	23bc      	movs	r3, #188	; 0xbc\n",
+  "  c2:	23be      	movs	r3, #190	; 0xbe\n",
+  "  c4:	23c0      	movs	r3, #192	; 0xc0\n",
+  "  c6:	23c2      	movs	r3, #194	; 0xc2\n",
+  "  c8:	23c4      	movs	r3, #196	; 0xc4\n",
+  "  ca:	23c6      	movs	r3, #198	; 0xc6\n",
+  "  cc:	23c8      	movs	r3, #200	; 0xc8\n",
+  "  ce:	23ca      	movs	r3, #202	; 0xca\n",
+  "  d0:	23cc      	movs	r3, #204	; 0xcc\n",
+  "  d2:	23ce      	movs	r3, #206	; 0xce\n",
+  "  d4:	23d0      	movs	r3, #208	; 0xd0\n",
+  "  d6:	23d2      	movs	r3, #210	; 0xd2\n",
+  "  d8:	23d4      	movs	r3, #212	; 0xd4\n",
+  "  da:	23d6      	movs	r3, #214	; 0xd6\n",
+  "  dc:	23d8      	movs	r3, #216	; 0xd8\n",
+  "  de:	23da      	movs	r3, #218	; 0xda\n",
+  "  e0:	23dc      	movs	r3, #220	; 0xdc\n",
+  "  e2:	23de      	movs	r3, #222	; 0xde\n",
+  "  e4:	23e0      	movs	r3, #224	; 0xe0\n",
+  "  e6:	23e2      	movs	r3, #226	; 0xe2\n",
+  "  e8:	23e4      	movs	r3, #228	; 0xe4\n",
+  "  ea:	23e6      	movs	r3, #230	; 0xe6\n",
+  "  ec:	23e8      	movs	r3, #232	; 0xe8\n",
+  "  ee:	23ea      	movs	r3, #234	; 0xea\n",
+  "  f0:	23ec      	movs	r3, #236	; 0xec\n",
+  "  f2:	23ee      	movs	r3, #238	; 0xee\n",
+  "  f4:	23f0      	movs	r3, #240	; 0xf0\n",
+  "  f6:	23f2      	movs	r3, #242	; 0xf2\n",
+  "  f8:	23f4      	movs	r3, #244	; 0xf4\n",
+  "  fa:	23f6      	movs	r3, #246	; 0xf6\n",
+  "  fc:	23f8      	movs	r3, #248	; 0xf8\n",
+  "  fe:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 100:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 102:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 104:	2300      	movs	r3, #0\n",
+  " 106:	2302      	movs	r3, #2\n",
+  " 108:	2304      	movs	r3, #4\n",
+  " 10a:	2306      	movs	r3, #6\n",
+  " 10c:	2308      	movs	r3, #8\n",
+  " 10e:	230a      	movs	r3, #10\n",
+  " 110:	230c      	movs	r3, #12\n",
+  " 112:	230e      	movs	r3, #14\n",
+  " 114:	2310      	movs	r3, #16\n",
+  " 116:	2312      	movs	r3, #18\n",
+  " 118:	2314      	movs	r3, #20\n",
+  " 11a:	2316      	movs	r3, #22\n",
+  " 11c:	2318      	movs	r3, #24\n",
+  " 11e:	231a      	movs	r3, #26\n",
+  " 120:	231c      	movs	r3, #28\n",
+  " 122:	231e      	movs	r3, #30\n",
+  " 124:	2320      	movs	r3, #32\n",
+  " 126:	2322      	movs	r3, #34	; 0x22\n",
+  " 128:	2324      	movs	r3, #36	; 0x24\n",
+  " 12a:	2326      	movs	r3, #38	; 0x26\n",
+  " 12c:	2328      	movs	r3, #40	; 0x28\n",
+  " 12e:	232a      	movs	r3, #42	; 0x2a\n",
+  " 130:	232c      	movs	r3, #44	; 0x2c\n",
+  " 132:	232e      	movs	r3, #46	; 0x2e\n",
+  " 134:	2330      	movs	r3, #48	; 0x30\n",
+  " 136:	2332      	movs	r3, #50	; 0x32\n",
+  " 138:	2334      	movs	r3, #52	; 0x34\n",
+  " 13a:	2336      	movs	r3, #54	; 0x36\n",
+  " 13c:	2338      	movs	r3, #56	; 0x38\n",
+  " 13e:	233a      	movs	r3, #58	; 0x3a\n",
+  " 140:	233c      	movs	r3, #60	; 0x3c\n",
+  " 142:	233e      	movs	r3, #62	; 0x3e\n",
+  " 144:	2340      	movs	r3, #64	; 0x40\n",
+  " 146:	2342      	movs	r3, #66	; 0x42\n",
+  " 148:	2344      	movs	r3, #68	; 0x44\n",
+  " 14a:	2346      	movs	r3, #70	; 0x46\n",
+  " 14c:	2348      	movs	r3, #72	; 0x48\n",
+  " 14e:	234a      	movs	r3, #74	; 0x4a\n",
+  " 150:	234c      	movs	r3, #76	; 0x4c\n",
+  " 152:	234e      	movs	r3, #78	; 0x4e\n",
+  " 154:	2350      	movs	r3, #80	; 0x50\n",
+  " 156:	2352      	movs	r3, #82	; 0x52\n",
+  " 158:	2354      	movs	r3, #84	; 0x54\n",
+  " 15a:	2356      	movs	r3, #86	; 0x56\n",
+  " 15c:	2358      	movs	r3, #88	; 0x58\n",
+  " 15e:	235a      	movs	r3, #90	; 0x5a\n",
+  " 160:	235c      	movs	r3, #92	; 0x5c\n",
+  " 162:	235e      	movs	r3, #94	; 0x5e\n",
+  " 164:	2360      	movs	r3, #96	; 0x60\n",
+  " 166:	2362      	movs	r3, #98	; 0x62\n",
+  " 168:	2364      	movs	r3, #100	; 0x64\n",
+  " 16a:	2366      	movs	r3, #102	; 0x66\n",
+  " 16c:	2368      	movs	r3, #104	; 0x68\n",
+  " 16e:	236a      	movs	r3, #106	; 0x6a\n",
+  " 170:	236c      	movs	r3, #108	; 0x6c\n",
+  " 172:	236e      	movs	r3, #110	; 0x6e\n",
+  " 174:	2370      	movs	r3, #112	; 0x70\n",
+  " 176:	2372      	movs	r3, #114	; 0x72\n",
+  " 178:	2374      	movs	r3, #116	; 0x74\n",
+  " 17a:	2376      	movs	r3, #118	; 0x76\n",
+  " 17c:	2378      	movs	r3, #120	; 0x78\n",
+  " 17e:	237a      	movs	r3, #122	; 0x7a\n",
+  " 180:	237c      	movs	r3, #124	; 0x7c\n",
+  " 182:	237e      	movs	r3, #126	; 0x7e\n",
+  " 184:	2380      	movs	r3, #128	; 0x80\n",
+  " 186:	2382      	movs	r3, #130	; 0x82\n",
+  " 188:	2384      	movs	r3, #132	; 0x84\n",
+  " 18a:	2386      	movs	r3, #134	; 0x86\n",
+  " 18c:	2388      	movs	r3, #136	; 0x88\n",
+  " 18e:	238a      	movs	r3, #138	; 0x8a\n",
+  " 190:	238c      	movs	r3, #140	; 0x8c\n",
+  " 192:	238e      	movs	r3, #142	; 0x8e\n",
+  " 194:	2390      	movs	r3, #144	; 0x90\n",
+  " 196:	2392      	movs	r3, #146	; 0x92\n",
+  " 198:	2394      	movs	r3, #148	; 0x94\n",
+  " 19a:	2396      	movs	r3, #150	; 0x96\n",
+  " 19c:	2398      	movs	r3, #152	; 0x98\n",
+  " 19e:	239a      	movs	r3, #154	; 0x9a\n",
+  " 1a0:	239c      	movs	r3, #156	; 0x9c\n",
+  " 1a2:	239e      	movs	r3, #158	; 0x9e\n",
+  " 1a4:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 1a6:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 1a8:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 1aa:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 1ac:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 1ae:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 1b0:	23ac      	movs	r3, #172	; 0xac\n",
+  " 1b2:	23ae      	movs	r3, #174	; 0xae\n",
+  " 1b4:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 1b6:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 1b8:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 1ba:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 1bc:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 1be:	23ba      	movs	r3, #186	; 0xba\n",
+  " 1c0:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 1c2:	23be      	movs	r3, #190	; 0xbe\n",
+  " 1c4:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 1c6:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 1c8:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 1ca:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 1cc:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 1ce:	23ca      	movs	r3, #202	; 0xca\n",
+  " 1d0:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 1d2:	23ce      	movs	r3, #206	; 0xce\n",
+  " 1d4:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 1d6:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 1d8:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 1da:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 1dc:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 1de:	23da      	movs	r3, #218	; 0xda\n",
+  " 1e0:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 1e2:	23de      	movs	r3, #222	; 0xde\n",
+  " 1e4:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 1e6:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 1e8:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 1ea:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 1ec:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 1ee:	23ea      	movs	r3, #234	; 0xea\n",
+  " 1f0:	23ec      	movs	r3, #236	; 0xec\n",
+  " 1f2:	23ee      	movs	r3, #238	; 0xee\n",
+  " 1f4:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 1f6:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 1f8:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 1fa:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 1fc:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 1fe:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 200:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 202:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 204:	2300      	movs	r3, #0\n",
+  " 206:	2302      	movs	r3, #2\n",
+  " 208:	2304      	movs	r3, #4\n",
+  " 20a:	2306      	movs	r3, #6\n",
+  " 20c:	2308      	movs	r3, #8\n",
+  " 20e:	230a      	movs	r3, #10\n",
+  " 210:	230c      	movs	r3, #12\n",
+  " 212:	230e      	movs	r3, #14\n",
+  " 214:	2310      	movs	r3, #16\n",
+  " 216:	2312      	movs	r3, #18\n",
+  " 218:	2314      	movs	r3, #20\n",
+  " 21a:	2316      	movs	r3, #22\n",
+  " 21c:	2318      	movs	r3, #24\n",
+  " 21e:	231a      	movs	r3, #26\n",
+  " 220:	231c      	movs	r3, #28\n",
+  " 222:	231e      	movs	r3, #30\n",
+  " 224:	2320      	movs	r3, #32\n",
+  " 226:	2322      	movs	r3, #34	; 0x22\n",
+  " 228:	2324      	movs	r3, #36	; 0x24\n",
+  " 22a:	2326      	movs	r3, #38	; 0x26\n",
+  " 22c:	2328      	movs	r3, #40	; 0x28\n",
+  " 22e:	232a      	movs	r3, #42	; 0x2a\n",
+  " 230:	232c      	movs	r3, #44	; 0x2c\n",
+  " 232:	232e      	movs	r3, #46	; 0x2e\n",
+  " 234:	2330      	movs	r3, #48	; 0x30\n",
+  " 236:	2332      	movs	r3, #50	; 0x32\n",
+  " 238:	2334      	movs	r3, #52	; 0x34\n",
+  " 23a:	2336      	movs	r3, #54	; 0x36\n",
+  " 23c:	2338      	movs	r3, #56	; 0x38\n",
+  " 23e:	233a      	movs	r3, #58	; 0x3a\n",
+  " 240:	233c      	movs	r3, #60	; 0x3c\n",
+  " 242:	233e      	movs	r3, #62	; 0x3e\n",
+  " 244:	2340      	movs	r3, #64	; 0x40\n",
+  " 246:	2342      	movs	r3, #66	; 0x42\n",
+  " 248:	2344      	movs	r3, #68	; 0x44\n",
+  " 24a:	2346      	movs	r3, #70	; 0x46\n",
+  " 24c:	2348      	movs	r3, #72	; 0x48\n",
+  " 24e:	234a      	movs	r3, #74	; 0x4a\n",
+  " 250:	234c      	movs	r3, #76	; 0x4c\n",
+  " 252:	234e      	movs	r3, #78	; 0x4e\n",
+  " 254:	2350      	movs	r3, #80	; 0x50\n",
+  " 256:	2352      	movs	r3, #82	; 0x52\n",
+  " 258:	2354      	movs	r3, #84	; 0x54\n",
+  " 25a:	2356      	movs	r3, #86	; 0x56\n",
+  " 25c:	2358      	movs	r3, #88	; 0x58\n",
+  " 25e:	235a      	movs	r3, #90	; 0x5a\n",
+  " 260:	235c      	movs	r3, #92	; 0x5c\n",
+  " 262:	235e      	movs	r3, #94	; 0x5e\n",
+  " 264:	2360      	movs	r3, #96	; 0x60\n",
+  " 266:	2362      	movs	r3, #98	; 0x62\n",
+  " 268:	2364      	movs	r3, #100	; 0x64\n",
+  " 26a:	2366      	movs	r3, #102	; 0x66\n",
+  " 26c:	2368      	movs	r3, #104	; 0x68\n",
+  " 26e:	236a      	movs	r3, #106	; 0x6a\n",
+  " 270:	236c      	movs	r3, #108	; 0x6c\n",
+  " 272:	236e      	movs	r3, #110	; 0x6e\n",
+  " 274:	2370      	movs	r3, #112	; 0x70\n",
+  " 276:	2372      	movs	r3, #114	; 0x72\n",
+  " 278:	2374      	movs	r3, #116	; 0x74\n",
+  " 27a:	2376      	movs	r3, #118	; 0x76\n",
+  " 27c:	2378      	movs	r3, #120	; 0x78\n",
+  " 27e:	237a      	movs	r3, #122	; 0x7a\n",
+  " 280:	237c      	movs	r3, #124	; 0x7c\n",
+  " 282:	237e      	movs	r3, #126	; 0x7e\n",
+  " 284:	2380      	movs	r3, #128	; 0x80\n",
+  " 286:	2382      	movs	r3, #130	; 0x82\n",
+  " 288:	2384      	movs	r3, #132	; 0x84\n",
+  " 28a:	2386      	movs	r3, #134	; 0x86\n",
+  " 28c:	2388      	movs	r3, #136	; 0x88\n",
+  " 28e:	238a      	movs	r3, #138	; 0x8a\n",
+  " 290:	238c      	movs	r3, #140	; 0x8c\n",
+  " 292:	238e      	movs	r3, #142	; 0x8e\n",
+  " 294:	2390      	movs	r3, #144	; 0x90\n",
+  " 296:	2392      	movs	r3, #146	; 0x92\n",
+  " 298:	2394      	movs	r3, #148	; 0x94\n",
+  " 29a:	2396      	movs	r3, #150	; 0x96\n",
+  " 29c:	2398      	movs	r3, #152	; 0x98\n",
+  " 29e:	239a      	movs	r3, #154	; 0x9a\n",
+  " 2a0:	239c      	movs	r3, #156	; 0x9c\n",
+  " 2a2:	239e      	movs	r3, #158	; 0x9e\n",
+  " 2a4:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 2a6:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 2a8:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 2aa:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 2ac:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 2ae:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 2b0:	23ac      	movs	r3, #172	; 0xac\n",
+  " 2b2:	23ae      	movs	r3, #174	; 0xae\n",
+  " 2b4:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 2b6:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 2b8:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 2ba:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 2bc:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 2be:	23ba      	movs	r3, #186	; 0xba\n",
+  " 2c0:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 2c2:	23be      	movs	r3, #190	; 0xbe\n",
+  " 2c4:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 2c6:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 2c8:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 2ca:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 2cc:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 2ce:	23ca      	movs	r3, #202	; 0xca\n",
+  " 2d0:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 2d2:	23ce      	movs	r3, #206	; 0xce\n",
+  " 2d4:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 2d6:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 2d8:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 2da:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 2dc:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 2de:	23da      	movs	r3, #218	; 0xda\n",
+  " 2e0:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 2e2:	23de      	movs	r3, #222	; 0xde\n",
+  " 2e4:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 2e6:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 2e8:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 2ea:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 2ec:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 2ee:	23ea      	movs	r3, #234	; 0xea\n",
+  " 2f0:	23ec      	movs	r3, #236	; 0xec\n",
+  " 2f2:	23ee      	movs	r3, #238	; 0xee\n",
+  " 2f4:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 2f6:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 2f8:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 2fa:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 2fc:	23f8      	movs	r3, #248	; 0xf8\n",
+  " 2fe:	23fa      	movs	r3, #250	; 0xfa\n",
+  " 300:	23fc      	movs	r3, #252	; 0xfc\n",
+  " 302:	23fe      	movs	r3, #254	; 0xfe\n",
+  " 304:	2300      	movs	r3, #0\n",
+  " 306:	2302      	movs	r3, #2\n",
+  " 308:	2304      	movs	r3, #4\n",
+  " 30a:	2306      	movs	r3, #6\n",
+  " 30c:	2308      	movs	r3, #8\n",
+  " 30e:	230a      	movs	r3, #10\n",
+  " 310:	230c      	movs	r3, #12\n",
+  " 312:	230e      	movs	r3, #14\n",
+  " 314:	2310      	movs	r3, #16\n",
+  " 316:	2312      	movs	r3, #18\n",
+  " 318:	2314      	movs	r3, #20\n",
+  " 31a:	2316      	movs	r3, #22\n",
+  " 31c:	2318      	movs	r3, #24\n",
+  " 31e:	231a      	movs	r3, #26\n",
+  " 320:	231c      	movs	r3, #28\n",
+  " 322:	231e      	movs	r3, #30\n",
+  " 324:	2320      	movs	r3, #32\n",
+  " 326:	2322      	movs	r3, #34	; 0x22\n",
+  " 328:	2324      	movs	r3, #36	; 0x24\n",
+  " 32a:	2326      	movs	r3, #38	; 0x26\n",
+  " 32c:	2328      	movs	r3, #40	; 0x28\n",
+  " 32e:	232a      	movs	r3, #42	; 0x2a\n",
+  " 330:	232c      	movs	r3, #44	; 0x2c\n",
+  " 332:	232e      	movs	r3, #46	; 0x2e\n",
+  " 334:	2330      	movs	r3, #48	; 0x30\n",
+  " 336:	2332      	movs	r3, #50	; 0x32\n",
+  " 338:	2334      	movs	r3, #52	; 0x34\n",
+  " 33a:	2336      	movs	r3, #54	; 0x36\n",
+  " 33c:	2338      	movs	r3, #56	; 0x38\n",
+  " 33e:	233a      	movs	r3, #58	; 0x3a\n",
+  " 340:	233c      	movs	r3, #60	; 0x3c\n",
+  " 342:	233e      	movs	r3, #62	; 0x3e\n",
+  " 344:	2340      	movs	r3, #64	; 0x40\n",
+  " 346:	2342      	movs	r3, #66	; 0x42\n",
+  " 348:	2344      	movs	r3, #68	; 0x44\n",
+  " 34a:	2346      	movs	r3, #70	; 0x46\n",
+  " 34c:	2348      	movs	r3, #72	; 0x48\n",
+  " 34e:	234a      	movs	r3, #74	; 0x4a\n",
+  " 350:	234c      	movs	r3, #76	; 0x4c\n",
+  " 352:	234e      	movs	r3, #78	; 0x4e\n",
+  " 354:	2350      	movs	r3, #80	; 0x50\n",
+  " 356:	2352      	movs	r3, #82	; 0x52\n",
+  " 358:	2354      	movs	r3, #84	; 0x54\n",
+  " 35a:	2356      	movs	r3, #86	; 0x56\n",
+  " 35c:	2358      	movs	r3, #88	; 0x58\n",
+  " 35e:	235a      	movs	r3, #90	; 0x5a\n",
+  " 360:	235c      	movs	r3, #92	; 0x5c\n",
+  " 362:	235e      	movs	r3, #94	; 0x5e\n",
+  " 364:	2360      	movs	r3, #96	; 0x60\n",
+  " 366:	2362      	movs	r3, #98	; 0x62\n",
+  " 368:	2364      	movs	r3, #100	; 0x64\n",
+  " 36a:	2366      	movs	r3, #102	; 0x66\n",
+  " 36c:	2368      	movs	r3, #104	; 0x68\n",
+  " 36e:	236a      	movs	r3, #106	; 0x6a\n",
+  " 370:	236c      	movs	r3, #108	; 0x6c\n",
+  " 372:	236e      	movs	r3, #110	; 0x6e\n",
+  " 374:	2370      	movs	r3, #112	; 0x70\n",
+  " 376:	2372      	movs	r3, #114	; 0x72\n",
+  " 378:	2374      	movs	r3, #116	; 0x74\n",
+  " 37a:	2376      	movs	r3, #118	; 0x76\n",
+  " 37c:	2378      	movs	r3, #120	; 0x78\n",
+  " 37e:	237a      	movs	r3, #122	; 0x7a\n",
+  " 380:	237c      	movs	r3, #124	; 0x7c\n",
+  " 382:	237e      	movs	r3, #126	; 0x7e\n",
+  " 384:	2380      	movs	r3, #128	; 0x80\n",
+  " 386:	2382      	movs	r3, #130	; 0x82\n",
+  " 388:	2384      	movs	r3, #132	; 0x84\n",
+  " 38a:	2386      	movs	r3, #134	; 0x86\n",
+  " 38c:	2388      	movs	r3, #136	; 0x88\n",
+  " 38e:	238a      	movs	r3, #138	; 0x8a\n",
+  " 390:	238c      	movs	r3, #140	; 0x8c\n",
+  " 392:	238e      	movs	r3, #142	; 0x8e\n",
+  " 394:	2390      	movs	r3, #144	; 0x90\n",
+  " 396:	2392      	movs	r3, #146	; 0x92\n",
+  " 398:	2394      	movs	r3, #148	; 0x94\n",
+  " 39a:	2396      	movs	r3, #150	; 0x96\n",
+  " 39c:	2398      	movs	r3, #152	; 0x98\n",
+  " 39e:	239a      	movs	r3, #154	; 0x9a\n",
+  " 3a0:	239c      	movs	r3, #156	; 0x9c\n",
+  " 3a2:	239e      	movs	r3, #158	; 0x9e\n",
+  " 3a4:	23a0      	movs	r3, #160	; 0xa0\n",
+  " 3a6:	23a2      	movs	r3, #162	; 0xa2\n",
+  " 3a8:	23a4      	movs	r3, #164	; 0xa4\n",
+  " 3aa:	23a6      	movs	r3, #166	; 0xa6\n",
+  " 3ac:	23a8      	movs	r3, #168	; 0xa8\n",
+  " 3ae:	23aa      	movs	r3, #170	; 0xaa\n",
+  " 3b0:	23ac      	movs	r3, #172	; 0xac\n",
+  " 3b2:	23ae      	movs	r3, #174	; 0xae\n",
+  " 3b4:	23b0      	movs	r3, #176	; 0xb0\n",
+  " 3b6:	23b2      	movs	r3, #178	; 0xb2\n",
+  " 3b8:	23b4      	movs	r3, #180	; 0xb4\n",
+  " 3ba:	23b6      	movs	r3, #182	; 0xb6\n",
+  " 3bc:	23b8      	movs	r3, #184	; 0xb8\n",
+  " 3be:	23ba      	movs	r3, #186	; 0xba\n",
+  " 3c0:	23bc      	movs	r3, #188	; 0xbc\n",
+  " 3c2:	23be      	movs	r3, #190	; 0xbe\n",
+  " 3c4:	23c0      	movs	r3, #192	; 0xc0\n",
+  " 3c6:	23c2      	movs	r3, #194	; 0xc2\n",
+  " 3c8:	23c4      	movs	r3, #196	; 0xc4\n",
+  " 3ca:	23c6      	movs	r3, #198	; 0xc6\n",
+  " 3cc:	23c8      	movs	r3, #200	; 0xc8\n",
+  " 3ce:	23ca      	movs	r3, #202	; 0xca\n",
+  " 3d0:	23cc      	movs	r3, #204	; 0xcc\n",
+  " 3d2:	23ce      	movs	r3, #206	; 0xce\n",
+  " 3d4:	23d0      	movs	r3, #208	; 0xd0\n",
+  " 3d6:	23d2      	movs	r3, #210	; 0xd2\n",
+  " 3d8:	23d4      	movs	r3, #212	; 0xd4\n",
+  " 3da:	23d6      	movs	r3, #214	; 0xd6\n",
+  " 3dc:	23d8      	movs	r3, #216	; 0xd8\n",
+  " 3de:	23da      	movs	r3, #218	; 0xda\n",
+  " 3e0:	23dc      	movs	r3, #220	; 0xdc\n",
+  " 3e2:	23de      	movs	r3, #222	; 0xde\n",
+  " 3e4:	23e0      	movs	r3, #224	; 0xe0\n",
+  " 3e6:	23e2      	movs	r3, #226	; 0xe2\n",
+  " 3e8:	23e4      	movs	r3, #228	; 0xe4\n",
+  " 3ea:	23e6      	movs	r3, #230	; 0xe6\n",
+  " 3ec:	23e8      	movs	r3, #232	; 0xe8\n",
+  " 3ee:	23ea      	movs	r3, #234	; 0xea\n",
+  " 3f0:	23ec      	movs	r3, #236	; 0xec\n",
+  " 3f2:	23ee      	movs	r3, #238	; 0xee\n",
+  " 3f4:	23f0      	movs	r3, #240	; 0xf0\n",
+  " 3f6:	23f2      	movs	r3, #242	; 0xf2\n",
+  " 3f8:	23f4      	movs	r3, #244	; 0xf4\n",
+  " 3fa:	23f6      	movs	r3, #246	; 0xf6\n",
+  " 3fc:	23f