Merge "Allow invoke-virtual-quick on interface types."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index a993251..1b70d59 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -27,6 +27,12 @@
 	dex/quick/arm/int_arm.cc \
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
+	dex/quick/arm64/assemble_arm64.cc \
+	dex/quick/arm64/call_arm64.cc \
+	dex/quick/arm64/fp_arm64.cc \
+	dex/quick/arm64/int_arm64.cc \
+	dex/quick/arm64/target_arm64.cc \
+	dex/quick/arm64/utility_arm64.cc \
 	dex/quick/codegen_util.cc \
 	dex/quick/dex_file_method_inliner.cc \
 	dex/quick/dex_file_to_method_inliner_map.cc \
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 6bf0058..79a85db 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -102,8 +102,7 @@
       mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kArm64:
-      // TODO(Arm64): replace the generator below with a proper one.
-      mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kMips:
       mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c6d6295..452c8d7 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -22,6 +22,8 @@
 namespace art {
 
 /*
+ * TODO(Arm64): the comments below are outdated.
+ *
  * Runtime register usage conventions.
  *
  * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
@@ -29,12 +31,12 @@
  *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
  *        registers.
  * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4   : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r4   : (rA64_SUSPEND) is reserved (suspend check/debugger assist)
  * r5   : Callee save (promotion target)
  * r6   : Callee save (promotion target)
  * r7   : Callee save (promotion target)
  * r8   : Callee save (promotion target)
- * r9   : (rARM_SELF) is reserved (pointer to thread-local storage)
+ * r9   : (rA64_SELF) is reserved (pointer to thread-local storage)
  * r10  : Callee save (promotion target)
  * r11  : Callee save (promotion target)
  * r12  : Scratch, may be trashed by linkage stubs
@@ -93,452 +95,284 @@
  * +========================+
  */
 
+#if 1
+#define A64_PTR_SIZE 4
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#else
+// Not yet ready for this.
+#define A64_PTR_SIZE 8
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#endif
+
+#define A64_QUICK_ENTRYPOINT_OFFSET(name) QUICK_ENTRYPOINT_OFFSET(A64_PTR_SIZE, name)
+#define A64_QUICK_ENTRYPOINT_INT_OFFS(name) A64_GET_INT_OFFS(A64_QUICK_ENTRYPOINT_OFFSET(name))
+#define A64_THREAD_THIN_LOCK_ID_OFFSET A64_GET_INT_OFFS(Thread::ThinLockIdOffset<A64_PTR_SIZE>())
+#define A64_THREAD_EXCEPTION_INT_OFFS A64_GET_INT_OFFS(Thread::ExceptionOffset<A64_PTR_SIZE>())
+#define A64_THREAD_CARD_TABLE_INT_OFFS A64_GET_INT_OFFS(Thread::CardTableOffset<A64_PTR_SIZE>())
+#define A64_THREAD_STACK_END_INT_OFFS A64_GET_INT_OFFS(Thread::StackEndOffset<A64_PTR_SIZE>())
+#define A64_THREAD_SUSPEND_TRIGGER_OFFSET \
+  A64_GET_INT_OFFS(Thread::ThreadSuspendTriggerOffset<A64_PTR_SIZE>())
+typedef ThreadOffset<A64_PTR_SIZE> A64ThreadOffset;
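A rough sketch of how these offsets are meant to be consumed, for orientation only. The load below uses LoadWordDisp() and the rs_rA64_SELF register defined further down, both taken from the surrounding backend code; treat the exact call shape as an assumption rather than part of this change:

    // Load the pending exception from the Thread object pointed to by rA64_SELF.
    // A64_THREAD_EXCEPTION_INT_OFFS already accounts for A64_PTR_SIZE.
    RegStorage r_exception = rs_x0;
    LoadWordDisp(rs_rA64_SELF, A64_THREAD_EXCEPTION_INT_OFFS, r_exception);
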
+
+// Offset to distinguish FP regs.
+#define ARM_FP_REG_OFFSET 32
 // First FP callee save.
 #define ARM_FP_CALLEE_SAVE_BASE 16
 
+// Mask to strip off fp flags.
+#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1)
+
+// Temporary macros, used to mark code which wants to distinguish between zr/sp.
+#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
+#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
+
 enum ArmResourceEncodingPos {
   kArmGPReg0   = 0,
-  kArmRegSP    = 13,
-  kArmRegLR    = 14,
-  kArmRegPC    = 15,
-  kArmFPReg0   = 16,
-  kArmFPReg16  = 32,
-  kArmRegEnd   = 48,
+  kArmRegLR    = 30,
+  kArmRegSP    = 31,
+  kArmFPReg0   = 32,
+  kArmRegEnd   = 64,
 };
 
-#define ENCODE_ARM_REG_LIST(N)      (static_cast<uint64_t>(N))
 #define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
 #define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
-#define ENCODE_ARM_REG_PC           (1ULL << kArmRegPC)
-#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16)
 
-enum ArmNativeRegisterPool {
-  r0           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
-  r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
-  r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
-  r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
-  rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
-  r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
-  r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
-  r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
-  r8           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
-  rARM_SELF    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
-  r10          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
-  r11          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
-  r12          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
-  r13sp        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
-  rARM_SP      = r13sp,
-  r14lr        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
-  rARM_LR      = r14lr,
-  r15pc        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
-  rARM_PC      = r15pc,
+#define IS_SIGNED_IMM(size, value) \
+  ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
+#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
+#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
+#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
+#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
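These expand to a plain two's-complement range check: an n-bit signed immediate must lie in [-2^(n-1), 2^(n-1)). For example, IS_SIGNED_IMM9 accepts exactly [-256, 255]; an illustrative compile-time check:

    static_assert(IS_SIGNED_IMM9(-256) && IS_SIGNED_IMM9(255), "bounds in range");
    static_assert(!IS_SIGNED_IMM9(-257) && !IS_SIGNED_IMM9(256), "bounds out of range");
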
 
-  fr0          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
-  fr1          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
-  fr2          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
-  fr3          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
-  fr4          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
-  fr5          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
-  fr6          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
-  fr7          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
-  fr8          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
-  fr9          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
-  fr10         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
-  fr11         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
-  fr12         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
-  fr13         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
-  fr14         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
-  fr15         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-  fr16         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
-  fr17         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
-  fr18         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
-  fr19         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
-  fr20         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
-  fr21         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
-  fr22         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
-  fr23         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
-  fr24         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
-  fr25         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
-  fr26         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
-  fr27         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
-  fr28         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
-  fr29         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
-  fr30         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
-  fr31         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
+// Quick macro used to define the registers.
+#define A64_REGISTER_CODE_LIST(R) \
+  R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7) \
+  R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15) \
+  R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+  R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
 
-  dr0          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  dr1          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  dr2          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  dr3          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  dr4          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  dr5          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  dr6          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  dr7          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
-  dr8          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  dr9          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  dr10         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  dr11         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  dr12         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  dr13         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  dr14         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  dr15         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#if 0
-  // Enable when def/use and runtime able to handle these.
-  dr16         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  dr17         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
-  dr18         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  dr19         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
-  dr20         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  dr21         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
-  dr22         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  dr23         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
-  dr24         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  dr25         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
-  dr26         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  dr27         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
-  dr28         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  dr29         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
-  dr30         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
-  dr31         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
-#endif
+// Registers (integer) values.
+// TODO(Arm64): for now we define rx##nr identically to rw##nr. We should rather define rx##nr
+// as a k64BitSolo once the register allocator is ready.
+enum A64NativeRegisterPool {
+#  define A64_DEFINE_REGISTERS(nr) \
+    rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rx##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
+    rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
+  A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
+#undef A64_DEFINE_REGISTERS
+
+  // TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp?
+  //   This would be desirable to allow detecting usage-errors in the assembler.
+  rwzr = rw31,
+  rxzr = rx31,
+  rwsp = rw31,
+  rsp = rx31,
+  rA64_SUSPEND = rx4,
+  rA64_SELF = rx18,
+  rA64_SP = rx31,
+  rA64_LR = rx30
 };
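For reference, for nr = 0 the X-macro pair above expands to roughly the following (an illustrative expansion, not literal code in this change); note that, per the TODO, rx0 is temporarily identical to rw0, just as rwzr/rwsp both alias register number 31:

    rw0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
    rx0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,  // same as rw0 for now
    rf0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
    rd0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
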
 
-constexpr RegStorage rs_r0(RegStorage::kValid | r0);
-constexpr RegStorage rs_r1(RegStorage::kValid | r1);
-constexpr RegStorage rs_r2(RegStorage::kValid | r2);
-constexpr RegStorage rs_r3(RegStorage::kValid | r3);
-constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
-constexpr RegStorage rs_r5(RegStorage::kValid | r5);
-constexpr RegStorage rs_r6(RegStorage::kValid | r6);
-constexpr RegStorage rs_r7(RegStorage::kValid | r7);
-constexpr RegStorage rs_r8(RegStorage::kValid | r8);
-constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF);
-constexpr RegStorage rs_r10(RegStorage::kValid | r10);
-constexpr RegStorage rs_r11(RegStorage::kValid | r11);
-constexpr RegStorage rs_r12(RegStorage::kValid | r12);
-constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp);
-constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP);
-constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr);
-constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR);
-constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc);
-constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC);
-constexpr RegStorage rs_invalid(RegStorage::kInvalid);
+#define A64_DEFINE_REGSTORAGES(nr) \
+  constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \
+  constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \
+  constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \
+  constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr);
+A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
+#undef A64_DEFINE_REGSTORAGES
 
-constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
-constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
-constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
-constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
-constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
-constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
-constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
-constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
-constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
-constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
-constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
-constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
-constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
-constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
-constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-constexpr RegStorage rs_fr16(RegStorage::kValid | fr16);
-constexpr RegStorage rs_fr17(RegStorage::kValid | fr17);
-constexpr RegStorage rs_fr18(RegStorage::kValid | fr18);
-constexpr RegStorage rs_fr19(RegStorage::kValid | fr19);
-constexpr RegStorage rs_fr20(RegStorage::kValid | fr20);
-constexpr RegStorage rs_fr21(RegStorage::kValid | fr21);
-constexpr RegStorage rs_fr22(RegStorage::kValid | fr22);
-constexpr RegStorage rs_fr23(RegStorage::kValid | fr23);
-constexpr RegStorage rs_fr24(RegStorage::kValid | fr24);
-constexpr RegStorage rs_fr25(RegStorage::kValid | fr25);
-constexpr RegStorage rs_fr26(RegStorage::kValid | fr26);
-constexpr RegStorage rs_fr27(RegStorage::kValid | fr27);
-constexpr RegStorage rs_fr28(RegStorage::kValid | fr28);
-constexpr RegStorage rs_fr29(RegStorage::kValid | fr29);
-constexpr RegStorage rs_fr30(RegStorage::kValid | fr30);
-constexpr RegStorage rs_fr31(RegStorage::kValid | fr31);
+constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
+constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND);
+constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
+constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
+constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
 
-constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
-constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
-constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
-constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
-constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
-constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
-constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
-constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
-constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
-constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
-constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
-constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
-constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
-constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
-constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-#if 0
-constexpr RegStorage rs_dr16(RegStorage::kValid | dr16);
-constexpr RegStorage rs_dr17(RegStorage::kValid | dr17);
-constexpr RegStorage rs_dr18(RegStorage::kValid | dr18);
-constexpr RegStorage rs_dr19(RegStorage::kValid | dr19);
-constexpr RegStorage rs_dr20(RegStorage::kValid | dr20);
-constexpr RegStorage rs_dr21(RegStorage::kValid | dr21);
-constexpr RegStorage rs_dr22(RegStorage::kValid | dr22);
-constexpr RegStorage rs_dr23(RegStorage::kValid | dr23);
-constexpr RegStorage rs_dr24(RegStorage::kValid | dr24);
-constexpr RegStorage rs_dr25(RegStorage::kValid | dr25);
-constexpr RegStorage rs_dr26(RegStorage::kValid | dr26);
-constexpr RegStorage rs_dr27(RegStorage::kValid | dr27);
-constexpr RegStorage rs_dr28(RegStorage::kValid | dr28);
-constexpr RegStorage rs_dr29(RegStorage::kValid | dr29);
-constexpr RegStorage rs_dr30(RegStorage::kValid | dr30);
-constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
-#endif
+// RegisterLocation templates for return values (following the hard-float calling convention).
+const RegLocation arm_loc_c_return =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_double =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
 
-// RegisterLocation templates return values (r0, or r0/r1).
-const RegLocation arm_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
+/**
+ * @brief Shift-type to be applied to a register via EncodeShift().
+ */
+enum A64ShiftEncodings {
+  kA64Lsl = 0x0,
+  kA64Lsr = 0x1,
+  kA64Asr = 0x2,
+  kA64Ror = 0x3
+};
 
-enum ArmShiftEncodings {
-  kArmLsl = 0x0,
-  kArmLsr = 0x1,
-  kArmAsr = 0x2,
-  kArmRor = 0x3
+/**
+ * @brief Extend-type to be applied to a register via EncodeExtend().
+ */
+enum A64RegExtEncodings {
+  kA64Uxtb = 0x0,
+  kA64Uxth = 0x1,
+  kA64Uxtw = 0x2,
+  kA64Uxtx = 0x3,
+  kA64Sxtb = 0x4,
+  kA64Sxth = 0x5,
+  kA64Sxtw = 0x6,
+  kA64Sxtx = 0x7
+};
+
+#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
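Assuming EncodeShift()/EncodeExtend() pack the shift/extend type and amount into a single LIR operand, as the doc comments above suggest, the intended shape of a call site would be something like the following (hypothetical, shown only to illustrate how the encodings are consumed):

    // and w0, w1, w2, lsl #2 -- the last operand carries the shift descriptor.
    NewLIR4(kA64And4rrro, rw0, rw1, rw2, EncodeShift(kA64Lsl, 2));
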
+
+/*
+ * The following enum defines the list of A64 instructions supported by the
+ * assembler. Their corresponding EncodingMap positions will be defined in
+ * assemble_arm64.cc.
+ */
+enum ArmOpcode {
+  kA64First = 0,
+  kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+  kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrd,       // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrr,       // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}".
+  kA64B2ct,          // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
+  kA64Blr1x,         // blr [1101011000111111000000] rn[9-5] [00000].
+  kA64Br1x,          // br  [1101011000011111000000] rn[9-5] [00000].
+  kA64Brk1d,         // brk [11010100001] imm_16[20-5] [00000].
+  kA64B1t,           // b   [00010100] offset_26[25-0].
+  kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
+  kA64Cbz2rt,        // cbz [00110100] imm_19[23-5] rt[4-0].
+  kA64Cmn3Rro,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmn3RdT,       // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Cmp3Rro,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
+  kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Csneg4rrrc,    // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Dmb1B,         // dmb [11010101000000110011] CRm[11-8] [10111111].
+  kA64Eor3Rrl,       // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Eor4rrro,      // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Extr4rrrd,     // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Fabs2ff,       // fabs[000111100s100000110000] rn[9-5] rd[4-0].
+  kA64Fadd3fff,      // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0].
+  kA64Fcmp1f,        // fcmp[000111100s100000001000] rn[9-5] [01000].
+  kA64Fcmp2ff,       // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000].
+  kA64Fcvtzs2wf,     // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvtzs2xf,     // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
+  kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
+  kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+  kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
+  kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
+  kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2Sx,       // fmov[1001111001100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2ws,       // fmov[0001111001101110000000] rn[9-5] rd[4-0].
+  kA64Fmov2xS,       // fmov[1001111001101111000000] rn[9-5] rd[4-0].
+  kA64Fmul3fff,      // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0].
+  kA64Fneg2ff,       // fneg[000111100s100001010000] rn[9-5] rd[4-0].
+  kA64Frintz2ff,     // frintz [000111100s100101110000] rn[9-5] rd[4-0].
+  kA64Fsqrt2ff,      // fsqrt[000111100s100001110000] rn[9-5] rd[4-0].
+  kA64Fsub3fff,      // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0].
+  kA64Ldrb3wXd,      // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrb3wXx,      // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXd,     // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXx,     // ldrsb[001110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrh3wXF,      // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrh4wXxd,     // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsh3rXF,     // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsh4rXxd,    // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldr2fp,        // ldr [0s011100] imm_19[23-5] rt[4-0].
+  kA64Ldr2rp,        // ldr [0s011000] imm_19[23-5] rt[4-0].
+  kA64Ldr3fXD,       // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldr3rXD,       // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldr4fXxG,      // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldr4rXxG,      // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64LdrPost3rXd,   // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldp4rrXD,      // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64LdpPost4rrXD,  // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldur3rXd,      // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldxr2rX,       // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
+  kA64Lsl3rrr,       // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
+  kA64Lsr3rrd,       // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
+  kA64Lsr3rrr,       // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
+  kA64Movk3rdM,      // movk [011100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movn3rdM,      // movn [000100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movz3rdM,      // movz [010100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Mov2rr,        // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mvn2rr,        // mvn [00101010001] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
+  kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
+  kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Ret,           // ret [11010110010111110000001111000000].
+  kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
+  kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
+  kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
+  kA64Sbc3rrr,       // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Sbfm4rrdd,     // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Scvtf2fw,      // scvtf  [000111100s100010000000] rn[9-5] rd[4-0].
+  kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
+  kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
+  kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+  kA64Stp4rrXD,      // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPre4rrXD,   // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Str3fXD,       // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4fXxG,      // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Str3rXD,       // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4rXxG,      // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0].
+  kA64Strb3wXd,      // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strb3wXx,      // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Strh3wXF,      // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strh4wXxd,     // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64StrPost3rXd,   // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Stur3fXd,      // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
+  kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
+  kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Last,
+  kA64NotWide = 0,   // Flag used to select the first instruction variant.
+  kA64Wide = 0x1000  // Flag used to select the second instruction variant.
 };
 
 /*
- * The following enum defines the list of supported Thumb instructions by the
- * assembler. Their corresponding EncodingMap positions will be defined in
- * Assemble.cc.
+ * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM"
+ * and "mov xN, xM" or, for floating point instructions, "mov sN, sM" and "mov dN, dM".
+ * It makes sense to exploit these symmetries of the instruction set; we do this via the
+ * WIDE and UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by
+ * applying the WIDE macro to the non-wide opcode, e.g. WIDE(kA64Sub4RRdT).
  */
-enum ArmOpcode {
-  kArmFirst = 0,
-  kArm16BitData = kArmFirst,  // DATA   [0] rd[15..0].
-  kThumbAdcRR,       // adc   [0100000101] rm[5..3] rd[2..0].
-  kThumbAddRRI3,     // add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0].
-  kThumbAddRI8,      // add(2)  [00110] rd[10..8] imm_8[7..0].
-  kThumbAddRRR,      // add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0].
-  kThumbAddRRLH,     // add(4)  [01000100] H12[01] rm[5..3] rd[2..0].
-  kThumbAddRRHL,     // add(4)  [01001000] H12[10] rm[5..3] rd[2..0].
-  kThumbAddRRHH,     // add(4)  [01001100] H12[11] rm[5..3] rd[2..0].
-  kThumbAddPcRel,    // add(5)  [10100] rd[10..8] imm_8[7..0].
-  kThumbAddSpRel,    // add(6)  [10101] rd[10..8] imm_8[7..0].
-  kThumbAddSpI7,     // add(7)  [101100000] imm_7[6..0].
-  kThumbAndRR,       // and   [0100000000] rm[5..3] rd[2..0].
-  kThumbAsrRRI5,     // asr(1)  [00010] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbAsrRR,       // asr(2)  [0100000100] rs[5..3] rd[2..0].
-  kThumbBCond,       // b(1)  [1101] cond[11..8] offset_8[7..0].
-  kThumbBUncond,     // b(2)  [11100] offset_11[10..0].
-  kThumbBicRR,       // bic   [0100001110] rm[5..3] rd[2..0].
-  kThumbBkpt,        // bkpt  [10111110] imm_8[7..0].
-  kThumbBlx1,        // blx(1)  [111] H[10] offset_11[10..0].
-  kThumbBlx2,        // blx(1)  [111] H[01] offset_11[10..0].
-  kThumbBl1,         // blx(1)  [111] H[10] offset_11[10..0].
-  kThumbBl2,         // blx(1)  [111] H[11] offset_11[10..0].
-  kThumbBlxR,        // blx(2)  [010001111] rm[6..3] [000].
-  kThumbBx,          // bx    [010001110] H2[6..6] rm[5..3] SBZ[000].
-  kThumbCmnRR,       // cmn   [0100001011] rm[5..3] rd[2..0].
-  kThumbCmpRI8,      // cmp(1)  [00101] rn[10..8] imm_8[7..0].
-  kThumbCmpRR,       // cmp(2)  [0100001010] rm[5..3] rd[2..0].
-  kThumbCmpLH,       // cmp(3)  [01000101] H12[01] rm[5..3] rd[2..0].
-  kThumbCmpHL,       // cmp(3)  [01000110] H12[10] rm[5..3] rd[2..0].
-  kThumbCmpHH,       // cmp(3)  [01000111] H12[11] rm[5..3] rd[2..0].
-  kThumbEorRR,       // eor   [0100000001] rm[5..3] rd[2..0].
-  kThumbLdmia,       // ldmia   [11001] rn[10..8] reglist [7..0].
-  kThumbLdrRRI5,     // ldr(1)  [01101] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrRRR,      // ldr(2)  [0101100] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrPcRel,    // ldr(3)  [01001] rd[10..8] imm_8[7..0].
-  kThumbLdrSpRel,    // ldr(4)  [10011] rd[10..8] imm_8[7..0].
-  kThumbLdrbRRI5,    // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrbRRR,     // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrhRRI5,    // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbLdrhRRR,     // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrsbRRR,    // ldrsb   [0101011] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLdrshRRR,    // ldrsh   [0101111] rm[8..6] rn[5..3] rd[2..0].
-  kThumbLslRRI5,     // lsl(1)  [00000] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbLslRR,       // lsl(2)  [0100000010] rs[5..3] rd[2..0].
-  kThumbLsrRRI5,     // lsr(1)  [00001] imm_5[10..6] rm[5..3] rd[2..0].
-  kThumbLsrRR,       // lsr(2)  [0100000011] rs[5..3] rd[2..0].
-  kThumbMovImm,      // mov(1)  [00100] rd[10..8] imm_8[7..0].
-  kThumbMovRR,       // mov(2)  [0001110000] rn[5..3] rd[2..0].
-  kThumbMovRR_H2H,   // mov(3)  [01000111] H12[11] rm[5..3] rd[2..0].
-  kThumbMovRR_H2L,   // mov(3)  [01000110] H12[01] rm[5..3] rd[2..0].
-  kThumbMovRR_L2H,   // mov(3)  [01000101] H12[10] rm[5..3] rd[2..0].
-  kThumbMul,         // mul   [0100001101] rm[5..3] rd[2..0].
-  kThumbMvn,         // mvn   [0100001111] rm[5..3] rd[2..0].
-  kThumbNeg,         // neg   [0100001001] rm[5..3] rd[2..0].
-  kThumbOrr,         // orr   [0100001100] rm[5..3] rd[2..0].
-  kThumbPop,         // pop   [1011110] r[8..8] rl[7..0].
-  kThumbPush,        // push  [1011010] r[8..8] rl[7..0].
-  kThumbRev,         // rev   [1011101000] rm[5..3] rd[2..0]
-  kThumbRevsh,       // revsh   [1011101011] rm[5..3] rd[2..0]
-  kThumbRorRR,       // ror   [0100000111] rs[5..3] rd[2..0].
-  kThumbSbc,         // sbc   [0100000110] rm[5..3] rd[2..0].
-  kThumbStmia,       // stmia   [11000] rn[10..8] reglist [7.. 0].
-  kThumbStrRRI5,     // str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrRRR,      // str(2)  [0101000] rm[8..6] rn[5..3] rd[2..0].
-  kThumbStrSpRel,    // str(3)  [10010] rd[10..8] imm_8[7..0].
-  kThumbStrbRRI5,    // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrbRRR,     // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0].
-  kThumbStrhRRI5,    // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0].
-  kThumbStrhRRR,     // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0].
-  kThumbSubRRI3,     // sub(1)  [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/
-  kThumbSubRI8,      // sub(2)  [00111] rd[10..8] imm_8[7..0].
-  kThumbSubRRR,      // sub(3)  [0001101] rm[8..6] rn[5..3] rd[2..0].
-  kThumbSubSpI7,     // sub(4)  [101100001] imm_7[6..0].
-  kThumbSwi,         // swi   [11011111] imm_8[7..0].
-  kThumbTst,         // tst   [0100001000] rm[5..3] rn[2..0].
-  kThumb2Vldrs,      // vldr low  sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0].
-  kThumb2Vldrd,      // vldr low  dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0].
-  kThumb2Vmuls,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vmuld,      // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2Vstrs,      // vstr low  sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0].
-  kThumb2Vstrd,      // vstr low  dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0].
-  kThumb2Vsubs,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100040] rm[3..0].
-  kThumb2Vsubd,      // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110040] rm[3..0].
-  kThumb2Vadds,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vaddd,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2Vdivs,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
-  kThumb2Vdivd,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
-  kThumb2VmlaF64,    // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
-  kThumb2VcvtIF,     // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtFI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtDI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtFd,     // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
-  kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtF64S32,  // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
-  kThumb2VcvtF64U32,  // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
-  kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
-  kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
-  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
-  kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
-  kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
-  kThumb2LdrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
-  kThumb2StrRRI8Predec,  // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
-  kThumb2LdrRRI8Predec,  // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
-  kThumb2Cbnz,       // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
-  kThumb2Cbz,        // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
-  kThumb2AddRRI12,   // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
-  kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0].
-  kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0].
-  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
-  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
-  kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MvnI8M,     // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
-  kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
-  kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
-  kThumb2Sbfx,       // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
-  kThumb2LdrRRR,     // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrhRRR,    // ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrshRRR,   // ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrbRRR,    // ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrsbRRR,   // ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrRRR,     // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrhRRR,    // str rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2StrbRRR,    // str rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
-  kThumb2LdrhRRI12,  // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrshRRI12,  // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrbRRI12,  // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2LdrsbRRI12,  // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2StrhRRI12,  // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
-  kThumb2Pop,        // pop   [1110100010111101] list[15-0]*/
-  kThumb2Push,       // push  [1110100100101101] list[15-0]*/
-  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
-  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
-  kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2CmnRR,      // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2EorRRR,     // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2MulRRR,     // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
-  kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
-  kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
-  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2NegRR,      // actually rsub rd, rn, #0.
-  kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2LslRRR,     // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2LsrRRR,     // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2AsrRRR,     // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2RorRRR,     // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
-  kThumb2LslRRI5,    // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0].
-  kThumb2LsrRRI5,    // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
-  kThumb2AsrRRI5,    // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
-  kThumb2RorRRI5,    // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
-  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2SbcRRI8M,   // sub rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0]
-  kThumb2RevshRR,    // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0]
-  kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
-  kThumb2Fmstat,     // fmstat [11101110111100011111101000010000].
-  kThumb2Vcmpd,      // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0].
-  kThumb2Vcmps,      // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0].
-  kThumb2LdrPcRel12,  // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0].
-  kThumb2BCond,      // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0].
-  kThumb2Fmrs,       // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000].
-  kThumb2Fmsr,       // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000].
-  kThumb2Fmrrd,      // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
-  kThumb2Fmdrr,      // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
-  kThumb2Vabsd,      // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
-  kThumb2Vabss,      // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
-  kThumb2Vnegd,      // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
-  kThumb2Vnegs,      // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
-  kThumb2Vmovs_IMM8,  // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
-  kThumb2Vmovd_IMM8,  // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
-  kThumb2Mla,        // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0].
-  kThumb2Umull,      // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
-  kThumb2Ldrex,      // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
-  kThumb2Ldrexd,     // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
-  kThumb2Strex,      // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0].
-  kThumb2Strexd,     // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0].
-  kThumb2Clrex,      // clrex [11110011101111111000111100101111].
-  kThumb2Bfi,        // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
-  kThumb2Bfc,        // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
-  kThumb2Dmb,        // dmb [1111001110111111100011110101] option[3-0].
-  kThumb2LdrPcReln12,  // ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0].
-  kThumb2Stm,        // stm <list> [111010010000] rn[19-16] 000 rl[12-0].
-  kThumbUndefined,   // undefined [11011110xxxxxxxx].
-  kThumb2VPopCS,     // vpop <list of callee save fp singles (s16+).
-  kThumb2VPushCS,    // vpush <list callee save fp singles (s16+).
-  kThumb2Vldms,      // vldms rd, <list>.
-  kThumb2Vstms,      // vstms rd, <list>.
-  kThumb2BUncond,    // b <label>.
-  kThumb2MovImm16H,  // similar to kThumb2MovImm16, but target high hw.
-  kThumb2AddPCR,     // Thumb2 2-operand add with hard-coded PC target.
-  kThumb2Adr,        // Special purpose encoding of ADR for switch tables.
-  kThumb2MovImm16LST,  // Special purpose version for switch table use.
-  kThumb2MovImm16HST,  // Special purpose version for switch table use.
-  kThumb2LdmiaWB,    // ldmia  [111010011001[ rn[19..16] mask[15..0].
-  kThumb2OrrRRRs,    // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2Push1,      // t3 encoding of push.
-  kThumb2Pop1,       // t3 encoding of pop.
-  kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
-  kThumb2Smull,      // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
-  kThumb2LdrdPcRel8,  // ldrd rt, rt2, pc +-/1024.
-  kThumb2LdrdI8,     // ldrd rt, rt2, [rn +-/1024].
-  kThumb2StrdI8,     // strd rt, rt2, [rn +-/1024].
-  kArmLast,
-};
+
+// Return the wide and no-wide variants of the given opcode.
+#define WIDE(op) ((ArmOpcode)((op) | kA64Wide))
+#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide))
+
+// Whether the given opcode is wide.
+#define IS_WIDE(op) (((op) & kA64Wide) != 0)
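Concretely, taking the comment's own example opcode (an illustrative use, with ART's DCHECK macros assumed):

    ArmOpcode wide_sub = WIDE(kA64Sub4RRdT);    // 64-bit variant: sub xd, xn, #imm
    DCHECK(IS_WIDE(wide_sub));
    DCHECK_EQ(UNWIDE(wide_sub), kA64Sub4RRdT);  // strip the flag to recover the 32-bit variant
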
+
+/*
+ * Floating point variants. These are just aliases of the macros above, used for floating
+ * point instructions purely for readability.
+ * TODO(Arm64): should we remove these and use the original macros?
+ */
+#define FWIDE WIDE
+#define FUNWIDE UNWIDE
+#define IS_FWIDE IS_WIDE
+
+#define OP_KIND_UNWIDE(opcode) (opcode)
+#define OP_KIND_IS_WIDE(opcode) (false)
 
 enum ArmOpDmbOptions {
   kSY = 0xf,
@@ -551,40 +385,63 @@
 
 // Instruction assembly field_loc kind.
 enum ArmEncodingKind {
-  kFmtUnused,    // Unused field and marks end of formats.
+  // All the formats below are encoded in the same way (as a kFmtBitBlt).
+  // These are grouped together for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
+  kFmtRegW = 0,  // Word register (w) or wzr.
+  kFmtRegX,      // Extended word register (x) or xzr.
+  kFmtRegR,      // Register with same width as the instruction or zr.
+  kFmtRegWOrSp,  // Word register (w) or wsp.
+  kFmtRegXOrSp,  // Extended word register (x) or sp.
+  kFmtRegROrSp,  // Register with same width as the instruction or sp.
+  kFmtRegS,      // Single FP reg.
+  kFmtRegD,      // Double FP reg.
+  kFmtRegF,      // Single/double FP reg depending on the instruction width.
   kFmtBitBlt,    // Bit string using end/start.
-  kFmtDfp,       // Double FP reg.
-  kFmtSfp,       // Single FP reg.
-  kFmtModImm,    // Shifted 8-bit immed using [26,14..12,7..0].
-  kFmtImm16,     // Zero-extended immed using [26,19..16,14..12,7..0].
-  kFmtImm6,      // Encoded branch target using [9,7..3]0.
-  kFmtImm12,     // Zero-extended immediate using [26,14..12,7..0].
-  kFmtShift,     // Shift descriptor, [14..12,7..4].
-  kFmtLsb,       // least significant bit using [14..12][7..6].
-  kFmtBWidth,    // bit-field width, encoded as width-1.
-  kFmtShift5,    // Shift count, [14..12,7..6].
-  kFmtBrOffset,  // Signed extended [26,11,13,21-16,10-0]:0.
-  kFmtFPImm,     // Encoded floating point immediate.
-  kFmtOff24,     // 24-bit Thumb2 unconditional branch encoding.
+
+  // Less likely formats.
+  kFmtUnused,    // Unused field and marks end of formats.
+  kFmtImm21,     // Sign-extended immediate using [23..5,30..29].
+  kFmtShift,     // Register shift, 9-bit at [23..21, 15..10].
+  kFmtExtend,    // Register extend, 9-bit at [23..21, 15..10].
   kFmtSkip,      // Unused field, but continue to next.
 };
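The ordering above is what enables the fast path in the leading comment: every register-like format compares <= kFmtBitBlt and is encoded as a plain bit slice. A sketch of the intended operand loop, where EncodeBits() is a hypothetical helper and not part of this change:

    ArmEncodingKind kind = encoder->field_loc[i].kind;
    if (LIKELY(kind <= kFmtBitBlt)) {
      // Common case: registers and bit strings use the same end/start slicing.
      bits |= EncodeBits(operand, encoder->field_loc[i].end, encoder->field_loc[i].start);
    } else {
      // Rare formats: scattered immediates, shifts, extends, skips.
    }
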
 
-// Struct used to define the snippet positions for each Thumb opcode.
+// Struct used to define the snippet positions for each A64 opcode.
 struct ArmEncodingMap {
-  uint32_t skeleton;
+  uint32_t wskeleton;
+  uint32_t xskeleton;
   struct {
     ArmEncodingKind kind;
-    int end;   // end for kFmtBitBlt, 1-bit slice end for FP regs.
-    int start;  // start for kFmtBitBlt, 4-bit slice end for FP regs.
+    int end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
   } field_loc[4];
-  ArmOpcode opcode;
+  ArmOpcode opcode;  // can be WIDE()-ned to indicate it has a wide variant.
   uint64_t flags;
   const char* name;
   const char* fmt;
-  int size;   // Note: size is in bytes.
+  int size;          // Note: size is in bytes.
   FixupKind fixup;
 };
 
+#if 0
+// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes).
+struct ArmEncodingMap {
+  uint32_t wskeleton;
+  uint32_t xskeleton;
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  struct {
+    uint8_t kind;
+    int8_t end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int8_t start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
+  } field_loc[4];
+  uint32_t fixup;
+  uint32_t opcode;         // can be WIDE()-ned to indicate it has a wide variant.
+  uint32_t padding[3];
+};
+#endif
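The 64-byte figure checks out on LP64: 2*4 (skeletons) + 8 (flags) + 2*8 (name, fmt) + 4*3 (field_loc) + 4 (fixup) + 4 (opcode) + 3*4 (padding) = 64. If this layout is adopted, an explicit guard would keep the assumption honest (illustrative):

    static_assert(sizeof(ArmEncodingMap) == 64, "expected to fill exactly one cache line");
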
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index e79ebad..8accd0a 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -20,26 +20,47 @@
 
 namespace art {
 
+// The macros below are exclusively used in the encoding map.
+
+// Most generic way of providing two variants for one instruction.
+#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2
+
+// Used for instructions which do not have a wide variant.
+#define NO_VARIANTS(variant) \
+  CUSTOM_VARIANTS(variant, 0)
+
+// Used for instructions which have a wide variant with the sf bit set to 1.
+#define SF_VARIANTS(sf0_skeleton) \
+  CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000))
+
+// Used for instructions which have a wide variant with the size bits set to either x0 or x1.
+#define SIZE_VARIANTS(sizex0_skeleton) \
+  CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000))
+
+// Used for instructions which have a wide variant with the sf and n bits set to 1.
+#define SF_N_VARIANTS(sf0_n0_skeleton) \
+  CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000))
+
+// Used for FP instructions which have single and double precision variants, with the type bits
+// set to either 00 or 01.
+#define FLOAT_VARIANTS(type00_skeleton) \
+  CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000))
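For example, SF_VARIANTS(0x1a000000), used by the kA64Adc3rrr entry below, yields the pair 0x1a000000 / 0x9a000000: the second skeleton is the first with bit 31 (the sf bit selecting the 64-bit form) set. Worked out for two of the macros:

    SF_VARIANTS(0x1a000000)    // -> 0x1a000000, 0x1a000000 | 0x80000000 = 0x9a000000 (sf = 1)
    SF_N_VARIANTS(0x12000000)  // -> 0x12000000, 0x12000000 | 0x80400000 = 0x92400000 (sf = 1, N = 1)
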
+
 /*
  * opcode: ArmOpcode enum
- * skeleton: pre-designated bit-pattern for this opcode
- * k0: key to applying ds/de
- * ds: dest start bit position
- * de: dest end bit position
- * k1: key to applying s1s/s1e
- * s1s: src1 start bit position
- * s1e: src1 end bit position
- * k2: key to applying s2s/s2e
- * s2s: src2 start bit position
- * s2e: src2 end bit position
- * operands: number of operands (for sanity check purposes)
+ * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros.
+ * a{n}k: key to applying argument {n}    \
+ * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3
+ * a{n}e: argument {n} end bit position   /
+ * flags: instruction attributes (used in optimization)
  * name: mnemonic name
  * fmt: for pretty-printing
+ * fixup: used for second-pass fixes (e.g. address fixups in branch instructions).
  */
-#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     k3, k3s, k3e, flags, name, fmt, size, fixup) \
-        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
-                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
+#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \
+                     a3k, a3s, a3e, flags, name, fmt, fixup) \
+        {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \
+                    {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup}
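With the macros above, the first entry of the table below (kA64Adc3rrr) expands to an initializer of roughly this shape, shown for orientation only; the skeleton pair comes from SF_VARIANTS and the trailing 4 is the fixed instruction size in bytes:

    {0x1a000000, 0x9a000000,
     {{kFmtRegR, 4, 0}, {kFmtRegR, 9, 5}, {kFmtRegR, 20, 16}, {kFmtUnused, -1, -1}},
     WIDE(kA64Adc3rrr), IS_TERTIARY_OP | REG_DEF0_USE12, "adc", "!0r, !1r, !2r", 4, kFixupNone},
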
 
 /* Instruction dump string format keys: !pf, where "!" is the start
  * of the key, "p" is which numeric operand to use and "f" is the
@@ -52,989 +73,475 @@
  *     3 -> operands[3] (extra)
  *
  * [f]ormats:
- *     h -> 4-digit hex
  *     d -> decimal
+ *     D -> decimal*4 or decimal*8 depending on the instruction width
  *     E -> decimal*4
  *     F -> decimal*2
- *     c -> branch condition (beq, bne, etc.)
+ *     G -> ", lsl #2" or ", lsl #3" depending on the instruction width
+ *     c -> branch condition (eq, ne, etc.)
  *     t -> pc-relative target
- *     u -> 1st half of bl[x] target
- *     v -> 2nd half ob bl[x] target
- *     R -> register list
+ *     p -> pc-relative address
  *     s -> single precision floating point register
  *     S -> double precision floating point register
- *     m -> Thumb2 modified immediate
- *     n -> complimented Thumb2 modified immediate
- *     M -> Thumb2 16-bit zero-extended immediate
- *     b -> 4-digit binary
+ *     f -> single or double precision register (depending on instruction width)
+ *     I -> 8-bit immediate floating point number
+ *     l -> logical immediate
+ *     M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
  *     B -> dmb option string (sy, st, ish, ishst, nsh, hshst)
  *     H -> operand shift
- *     C -> core register name
- *     P -> fp cs register list (base of s16)
- *     Q -> fp cs register list (base of s0)
+ *     T -> register shift (either ", lsl #0" or ", lsl #12")
+ *     e -> register extend (e.g. uxtb #1)
+ *     o -> register shift (e.g. lsl #1) for Word registers
+ *     w -> word (32-bit) register wn, or wzr
+ *     W -> word (32-bit) register wn, or wsp
+ *     x -> extended (64-bit) register xn, or xzr
+ *     X -> extended (64-bit) register xn, or sp
+ *     r -> register with same width as instruction, r31 -> wzr, xzr
+ *     R -> register with same width as instruction, r31 -> wsp, sp
  *
  *  [!] escape.  To insert "!", use "!!"
  */
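Putting the keys together for the kA64Adc3rrr entry below: its fmt string "!0r, !1r, !2r" renders operands (0, 1, 2) as registers of the instruction's own width, so the same table entry pretty-prints as:

    adc w0, w1, w2    // non-wide variant
    adc x0, x1, x2    // WIDE() variant of the same entry
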
-/* NOTE: must be kept in sync with enum ArmOpcode from LIR.h */
-const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kArmLast] = {
-    ENCODING_MAP(kArm16BitData,    0x0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
-    ENCODING_MAP(kThumbAdcRR,        0x4140,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRI3,      0x1c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRI8,       0x3000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "adds", "!0C, !0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRR,       0x1800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRLH,     0x4440,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRHL,     0x4480,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddRRHH,     0x44c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddPcRel,    0xa000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "add", "!0C, pc, #!1E", 2, kFixupLoad),
-    ENCODING_MAP(kThumbAddSpRel,    0xa800,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "!0C, sp, #!2E", 2, kFixupNone),
-    ENCODING_MAP(kThumbAddSpI7,      0xb000,
-                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "sp, #!0d*4", 2, kFixupNone),
-    ENCODING_MAP(kThumbAndRR,        0x4000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "ands", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbAsrRRI5,      0x1000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbAsrRR,        0x4100,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "asrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBCond,        0xd000,
-                 kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
+/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */
+const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
+    ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "adc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "add", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "add", "!0r, !1r, !2r!3o", kFixupNone),
+    // Note: adr is binary, but declared as tertiary. The third argument is used during the
+    //   fixup pass and carries the information needed to identify the adr label.
+    ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
+                 kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
+                 "adr", "!0x, #!1d", kFixupAdr),
+    ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "and", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "and", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "asr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "asr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000),
+                 kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
-                 NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
-    ENCODING_MAP(kThumbBUncond,      0xe000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t", 2, kFixupT1Branch),
-    ENCODING_MAP(kThumbBicRR,        0x4380,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "bics", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBkpt,          0xbe00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bkpt", "!0d", 2, kFixupNone),
-    ENCODING_MAP(kThumbBlx1,         0xf000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
-    ENCODING_MAP(kThumbBlx2,         0xe800,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
-    ENCODING_MAP(kThumbBl1,          0xf000,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl_1", "!0u", 2, kFixupBl1),
-    ENCODING_MAP(kThumbBl2,          0xf800,
-                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
-                 "bl_2", "!0v", 2, kFixupLabel),
-    ENCODING_MAP(kThumbBlxR,         0x4780,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch),
+    ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
-                 "blx", "!0C", 2, kFixupNone),
-    ENCODING_MAP(kThumbBx,            0x4700,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 "blr", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
+                 "br", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
+                 kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bx", "!0C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmnRR,        0x42c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpRI8,       0x2800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpRR,        0x4280,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpLH,        0x4540,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpHL,        0x4580,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbCmpHH,        0x45c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbEorRR,        0x4040,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 "brk", "!0d", kFixupNone),
+    ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000),
+                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
+                 "b", "!0t", kFixupT1Branch),
+    ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "eors", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdmia,         0xc800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "cbnz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrRRI5,      0x6800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrRRR,       0x5800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrPcRel,    0x4800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
-    ENCODING_MAP(kThumbLdrSpRel,    0x9800,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
-                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbLslRRI5,      0x0000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbLslRR,        0x4080,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsls", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbLsrRRI5,      0x0800,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbLsrRR,        0x40c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovImm,       0x2000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
-                 "movs", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR,        0x1c00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "movs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMul,           0x4340,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "muls", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbMvn,           0x43c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "mvns", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbNeg,           0x4240,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "negs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbOrr,           0x4300,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "orrs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbPop,           0xbc00,
-                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbPush,          0xb400,
-                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbRev,           0xba00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbRevsh,         0xbac0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1,
-                 "rev", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbRorRR,        0x41c0,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "rors", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbSbc,           0x4180,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C", 2, kFixupNone),
-    ENCODING_MAP(kThumbStmia,         0xc000,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrRRI5,      0x6000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrRRR,       0x5000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrSpRel,    0x9000,
-                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
-                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrbRRI5,     0x7000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrbRRR,      0x5400,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrhRRI5,     0x8000,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
-    ENCODING_MAP(kThumbStrhRRR,      0x5200,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRRI3,      0x1e00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2d", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRI8,       0x3800,
-                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "subs", "!0C, #!1d", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubRRR,       0x1a00,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C", 2, kFixupNone),
-    ENCODING_MAP(kThumbSubSpI7,      0xb080,
-                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "sub", "sp, #!0d*4", 2, kFixupNone),
-    ENCODING_MAP(kThumbSwi,           0xdf00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "swi", "!0d", 2, kFixupNone),
-    ENCODING_MAP(kThumbTst,           0x4200,
-                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C", 2, kFixupNone),
-    /*
-     * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though
-     * these instructions don't define lr.  The reason is that these instructions
-     * are used for loading values from the literal pool, and the displacement may be found
-     * to be insuffient at assembly time.  In that case, we need to materialize a new base
-     * register - and will use lr as the temp register.  This works because lr is used as
-     * a temp register in very limited situations, and never in conjunction with a floating
-     * point constant load.  However, it is possible that during instruction scheduling,
-     * another use of lr could be moved across a vldrd/vldrs.  By setting REG_DEF_LR, we
-     * prevent that from happening.  Note that we set REG_DEF_LR on all vldrd/vldrs - even those
-     * not used in a pc-relative case.  It is really only needed on the pc-relative loads, but
-     * the case we're handling is rare enough that it seemed not worth the trouble to distinguish.
-     */
-    ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
-                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
-    ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
-                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
-    ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
-                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
-                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vadds,        0xee300a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
-                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VmlaF64,     0xee000b00,
-                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
-                 "vmla", "!0S, !1S, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
-                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
-                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtF64S32,   0xeeb80bc0,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VcvtF64U32,   0xeeb80b40,
-                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1M", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
-                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
-    ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
-                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
-    ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "add", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Stmia,         0xe8800000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags enconding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "sub", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mvn", "!0C, #!1n", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "sel", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
-                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
-                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
-    ENCODING_MAP(kThumb2Push,          0xe92d0000,
-                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
-    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
-                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
-                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmn", "!0C, #!1m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRR,  0xea000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRR,  0xea200000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2CmnRR,  0xeb000000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRR,  0xea800000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SdivRRR,  0xfb90f0f0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2UdivRRR,  0xfbb0f0f0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "udiv", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "neg", "!0C,!1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2TstRR,       0xea100f00,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsl", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsr", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "asr", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ror", "!0C, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "asr", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "ror", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "bic", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "and", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orr", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "eor", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RevRR, 0xfa90f080,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
-                 "rev", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
-                 "revsh", "!0C, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2It,  0xbf00,
-                 kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
-                 "it:!1b", "!0c", 2, kFixupNone),
-    ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
-                 "fmstat", "", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
-    ENCODING_MAP(kThumb2BCond,        0xf0008000,
-                 kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
-                 "b!1c", "!0t", 4, kFixupCondBranch),
-    ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
-                 kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmrs", "!0C, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
-                 kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmsr", "!0s, !1C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
-                 "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
-                 kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
-                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f64", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
-                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f32", "!0s, !1s", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
-                 kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
-                 kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Mla,  0xfb000000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
-                 "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Umull,  0xfba00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 3, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Ldrexd,      0xe8d0007f,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD,
-                 "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Strex,       0xe8400000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
-                 "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Strexd,      0xe8c00070,
-                 kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
-                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE,
-                 "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
-                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "clrex", "", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Bfi,         0xf3600000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
-                 kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
-                 kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
-                 kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "cbz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x2b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0R, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0R, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csel", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
+                 kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP,
-                 "dmb", "#!0B", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 "dmb", "#!0B", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "eor", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "eor", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12,
+                 "extr", "!0r, !1r, !2r, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fabs", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fadd", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008),
+                 kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES,
+                 "fcmp", "!0f, #0", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000),
+                 kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "fcmp", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000),
+                 kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0w, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000),
+                 kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0x, !1f", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22c000),
+                 kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0S, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000),
+                 kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0s, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fdiv", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "fmov", "!0f, #!1I", kFixupNone),
+    ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000),
+                 kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0s, !1w", kFixupNone),
+    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000),
+                 kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0S, !1x", kFixupNone),
+    ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000),
+                 kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0w, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e660000),
+                 kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0x, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmul", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fneg", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "frintz", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e21c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fsqrt", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fsub", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78a06800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
-                 "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Stm,          0xe9000000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0f, !1p", kFixupLoad),
+    ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stm", "!0C, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumbUndefined,       0xde00,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0r, !1p", kFixupLoad),
+    ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldxr", "!0r, [!1X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsl", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
+                 "movk", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movn", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movz", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mvn", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "neg", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orr", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "orr", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0),
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND,
-                 "undefined", "", 2, kFixupNone),
-    // NOTE: vpop, vpush hard-encoded for s16+ reg list
-    ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
-                 | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
-                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
-                 | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vldms,        0xec900a00,
-                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
-                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Vstms,        0xec800a00,
-                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
-                 | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BUncond,      0xf0009000,
-                 kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
-                 "b", "!0t", 4, kFixupT2Branch),
-    ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
-                 "movt", "!0C, #!1M", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddPCR,      0x4487,
-                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
-                 "add", "rPC, !0C", 2, kFixupLabel),
-    ENCODING_MAP(kThumb2Adr,         0xf20f0000,
-                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 /* Note: doesn't affect flags */
-                 IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adr", "!0C,#!1d", 4, kFixupAdr),
-    ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "mov", "!0C, #!1M", 4, kFixupMovImmLST),
-    ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
-                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
-                 "movt", "!0C, #!1M", 4, kFixupMovImmHST),
-    ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
-                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRRs,  0xea500000,
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Push1,    0xf84d0d04,
-                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
-                 | IS_STORE, "push1", "!0C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Pop1,    0xf85d0b04,
-                 kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
-                 | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtShift, -1, -1,
-                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
-    ENCODING_MAP(kThumb2Smull,  0xfb800000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 3, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
-    ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
-    ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
-                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
-    ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
-                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
-                 "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+                 "ret", "", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev16", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "ror", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sbc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbfm4rrdd), SF_N_VARIANTS(0x13000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000),
+                 kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1w", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000),
+                 kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sdiv", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
+                 kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
+                 kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
+                 "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE,
+                 "str", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
+                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 "stxr", "!0w, !1r, [!2X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sub", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
+                 kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0r, !1R, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "ubfm", "!0r, !1r, !2d, !3d", kFixupNone),
 };
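For a concrete picture of how the table above is consumed, the sketch below replays the field-insertion step of EncodeLIRs (further down in this patch) on the kA64Lsl3rrr entry. The InsertField helper is hypothetical; the shift-and-mask formula and the 0x1ac02000 skeleton come from this patch, where each field location is listed as an (end, start) bit pair.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical helper mirroring EncodeLIRs' kFmtReg*/kFmtBitBlt branch:
    // OR the operand into skeleton bits [end:start].
    static uint32_t InsertField(uint32_t skeleton, uint32_t operand, int end, int start) {
      return skeleton | ((operand << start) & ((1u << (end + 1)) - 1u));
    }

    int main() {
      // kA64Lsl3rrr, 32-bit (w) skeleton 0x1ac02000: rd[4:0], rn[9:5], rm[20:16].
      uint32_t bits = 0x1ac02000;
      bits = InsertField(bits, /*rd=*/0, 4, 0);
      bits = InsertField(bits, /*rn=*/1, 9, 5);
      bits = InsertField(bits, /*rm=*/2, 20, 16);
      printf("0x%08x\n", bits);  // Prints 0x1ac22020, i.e. "lsl w0, w1, w2".
      return 0;
    }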
 
 // new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1059,153 +566,159 @@
   }
 }
 
-/*
- * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
- * not ready. Since r5FP is not updated often, it is less likely to
- * generate unnecessary stall cycles.
- * TUNING: No longer true - find new NOP pattern.
- */
-#define PADDING_MOV_R5_R5               0x1C2D
+/* Nop, used for aligning code. Nop is an alias for hint #0. */
+#define PADDING_NOP (UINT32_C(0xd503201f))
 
 uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
-  for (; lir != NULL; lir = NEXT_LIR(lir)) {
-    if (!lir->flags.is_nop) {
-      int opcode = lir->opcode;
-      if (IsPseudoLirOp(opcode)) {
-        if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
-          // Note: size for this opcode will be either 0 or 2 depending on final alignment.
-          if (lir->offset & 0x2) {
-            write_pos[0] = (PADDING_MOV_R5_R5 & 0xff);
-            write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
-            write_pos += 2;
-          }
-        }
-      } else if (LIKELY(!lir->flags.is_nop)) {
-        const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
-        uint32_t bits = encoder->skeleton;
-        for (int i = 0; i < 4; i++) {
-          uint32_t operand;
-          uint32_t value;
-          operand = lir->operands[i];
-          ArmEncodingKind kind = encoder->field_loc[i].kind;
-          if (LIKELY(kind == kFmtBitBlt)) {
-            value = (operand << encoder->field_loc[i].start) &
-                ((1 << (encoder->field_loc[i].end + 1)) - 1);
-            bits |= value;
-          } else {
-            switch (encoder->field_loc[i].kind) {
-              case kFmtSkip:
-                break;  // Nothing to do, but continue to next.
-              case kFmtUnused:
-                i = 4;  // Done, break out of the enclosing loop.
-                break;
-              case kFmtFPImm:
-                value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
-                value |= (operand & 0x0F) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
-              case kFmtBrOffset:
-                value = ((operand  & 0x80000) >> 19) << 26;
-                value |= ((operand & 0x40000) >> 18) << 11;
-                value |= ((operand & 0x20000) >> 17) << 13;
-                value |= ((operand & 0x1f800) >> 11) << 16;
-                value |= (operand  & 0x007ff);
-                bits |= value;
-                break;
-              case kFmtShift5:
-                value = ((operand & 0x1c) >> 2) << 12;
-                value |= (operand & 0x03) << 6;
-                bits |= value;
-                break;
-              case kFmtShift:
-                value = ((operand & 0x70) >> 4) << 12;
-                value |= (operand & 0x0f) << 4;
-                bits |= value;
-                break;
-              case kFmtBWidth:
-                value = operand - 1;
-                bits |= value;
-                break;
-              case kFmtLsb:
-                value = ((operand & 0x1c) >> 2) << 12;
-                value |= (operand & 0x03) << 6;
-                bits |= value;
-                break;
-              case kFmtImm6:
-                value = ((operand & 0x20) >> 5) << 9;
-                value |= (operand & 0x1f) << 3;
-                bits |= value;
-                break;
-              case kFmtDfp: {
-                DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
-                uint32_t reg_num = RegStorage::RegNum(operand);
-                /* Snag the 1-bit slice and position it */
-                value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end;
-                /* Extract and position the 4-bit slice */
-                value |= (reg_num & 0x0f) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
+  for (; lir != nullptr; lir = NEXT_LIR(lir)) {
+    bool opcode_is_wide = IS_WIDE(lir->opcode);
+    ArmOpcode opcode = UNWIDE(lir->opcode);
+
+    if (UNLIKELY(IsPseudoLirOp(opcode))) {
+      continue;
+    }
+
+    if (LIKELY(!lir->flags.is_nop)) {
+      const ArmEncodingMap *encoder = &EncodingMap[opcode];
+
+      // Select the right variant of the skeleton.
+      uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+      DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode));
+
+      for (int i = 0; i < 4; i++) {
+        ArmEncodingKind kind = encoder->field_loc[i].kind;
+        uint32_t operand = lir->operands[i];
+        uint32_t value;
+
+        if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) {
+          // Note: this will handle kFmtReg* and kFmtBitBlt.
+
+          if (static_cast<unsigned>(kind) < kFmtBitBlt) {
+            bool is_zero = A64_REG_IS_ZR(operand);
+
+            if (kIsDebugBuild) {
+              // Register usage checks: First establish register usage requirements based on the
+              // format in `kind'.
+              bool want_float = false;
+              bool want_64_bit = false;
+              bool want_size_match = false;
+              bool want_zero = false;
+              switch (kind) {
+                case kFmtRegX:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegW:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegR:
+                  want_zero = true;
+                  break;
+                case kFmtRegXOrSp:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegWOrSp:
+                  want_size_match = true;
+                  break;
+                case kFmtRegROrSp:
+                  break;
+                case kFmtRegD:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegS:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegF:
+                  want_float = true;
+                  break;
+                default:
+                  LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name
+                             << " (" << kind << ")";
+                  break;
               }
-              case kFmtSfp: {
-                DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
-                uint32_t reg_num = RegStorage::RegNum(operand);
-                /* Snag the 1-bit slice and position it */
-                value = (reg_num & 0x1) << encoder->field_loc[i].end;
-                /* Extract and position the 4-bit slice */
-                value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start;
-                bits |= value;
-                break;
-              }
-              case kFmtImm12:
-              case kFmtModImm:
-                value = ((operand & 0x800) >> 11) << 26;
-                value |= ((operand & 0x700) >> 8) << 12;
-                value |= operand & 0x0ff;
-                bits |= value;
-                break;
-              case kFmtImm16:
-                value = ((operand & 0x0800) >> 11) << 26;
-                value |= ((operand & 0xf000) >> 12) << 16;
-                value |= ((operand & 0x0700) >> 8) << 12;
-                value |= operand & 0x0ff;
-                bits |= value;
-                break;
-              case kFmtOff24: {
-                uint32_t signbit = (operand >> 31) & 0x1;
-                uint32_t i1 = (operand >> 22) & 0x1;
-                uint32_t i2 = (operand >> 21) & 0x1;
-                uint32_t imm10 = (operand >> 11) & 0x03ff;
-                uint32_t imm11 = operand & 0x07ff;
-                uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
-                uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
-                value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
-                    imm11;
-                bits |= value;
+
+              // Now check that the requirements are satisfied.
+              RegStorage reg(operand);
+              const char *expected = nullptr;
+              if (want_float) {
+                if (!reg.IsFloat()) {
+                  expected = "float register";
+                } else if (want_size_match && (reg.IsDouble() != want_64_bit)) {
+                  expected = (want_64_bit) ? "double register" : "single register";
                 }
-                break;
-              default:
-                LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+              } else {
+                if (reg.IsFloat()) {
+                  expected = "core register";
+                } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) {
+                  expected = (want_64_bit) ? "x-register" : "w-register";
+                } else if (reg.GetRegNum() == 31 && is_zero != want_zero) {
+                  expected = (want_zero) ? "zero-register" : "sp-register";
+                }
+              }
+
+              // TODO(Arm64): if !want_size_match, then we still should compare the size of the
+              //   register with the size required by the instruction width (kA64Wide).
+
+              // Fail if `expected' contains an unsatisfied requirement.
+              if (expected != nullptr) {
+                // TODO(Arm64): make this FATAL.
+                LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
+                             << ". Expected " << expected << ", got 0x" << std::hex << operand;
+              }
+            }
+
+            // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are
+            //   defined.
+            if (is_zero) {
+              operand = 31;
             }
           }
-        }
-        if (encoder->size == 4) {
-          write_pos[0] = ((bits >> 16) & 0xff);
-          write_pos[1] = ((bits >> 24) & 0xff);
-          write_pos[2] = (bits & 0xff);
-          write_pos[3] = ((bits >> 8) & 0xff);
-          write_pos += 4;
+
+          value = (operand << encoder->field_loc[i].start) &
+              ((1 << (encoder->field_loc[i].end + 1)) - 1);
+          bits |= value;
         } else {
-          DCHECK_EQ(encoder->size, 2);
-          write_pos[0] = (bits & 0xff);
-          write_pos[1] = ((bits >> 8) & 0xff);
-          write_pos += 2;
+          switch (kind) {
+            case kFmtSkip:
+              break;  // Nothing to do, but continue to next.
+            case kFmtUnused:
+              i = 4;  // Done, break out of the enclosing loop.
+              break;
+            case kFmtShift:
+              // Intentional fallthrough.
+            case kFmtExtend:
+              DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift);
+              value = (operand & 0x3f) << 10;
+              value |= ((operand & 0x1c0) >> 6) << 21;
+              bits |= value;
+              break;
+            case kFmtImm21:
+              value = (operand & 0x3) << 29;
+              value |= ((operand & 0x1ffffc) >> 2) << 5;
+              bits |= value;
+              break;
+            default:
+              LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
+                         << " (" << kind << ")";
+          }
         }
       }
+
+      DCHECK_EQ(encoder->size, 4);
+      write_pos[0] = (bits & 0xff);
+      write_pos[1] = ((bits >> 8) & 0xff);
+      write_pos[2] = ((bits >> 16) & 0xff);
+      write_pos[3] = ((bits >> 24) & 0xff);
+      write_pos += 4;
     }
   }
+
   return write_pos;
 }
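EncodeLIRs above picks encoder->xskeleton or encoder->wskeleton from the WIDE bit of the opcode. Below is a minimal sketch of that two-skeleton scheme, assuming SF_VARIANTS derives the 64-bit skeleton by setting the sf bit (bit 31), which is how the w/x forms of most A64 data-processing instructions differ; the names VariantPair and SfVariants are illustrative and not from the patch.

    #include <cstdint>

    struct VariantPair {
      uint32_t wskeleton;  // 32-bit (w-register) encoding.
      uint32_t xskeleton;  // 64-bit (x-register) encoding.
    };

    // Illustrative stand-in for SF_VARIANTS: the x form only flips sf.
    constexpr VariantPair SfVariants(uint32_t sf0) {
      return VariantPair{sf0, sf0 | (UINT32_C(1) << 31)};
    }

    constexpr VariantPair kSub = SfVariants(0x4b000000);  // kA64Sub4rrro
    static_assert(kSub.wskeleton == 0x4b000000, "sub w0, w1, w2 family");
    static_assert(kSub.xskeleton == 0xcb000000, "sub x0, x1, x2 family");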
 
+// Align the data offset on an 8-byte boundary: it will only contain double-word items, as word
+// immediates are better set directly from the code (they require no more than 2 instructions).
+#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7)
+
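The fixup cases below bound displacements with IS_SIGNED_IMM19 and IS_SIGNED_IMM21. For orientation: conditional branches, cbz and literal loads carry a signed 19-bit word offset, while adr carries a signed 21-bit byte offset, so both reach +/-1 MiB, matching the "above 1MB" failure message in kFixupAdr. A sketch under the assumption that the macros are plain two's-complement range checks (IsSignedImm is a hypothetical stand-in):

    #include <cstdint>

    // Hypothetical equivalent of IS_SIGNED_IMM19 / IS_SIGNED_IMM21.
    constexpr bool IsSignedImm(int32_t value, int bits) {
      return value >= -(1 << (bits - 1)) && value < (1 << (bits - 1));
    }

    // imm19 counts 4-byte words: +/-2^18 words = +/-1 MiB of code.
    static_assert(IsSignedImm((1 << 18) - 1, 19), "largest forward word offset");
    static_assert(!IsSignedImm(1 << 18, 19), "first word past +1 MiB");

    // imm21 (kFixupAdr) counts bytes directly: also +/-1 MiB.
    static_assert(IsSignedImm(-(1 << 20), 21), "largest backward byte offset");
    static_assert(!IsSignedImm(1 << 20, 21), "rejected by the kFixupAdr check");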
 // Assemble the LIR into binary instruction format.
 void Arm64Mir2Lir::AssembleLIR() {
   LIR* lir;
@@ -1213,20 +726,25 @@
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
   CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
   int32_t offset_adjustment;
   AssignDataOffsets();
 
   /*
-   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for
-   * non-visited nodes).  Start at zero here, and bit will be flipped to 1 on entry to the loop.
+   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0
+   * for non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop.
    */
   int generation = 0;
   while (true) {
+    // TODO(Arm64): check whether passes and offset adjustments are really necessary.
+    //   Currently they aren't, as - in the fixups below - LIRs are never inserted.
+    //   Things can be different if jump ranges above 1 MB need to be supported.
+    //   If they are not, then we can get rid of the assembler retry logic.
+
     offset_adjustment = 0;
     AssemblerStatus res = kSuccess;  // Assume success
     generation ^= 1;
-    // Note: nodes requring possible fixup linked in ascending order.
+    // Note: nodes requiring possible fixup linked in ascending order.
     lir = first_fixup_;
     prev_lir = NULL;
     while (lir != NULL) {
@@ -1243,341 +761,54 @@
       switch (static_cast<FixupKind>(lir->flags.fixup)) {
         case kFixupLabel:
         case kFixupNone:
-          break;
         case kFixupVLoad:
-          if (lir->operands[1] != rs_r15pc.GetReg()) {
-            break;
-          }
-          // NOTE: intentional fallthrough.
-        case kFixupLoad: {
-          /*
-           * PC-relative loads are mostly used to load immediates
-           * that are too large to materialize directly in one shot.
-           * However, if the load displacement exceeds the limit,
-           * we revert to a multiple-instruction materialization sequence.
-           */
-          LIR *lir_target = lir->target;
-          CodeOffset pc = (lir->offset + 4) & ~3;
-          CodeOffset target = lir_target->offset +
-              ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          if (res != kSuccess) {
-            /*
-             * In this case, we're just estimating and will do it again for real.  Ensure offset
-             * is legal.
-             */
-            delta &= ~0x3;
-          }
-          DCHECK_EQ((delta & 0x3), 0);
-          // First, a sanity check for cases we shouldn't see now
-          if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
-              ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
-            // Shouldn't happen in current codegen.
-            LOG(FATAL) << "Unexpected pc-rel offset " << delta;
-          }
-          // Now, check for the difficult cases
-          if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
-              ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
-              ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
-              ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
-            /*
-             * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
-             * sometimes have to use it to fix up out-of-range accesses.  This is where that
-             * happens.
-             */
-            int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
-                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] :
-                            rs_rARM_LR.GetReg();
-
-            // Add new Adr to generate the address.
-            LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
-                       base_reg, 0, 0, 0, 0, lir->target);
-            new_adr->offset = lir->offset;
-            new_adr->flags.fixup = kFixupAdr;
-            new_adr->flags.size = EncodingMap[kThumb2Adr].size;
-            InsertLIRBefore(lir, new_adr);
-            lir->offset += new_adr->flags.size;
-            offset_adjustment += new_adr->flags.size;
-
-            // lir no longer pcrel, unlink and link in new_adr.
-            ReplaceFixup(prev_lir, lir, new_adr);
-
-            // Convert to normal load.
-            offset_adjustment -= lir->flags.size;
-            if (lir->opcode == kThumb2LdrPcRel12) {
-              lir->opcode = kThumb2LdrRRI12;
-            } else if (lir->opcode == kThumb2LdrdPcRel8) {
-              lir->opcode = kThumb2LdrdI8;
-            }
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            offset_adjustment += lir->flags.size;
-            // Change the load to be relative to the new Adr base.
-            if (lir->opcode == kThumb2LdrdI8) {
-              lir->operands[3] = 0;
-              lir->operands[2] = base_reg;
-            } else {
-              lir->operands[2] = 0;
-              lir->operands[1] = base_reg;
-            }
-            prev_lir = new_adr;  // Continue scan with new_adr;
-            lir = new_adr->u.a.pcrel_next;
-            res = kRetryAll;
-            continue;
-          } else {
-            if ((lir->opcode == kThumb2Vldrs) ||
-                (lir->opcode == kThumb2Vldrd) ||
-                (lir->opcode == kThumb2LdrdPcRel8)) {
-              lir->operands[2] = delta >> 2;
-            } else {
-              lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
-                  delta >> 2;
-            }
-          }
           break;
-        }
-        case kFixupCBxZ: {
-          LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          if (delta > 126 || delta < 0) {
-            /*
-             * Convert to cmp rx,#0 / b[eq/ne] tgt pair
-             * Make new branch instruction and insert after
-             */
-            LIR* new_inst =
-              RawLIR(lir->dalvik_offset, kThumbBCond, 0,
-                     (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
-                     0, 0, 0, lir->target);
-            InsertLIRAfter(lir, new_inst);
-
-            /* Convert the cb[n]z to a cmp rx, #0 ] */
-            // Subtract the old size.
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumbCmpRI8;
-            /* operand[0] is src1 in both cb[n]z & CmpRI8 */
-            lir->operands[1] = 0;
-            lir->target = 0;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            // Add back the new size.
-            offset_adjustment += lir->flags.size;
-            // Set up the new following inst.
-            new_inst->offset = lir->offset + lir->flags.size;
-            new_inst->flags.fixup = kFixupCondBranch;
-            new_inst->flags.size = EncodingMap[new_inst->opcode].size;
-            offset_adjustment += new_inst->flags.size;
-
-            // lir no longer pcrel, unlink and link in new_inst.
-            ReplaceFixup(prev_lir, lir, new_inst);
-            prev_lir = new_inst;  // Continue with the new instruction.
-            lir = new_inst->u.a.pcrel_next;
-            res = kRetryAll;
-            continue;
-          } else {
-            lir->operands[1] = delta >> 1;
-          }
-          break;
-        }
-        case kFixupPushPop: {
-          if (__builtin_popcount(lir->operands[0]) == 1) {
-            /*
-             * The standard push/pop multiple instruction
-             * requires at least two registers in the list.
-             * If we've got just one, switch to the single-reg
-             * encoding.
-             */
-            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
-                kThumb2Pop1;
-            int reg = 0;
-            while (lir->operands[0]) {
-              if (lir->operands[0] & 0x1) {
-                break;
-              } else {
-                reg++;
-                lir->operands[0] >>= 1;
-              }
-            }
-            lir->operands[0] = reg;
-            // This won't change again, don't bother unlinking, just reset fixup kind
-            lir->flags.fixup = kFixupNone;
-          }
-          break;
-        }
-        case kFixupCondBranch: {
-          LIR *target_lir = lir->target;
-          int32_t delta = 0;
-          DCHECK(target_lir);
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          delta = target - pc;
-          if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumb2BCond;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            // Fixup kind remains the same.
-            offset_adjustment += lir->flags.size;
-            res = kRetryAll;
-          }
-          lir->operands[0] = delta >> 1;
-          break;
-        }
-        case kFixupT2Branch: {
-          LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
-          CodeOffset target = target_lir->offset +
-              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
-          int32_t delta = target - pc;
-          lir->operands[0] = delta >> 1;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
-            // Useless branch
-            offset_adjustment -= lir->flags.size;
-            lir->flags.is_nop = true;
-            // Don't unlink - just set to do-nothing.
-            lir->flags.fixup = kFixupNone;
-            res = kRetryAll;
-          }
-          break;
-        }
         case kFixupT1Branch: {
           LIR *target_lir = lir->target;
-          CodeOffset pc = lir->offset + 4;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
           CodeOffset target = target_lir->offset +
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          if (delta > 2046 || delta < -2048) {
-            // Convert to Thumb2BCond w/ kArmCondAl
-            offset_adjustment -= lir->flags.size;
-            lir->opcode = kThumb2BUncond;
-            lir->operands[0] = 0;
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            lir->flags.fixup = kFixupT2Branch;
-            offset_adjustment += lir->flags.size;
-            res = kRetryAll;
-          } else {
-            lir->operands[0] = delta >> 1;
-            if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
-              // Useless branch
-              offset_adjustment -= lir->flags.size;
-              lir->flags.is_nop = true;
-              // Don't unlink - just set to do-nothing.
-              lir->flags.fixup = kFixupNone;
-              res = kRetryAll;
-            }
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
           }
+          lir->operands[0] = delta >> 2;
           break;
         }
-        case kFixupBlx1: {
-          DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
-          /* cur_pc is Thumb */
-          CodeOffset cur_pc = (lir->offset + 4) & ~3;
-          CodeOffset target = lir->operands[1];
-
-          /* Match bit[1] in target with base */
-          if (cur_pc & 0x2) {
-            target |= 0x2;
+        case kFixupLoad:
+        case kFixupCBxZ:
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupLoad";
           }
-          int32_t delta = target - cur_pc;
-          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-          lir->operands[0] = (delta >> 12) & 0x7ff;
-          NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-          break;
-        }
-        case kFixupBl1: {
-          DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
-          /* Both cur_pc and target are Thumb */
-          CodeOffset cur_pc = lir->offset + 4;
-          CodeOffset target = lir->operands[1];
-
-          int32_t delta = target - cur_pc;
-          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-          lir->operands[0] = (delta >> 12) & 0x7ff;
-          NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+          lir->operands[1] = delta >> 2;
           break;
         }
         case kFixupAdr: {
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
-          LIR* target = lir->target;
-          int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
-              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
-              offset_adjustment);
-          int32_t disp = target_disp - ((lir->offset + 4) & ~3);
-          if (disp < 4096) {
-            lir->operands[1] = disp;
+          LIR* target_lir = lir->target;
+          int32_t delta;
+          if (target_lir) {
+            CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ?
+                                      0 : offset_adjustment) + target_lir->offset;
+            delta = target_offs - lir->offset;
+          } else if (lir->operands[2] >= 0) {
+            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            delta = tab->offset + offset_adjustment - lir->offset;
           } else {
-            // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
-            // TUNING: if this case fires often, it can be improved.  Not expected to be common.
-            LIR *new_mov16L =
-                RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0,
-                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
-            new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
-            new_mov16L->flags.fixup = kFixupMovImmLST;
-            new_mov16L->offset = lir->offset;
-            // Link the new instruction, retaining lir.
-            InsertLIRBefore(lir, new_mov16L);
-            lir->offset += new_mov16L->flags.size;
-            offset_adjustment += new_mov16L->flags.size;
-            InsertFixupBefore(prev_lir, lir, new_mov16L);
-            prev_lir = new_mov16L;   // Now we've got a new prev.
-            LIR *new_mov16H =
-                RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0,
-                       WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
-            new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
-            new_mov16H->flags.fixup = kFixupMovImmHST;
-            new_mov16H->offset = lir->offset;
-            // Link the new instruction, retaining lir.
-            InsertLIRBefore(lir, new_mov16H);
-            lir->offset += new_mov16H->flags.size;
-            offset_adjustment += new_mov16H->flags.size;
-            InsertFixupBefore(prev_lir, lir, new_mov16H);
-            prev_lir = new_mov16H;  // Now we've got a new prev.
-
-            offset_adjustment -= lir->flags.size;
-            if (RegStorage::RegNum(lir->operands[0]) < 8) {
-              lir->opcode = kThumbAddRRLH;
-            } else {
-              lir->opcode = kThumbAddRRHH;
-            }
-            lir->operands[1] = rs_rARM_PC.GetReg();
-            lir->flags.size = EncodingMap[lir->opcode].size;
-            offset_adjustment += lir->flags.size;
-            // Must stay in fixup list and have offset updated; will be used by LST/HSP pair.
-            lir->flags.fixup = kFixupNone;
-            res = kRetryAll;
+            // No fixup: this form is used to retrieve the current PC.
+            delta = lir->operands[1];
           }
-          break;
-        }
-        case kFixupMovImmLST: {
-          // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
-          // If tab_rec is null, this is a literal load. Use target
-          LIR* target = lir->target;
-          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
-          lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
-          break;
-        }
-        case kFixupMovImmHST: {
-          // operands[1] should hold disp, [2] has add, [3] has tab_rec
-          LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
-          EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
-          // If tab_rec is null, this is a literal load. Use target
-          LIR* target = lir->target;
-          int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
-          lir->operands[1] =
-              ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
-          break;
-        }
-        case kFixupAlign4: {
-          int32_t required_size = lir->offset & 0x2;
-          if (lir->flags.size != required_size) {
-            offset_adjustment += required_size - lir->flags.size;
-            lir->flags.size = required_size;
-            res = kRetryAll;
+          if (!IS_SIGNED_IMM21(delta)) {
+            LOG(FATAL) << "Jump range above 1MB in kFixupAdr";
           }
+          lir->operands[1] = delta;
           break;
         }
         default:
@@ -1596,7 +827,7 @@
         LOG(FATAL) << "Assembler error - too many retries";
       }
       starting_offset += offset_adjustment;
-      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
       AssignDataOffsets();
     }
   }
@@ -1609,7 +840,7 @@
   write_pos = EncodeLIRs(write_pos, first_lir_insn_);
   DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+  DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size()));
 
   // Install literals
   InstallLiteralPools();
@@ -1629,8 +860,9 @@
 }
 
 int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
-  DCHECK(!IsPseudoLirOp(lir->opcode));
-  return EncodingMap[lir->opcode].size;
+  ArmOpcode opcode = UNWIDE(lir->opcode);
+  DCHECK(!IsPseudoLirOp(opcode));
+  return EncodingMap[opcode].size;
 }
 
 // Encode instruction bit pattern and assign offsets.
@@ -1639,15 +871,14 @@
 
   LIR* last_fixup = NULL;
   for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    ArmOpcode opcode = UNWIDE(lir->opcode);
     if (!lir->flags.is_nop) {
       if (lir->flags.fixup != kFixupNone) {
-        if (!IsPseudoLirOp(lir->opcode)) {
-          lir->flags.size = EncodingMap[lir->opcode].size;
-          lir->flags.fixup = EncodingMap[lir->opcode].fixup;
-        } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
-          lir->flags.size = (offset & 0x2);
-          lir->flags.fixup = kFixupAlign4;
+        if (!IsPseudoLirOp(opcode)) {
+          lir->flags.size = EncodingMap[opcode].size;
+          lir->flags.fixup = EncodingMap[opcode].fixup;
         } else {
+          DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4);
           lir->flags.size = 0;
           lir->flags.fixup = kFixupLabel;
         }
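
All of the fixup cases above funnel through signed-immediate range checks
before patching the instruction operands.  The macros are defined alongside
the encoding tables; their intent, as an illustrative reconstruction rather
than the patch's exact text, is:

  // A value fits a signed N-bit immediate iff it lies in
  // [-2^(N-1), 2^(N-1) - 1].  Branch deltas are checked after scaling by 4,
  // so IS_SIGNED_IMM19 covers +/-1 MiB of code; kFixupAdr checks the
  // unscaled byte delta against a 21-bit immediate for the same reach.
  #define IS_SIGNED_IMM(size, value) \
    ((value) >= -(INT64_C(1) << ((size) - 1)) && \
     (value) < (INT64_C(1) << ((size) - 1)))
  #define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, (value))
  #define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, (value))
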
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 9dfee6e..c210816 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -23,27 +23,32 @@
 
 namespace art {
 
+bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
+                                  const InlineMethod& special) {
+  return Mir2Lir::GenSpecialCase(bb, mir, special);
+}
+
 /*
  * The sparse table in the literal pool is an array of <key,displacement>
- * pairs.  For each set, we'll load them as a pair using ldmia.
- * This means that the register number of the temp we use for the key
- * must be lower than the reg for the displacement.
- *
+ * pairs.  For each entry, we load the key and displacement as a pair using ldp.
  * The test loop will look something like:
  *
  *   adr   r_base, <table>
- *   ldr   r_val, [rARM_SP, v_reg_off]
+ *   ldr   r_val, [rA64_SP, v_reg_off]
  *   mov   r_idx, #table_size
- * lp:
- *   ldmia r_base!, {r_key, r_disp}
+ * loop:
+ *   cbz   r_idx, quit
+ *   ldp   r_key, r_disp, [r_base], #8
  *   sub   r_idx, #1
  *   cmp   r_val, r_key
- *   ifeq
- *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
- *   cbnz  r_idx, lp
+ *   b.ne  loop
+ *   adr   r_base, #0        ; This is the instruction from which we compute displacements
+ *   add   r_base, r_disp
+ *   br    r_base
+ * quit:
  */
 void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
-                                 RegLocation rl_src) {
+                                   RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpSparseSwitchTable(table);
@@ -60,34 +65,39 @@
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
   RegStorage r_base = AllocTemp();
-  /* Allocate key and disp temps */
+  // Allocate key and disp temps.
   RegStorage r_key = AllocTemp();
   RegStorage r_disp = AllocTemp();
-  // Make sure r_key's register number is less than r_disp's number for ldmia
-  if (r_key.GetReg() > r_disp.GetReg()) {
-    RegStorage tmp = r_disp;
-    r_disp = r_key;
-    r_key = tmp;
-  }
   // Materialize a pointer to the switch table
-  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
+  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
   // Set up r_idx
   RegStorage r_idx = AllocTemp();
   LoadConstant(r_idx, size);
-  // Establish loop branch target
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  // Load next key/disp
-  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum()));
+
+  // Entry of loop.
+  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
+  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);
+
+  // Load next key/disp.
+  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
+  OpRegRegImm(kOpSub, r_idx, r_idx, 1);
+
+  // Go to the next case if the key does not match.
   OpRegReg(kOpCmp, r_key, rl_src.reg);
-  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
-  LIR* it = OpIT(kCondEq, "");
-  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
-  OpEndIT(it);
-  tab_rec->anchor = switch_branch;
-  // Needs to use setflags encoding here
-  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
-  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-  OpCondBranch(kCondNe, target);
+  OpCondBranch(kCondNe, loop_entry);
+
+  // Key does match: branch to case label.
+  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
+
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(),
+                   ENCODE_NO_SHIFT, true);
+  NewLIR1(kA64Br1x, r_base.GetReg());
+
+  // Loop exit label.
+  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
+  branch_out->target = loop_exit;
 }
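
Each literal-pool entry consumed by the ldp in the loop above is, in effect,
the following 8-byte pair, with disp measured from the "adr r_base, #0"
anchor recorded in tab_rec->anchor (a sketch of the layout the code implies,
not a struct from the patch):

  struct SparseSwitchEntry {
    int32_t key;   // candidate value, compared against the switch operand
    int32_t disp;  // byte offset of the case target from the adr anchor
  };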
 
 
@@ -111,29 +121,35 @@
   rl_src = LoadValue(rl_src, kCoreReg);
   RegStorage table_base = AllocTemp();
   // Materialize a pointer to the switch table
-  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
+  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
   int low_key = s4FromSwitchData(&table[2]);
-  RegStorage keyReg;
+  RegStorage key_reg;
   // Remove the bias, if necessary
   if (low_key == 0) {
-    keyReg = rl_src.reg;
+    key_reg = rl_src.reg;
   } else {
-    keyReg = AllocTemp();
-    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
+    key_reg = AllocTemp();
+    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
   }
   // Bounds check - if < 0 or >= size continue following switch
-  OpRegImm(kOpCmp, keyReg, size-1);
+  OpRegImm(kOpCmp, key_reg, size - 1);
   LIR* branch_over = OpCondBranch(kCondHi, NULL);
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
+  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
 
-  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
-  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
-  tab_rec->anchor = switch_branch;
+  // Get base branch address.
+  RegStorage branch_reg = AllocTemp();
+  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
 
-  /* branch_over target here */
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(),
+                   ENCODE_NO_SHIFT, true);
+  NewLIR1(kA64Br1x, branch_reg.GetReg());
+
+  // branch_over target here
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch_over->target = target;
 }
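
The single kCondHi branch above performs both bounds checks at once: the
low_key subtraction is done in two's complement, so a key below low_key
wraps around to a large unsigned value.  In C terms (illustrative):

  if ((uint32_t)(value - low_key) > (uint32_t)(size - 1)) goto fall_through;
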
@@ -163,13 +179,13 @@
 
   // Making a call - use explicit registers
   FlushAllRegs();   /* Everything to home location */
-  LoadValueDirectFixed(rl_src, rs_r0);
-  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
-               rs_rARM_LR);
+  LoadValueDirectFixed(rl_src, rs_x0);
+  LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pHandleFillArrayData),
+               rs_rA64_LR);
   // Materialize a pointer to the fill data image
-  NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec));
+  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
   ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
   MarkSafepointPC(call_inst);
 }
 
@@ -180,7 +196,7 @@
 void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
   // FIXME: need separate LoadValues for object references.
-  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
@@ -190,17 +206,15 @@
     } else {
       // If the null-check fails, it's handled by the slow path to reduce exception-related metadata.
       if (Runtime::Current()->ExplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
       }
     }
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
+    Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
-    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
-    NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
+    NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
 
 
     LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -210,9 +224,9 @@
     }
     // TODO: move to a slow path.
     // Go expensive route - artLockObjectFromCode(obj);
-    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
+    LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
     ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
     MarkSafepointPC(call_inst);
 
     LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -220,24 +234,19 @@
     GenMemBarrier(kLoadLoad);
   } else {
     // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_r0, opt_flags);
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
+    GenNullCheck(rs_x0, opt_flags);
+    Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
     MarkPossibleNullPointerException(opt_flags);
-    OpRegImm(kOpCmp, rs_r1, 0);
-    LIR* it = OpIT(kCondEq, "");
-    NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
-        mirror::Object::MonitorOffset().Int32Value() >> 2);
-    OpEndIT(it);
-    OpRegImm(kOpCmp, rs_r1, 0);
-    it = OpIT(kCondNe, "T");
+    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, rs_x1, 0);
+    OpIT(kCondEq, "");
+    NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, rs_x1, 0);
+    OpIT(kCondNe, "T");
     // Go expensive route - artLockObjectFromCode(self, obj);
-    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(),
-                       rs_rARM_LR);
+    LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
     ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-    OpEndIT(it);
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
     MarkSafepointPC(call_inst);
     GenMemBarrier(kLoadLoad);
   }
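
The ldxr/stxr pair above forms a load-linked/store-conditional sequence: the
store publishes the thin lock id only if no other agent touched the monitor
word in between, and produces a non-zero status on failure.  In portable C++
the fast path is morally a compare-and-swap (a sketch under that assumption,
not the generated code):

  #include <atomic>
  #include <cstdint>

  // Returns true if the unlocked monitor word was claimed for this thread.
  bool TryThinLock(std::atomic<uint32_t>* monitor, uint32_t thin_lock_id) {
    uint32_t expected = 0;  // the lock word must read as unlocked
    return monitor->compare_exchange_strong(expected, thin_lock_id);
  }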
@@ -250,10 +259,10 @@
  */
 void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
-  Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+  Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -261,14 +270,14 @@
     } else {
       // If the null-check fails, it's handled by the slow path to reduce exception-related metadata.
       if (Runtime::Current()->ExplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
       }
     }
-    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);
     MarkPossibleNullPointerException(opt_flags);
-    LoadConstantNoClobber(rs_r3, 0);
-    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
-    Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    LoadConstantNoClobber(rs_x3, 0);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
+    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
     LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -278,9 +287,9 @@
     }
     // TODO: move to a slow path.
     // Go expensive route - artUnlockObjectFromCode(obj);
-    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
+    LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
     ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
     MarkSafepointPC(call_inst);
 
     LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -288,33 +297,31 @@
     GenMemBarrier(kStoreLoad);
   } else {
     // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_r0, opt_flags);
-    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    GenNullCheck(rs_x0, opt_flags);
+    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);  // Get lock
     MarkPossibleNullPointerException(opt_flags);
-    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    LoadConstantNoClobber(rs_r3, 0);
+    Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+    LoadConstantNoClobber(rs_x3, 0);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
-    OpRegReg(kOpCmp, rs_r1, rs_r2);
-    LIR* it = OpIT(kCondEq, "EE");
-    Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    OpRegReg(kOpCmp, rs_x1, rs_x2);
+    OpIT(kCondEq, "EE");
+    Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
     // Go expensive route - UnlockObjectFromCode(obj);
-    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
-                       rs_rARM_LR);
+    LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
     ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
-    OpEndIT(it);
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
     MarkSafepointPC(call_inst);
     GenMemBarrier(kStoreLoad);
   }
 }
 
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
+  int ex_offset = A64_THREAD_EXCEPTION_INT_OFFS;
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+  Store32Disp(rs_rA64_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
@@ -326,7 +333,7 @@
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
-  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
+  LoadWordDisp(rs_rA64_SELF, A64_THREAD_CARD_TABLE_INT_OFFS, reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -336,17 +343,16 @@
 }
 
 void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
-  int spill_count = num_core_spills_ + num_fp_spills_;
   /*
-   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
+   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
    * mechanism know so it doesn't try to use any of them when
    * expanding the frame or flushing.  This leaves the utility
   * code with a single temp: x12.  This should be enough.
    */
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
-  LockTemp(rs_r2);
-  LockTemp(rs_r3);
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -356,14 +362,30 @@
                             (static_cast<size_t>(frame_size_) <
                             Thread::kStackOverflowReservedBytes));
   NewLIR0(kPseudoMethodEntry);
+
   if (!skip_overflow_check) {
+    LoadWordDisp(rs_rA64_SELF, A64_THREAD_STACK_END_INT_OFFS, rs_x12);
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
-      Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+      // TODO(Arm64): fix the line below:
+      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #0]; sp has already been decremented by
+      // frame_size_ above, so this probes what used to be [sp, #-framesize].
+      // If that address is in the stack redzone we get a segmentation fault.
+      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
+      Load32Disp(rs_rA64_SP, 0, rs_wzr);
+      MarkPossibleStackOverflowException();
     }
+  } else if (frame_size_ > 0) {
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
   }
+
   /* Spill core callee saves */
-  NewLIR1(kThumb2Push, core_spill_mask_);
+  if (core_spill_mask_) {
+    SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+  }
   /* Need to spill any FP regs? */
   if (num_fp_spills_) {
     /*
@@ -371,107 +393,40 @@
      * they are pushed as a contiguous block.  When promoting from
      * the fp set, we must allocate all singles from s16..highest-promoted
      */
-    NewLIR1(kThumb2VPushCS, num_fp_spills_);
-  }
-
-  const int spill_size = spill_count * 4;
-  const int frame_size_without_spills = frame_size_ - spill_size;
-  if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
-      class StackOverflowSlowPath : public LIRSlowPath {
-       public:
-        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
-            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
-              sp_displace_(sp_displace) {
-        }
-        void Compile() OVERRIDE {
-          m2l_->ResetRegPool();
-          m2l_->ResetDefTracking();
-          GenerateTargetLabel(kPseudoThrowTarget);
-          if (restore_lr_) {
-            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
-          }
-          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
-          m2l_->ClobberCallerSave();
-          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
-          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
-          // codegen and target are in thumb2 mode.
-          // NOTE: native pointer.
-          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
-        }
-
-       private:
-        const bool restore_lr_;
-        const size_t sp_displace_;
-      };
-      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
-        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
-        // Need to restore LR since we used it as a temp.
-        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
-        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
-      } else {
-        // If the frame is small enough we are guaranteed to have enough space that remains to
-        // handle signals on the user stack.
-        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
-        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
-      }
-    } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
-      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-    }
-  } else {
-    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+    // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, fp_spill_mask_);
   }
 
   FlushIns(ArgLocs, rl_method);
 
-  FreeTemp(rs_r0);
-  FreeTemp(rs_r1);
-  FreeTemp(rs_r2);
-  FreeTemp(rs_r3);
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
 }
 
 void Arm64Mir2Lir::GenExitSequence() {
-  int spill_count = num_core_spills_ + num_fp_spills_;
   /*
   * In the exit path, x0/x1 are live - make sure they aren't
    * allocated by the register utilities as temps.
    */
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
 
   NewLIR0(kPseudoMethodExit);
-  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
   /* Need to restore any FP callee saves? */
   if (num_fp_spills_) {
-    NewLIR1(kThumb2VPopCS, num_fp_spills_);
+    // TODO(Arm64): UnspillFPRegs(num_fp_spills_);
   }
-  if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
-    /* Unspill rARM_LR to rARM_PC */
-    core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
-    core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
+  if (core_spill_mask_) {
+    UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
   }
-  NewLIR1(kThumb2Pop, core_spill_mask_);
-  if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
-    /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
-    NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
-  }
+
+  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
+  NewLIR0(kA64Ret);
 }
 
 void Arm64Mir2Lir::GenSpecialExitSequence() {
-  NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
+  NewLIR0(kA64Ret);
 }
 
 }  // namespace art
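
The header diff below declares the pairwise spill helpers used by the new
entry/exit sequences: SpillCoreRegs and UnSpillCoreRegs walk core_spill_mask_
two registers at a time so that each pair maps onto a single stp/ldp.  A
plausible shape for the mask walk (the name comes from the header; the body
is an assumption, not the patch's code):

  // Pop up to two register numbers off reg_mask; return the remaining mask.
  uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
    *reg1 = *reg2 = -1;
    if (reg_mask != 0) {
      *reg1 = __builtin_ctz(reg_mask);  // lowest set bit first
      reg_mask &= reg_mask - 1;         // clear it
    }
    if (reg_mask != 0) {
      *reg2 = __builtin_ctz(reg_mask);
      reg_mask &= reg_mask - 1;
    }
    return reg_mask;
  }
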
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 3d5e054..903be10 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -22,7 +22,7 @@
 
 namespace art {
 
-class Arm64Mir2Lir FINAL : public Mir2Lir {
+class Arm64Mir2Lir : public Mir2Lir {
   public:
     Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -31,7 +31,7 @@
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset);
+    RegStorage LoadHelper(A64ThreadOffset offset);
     LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                       OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
@@ -75,7 +75,7 @@
     uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
     int AssignInsnOffsets();
     void AssignOffsets();
-    static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
+    uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
     void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
@@ -95,6 +95,7 @@
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
+    void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
     void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -141,6 +142,11 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
+    uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
+    void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
 
     // Required for target - single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
@@ -156,6 +162,7 @@
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide);
     LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
     LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
@@ -165,44 +172,50 @@
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+    LIR* OpThreadMem(OpKind op, A64ThreadOffset thread_offset);
     LIR* OpVldm(RegStorage r_base, int count);
     LIR* OpVstm(RegStorage r_base, int count);
     void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
     void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val);
+    void OpTlsCmp(A64ThreadOffset offset, int val);
 
     LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
-                          int shift);
-    LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
-    static const ArmEncodingMap EncodingMap[kArmLast];
+    LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
+                          bool is_wide = false);
+    LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false);
+    static const ArmEncodingMap EncodingMap[kA64Last];
     int EncodeShift(int code, int amount);
-    int ModifiedImmediate(uint32_t value);
+    int EncodeExtend(int extend_type, int amount);
+    bool IsExtendEncoding(int encoded_value);
+    int EncodeLogicalImmediate(bool is_wide, uint64_t value);
+    uint64_t DecodeLogicalImmediate(bool is_wide, int value);
+
     ArmConditionCode ArmConditionEncoding(ConditionCode code);
     bool InexpensiveConstantInt(int32_t value);
     bool InexpensiveConstantFloat(int32_t value);
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
 
+    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    int LoadArgRegs(CallInfo* info, int call_state,
+                    NextCallInsn next_call_insn,
+                    const MethodReference& target_method,
+                    uint32_t vtable_idx,
+                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                    bool skip_this);
+
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
-    LIR* LoadFPConstantValue(int r_dest, int value);
+    LIR* LoadFPConstantValue(int r_dest, int32_t value);
+    LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                           bool is_div, bool check_zero);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
-    typedef struct {
-      OpKind op;
-      uint32_t shift;
-    } EasyMultiplyOp;
-    bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
-    bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
-    void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/create.sh b/compiler/dex/quick/arm64/create.sh
deleted file mode 100644
index a3833bd..0000000
--- a/compiler/dex/quick/arm64/create.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [ ! -d ./arm ]; then
-  echo "Directory ./arm not found."
-  exit 1
-fi
-
-mkdir -p arm64
-dst=`cd arm64 && pwd`
-cd arm/
-for f in *; do
-  cp $f $dst/`echo $f | sed 's/arm/arm64/g'`
-done
-
-sed -i 's,ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_,ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_,g' $dst/arm64_lir.h
-sed -i 's,ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_,ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_,g' $dst/codegen_arm64.h
-sed -i -e 's,ArmMir2Lir,Arm64Mir2Lir,g' -e 's,arm_lir.h,arm64_lir.h,g' -e 's,codegen_arm.h,codegen_arm64.h,g' $dst/*.h $dst/*.cc
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 9684283..c2a550e 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -21,8 +21,8 @@
 namespace art {
 
 void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
-                                 RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kThumbBkpt;
+                                   RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
   RegLocation rl_result;
 
   /*
@@ -32,24 +32,24 @@
   switch (opcode) {
     case Instruction::ADD_FLOAT_2ADDR:
     case Instruction::ADD_FLOAT:
-      op = kThumb2Vadds;
+      op = kA64Fadd3fff;
       break;
     case Instruction::SUB_FLOAT_2ADDR:
     case Instruction::SUB_FLOAT:
-      op = kThumb2Vsubs;
+      op = kA64Fsub3fff;
       break;
     case Instruction::DIV_FLOAT_2ADDR:
     case Instruction::DIV_FLOAT:
-      op = kThumb2Vdivs;
+      op = kA64Fdiv3fff;
       break;
     case Instruction::MUL_FLOAT_2ADDR:
     case Instruction::MUL_FLOAT:
-      op = kThumb2Vmuls;
+      op = kA64Fmul3fff;
       break;
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+      CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturn(true);
       StoreValue(rl_dest, rl_result);
@@ -68,31 +68,31 @@
 }
 
 void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
-                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  int op = kThumbBkpt;
+                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
   RegLocation rl_result;
 
   switch (opcode) {
     case Instruction::ADD_DOUBLE_2ADDR:
     case Instruction::ADD_DOUBLE:
-      op = kThumb2Vaddd;
+      op = kA64Fadd3fff;
       break;
     case Instruction::SUB_DOUBLE_2ADDR:
     case Instruction::SUB_DOUBLE:
-      op = kThumb2Vsubd;
+      op = kA64Fsub3fff;
       break;
     case Instruction::DIV_DOUBLE_2ADDR:
     case Instruction::DIV_DOUBLE:
-      op = kThumb2Vdivd;
+      op = kA64Fdiv3fff;
       break;
     case Instruction::MUL_DOUBLE_2ADDR:
     case Instruction::MUL_DOUBLE:
-      op = kThumb2Vmuld;
+      op = kA64Fmul3fff;
       break;
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+      CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturnWide(true);
       StoreValueWide(rl_dest, rl_result);
@@ -111,98 +111,62 @@
   rl_result = EvalLoc(rl_dest, kFPReg, true);
   DCHECK(rl_dest.wide);
   DCHECK(rl_result.wide);
-  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
-void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
-  int op = kThumbBkpt;
-  int src_reg;
+void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
+                                 RegLocation rl_dest, RegLocation rl_src) {
+  int op = kA64Brk1d;
   RegLocation rl_result;
 
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
-      op = kThumb2VcvtIF;
+      op = kA64Scvtf2fw;
       break;
     case Instruction::FLOAT_TO_INT:
-      op = kThumb2VcvtFI;
+      op = kA64Fcvtzs2wf;
       break;
     case Instruction::DOUBLE_TO_FLOAT:
-      op = kThumb2VcvtDF;
+      op = kA64Fcvt2sS;
       break;
     case Instruction::FLOAT_TO_DOUBLE:
-      op = kThumb2VcvtFd;
+      op = kA64Fcvt2Ss;
       break;
     case Instruction::INT_TO_DOUBLE:
-      op = kThumb2VcvtF64S32;
+      op = FWIDE(kA64Scvtf2fw);
       break;
     case Instruction::DOUBLE_TO_INT:
-      op = kThumb2VcvtDI;
+      op = FWIDE(kA64Fcvtzs2wf);
       break;
-    case Instruction::LONG_TO_DOUBLE: {
-      rl_src = LoadValueWide(rl_src, kFPReg);
-      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
-      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
-      rl_result = EvalLoc(rl_dest, kFPReg, true);
-      RegStorage tmp1 = AllocTempDouble();
-      RegStorage tmp2 = AllocTempDouble();
-
-      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
-      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
-      LoadConstantWide(tmp2, 0x41f0000000000000LL);
-      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
-      FreeTemp(tmp1);
-      FreeTemp(tmp2);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    }
+    case Instruction::LONG_TO_DOUBLE:
+      op = FWIDE(kA64Scvtf2fx);
+      break;
     case Instruction::FLOAT_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
-      return;
-    case Instruction::LONG_TO_FLOAT: {
-      rl_src = LoadValueWide(rl_src, kFPReg);
-      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
-      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
-      rl_result = EvalLoc(rl_dest, kFPReg, true);
-      // Allocate temp registers.
-      RegStorage high_val = AllocTempDouble();
-      RegStorage low_val = AllocTempDouble();
-      RegStorage const_val = AllocTempDouble();
-      // Long to double.
-      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
-      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
-      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
-      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
-      // Double to float.
-      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
-      // Free temp registers.
-      FreeTemp(high_val);
-      FreeTemp(low_val);
-      FreeTemp(const_val);
-      // Store result.
-      StoreValue(rl_dest, rl_result);
-      return;
-    }
+      op = kA64Fcvtzs2xf;
+      break;
+    case Instruction::LONG_TO_FLOAT:
+      op = kA64Scvtf2fx;
+      break;
     case Instruction::DOUBLE_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
-      return;
+      op = FWIDE(kA64Fcvtzs2xf);
+      break;
     default:
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
+
   if (rl_src.wide) {
     rl_src = LoadValueWide(rl_src, kFPReg);
-    src_reg = rl_src.reg.GetReg();
   } else {
     rl_src = LoadValue(rl_src, kFPReg);
-    src_reg = rl_src.reg.GetReg();
   }
+
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+
   if (rl_dest.wide) {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
     StoreValueWide(rl_dest, rl_result);
   } else {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -217,15 +181,14 @@
     rl_src2 = mir_graph_->GetSrcWide(mir, 2);
     rl_src1 = LoadValueWide(rl_src1, kFPReg);
     rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
     rl_src1 = mir_graph_->GetSrc(mir, 0);
     rl_src2 = mir_graph_->GetSrc(mir, 1);
     rl_src1 = LoadValue(rl_src1, kFPReg);
     rl_src2 = LoadValue(rl_src2, kFPReg);
-    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   }
-  NewLIR0(kThumb2Fmstat);
   ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
@@ -259,7 +222,7 @@
 
 
 void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
-                          RegLocation rl_src1, RegLocation rl_src2) {
+                            RegLocation rl_src1, RegLocation rl_src2) {
   bool is_double = false;
   int default_result = -1;
   RegLocation rl_result;
@@ -291,7 +254,7 @@
     ClobberSReg(rl_dest.s_reg_low);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
     rl_src1 = LoadValue(rl_src1, kFPReg);
     rl_src2 = LoadValue(rl_src2, kFPReg);
@@ -299,20 +262,20 @@
     ClobberSReg(rl_dest.s_reg_low);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   }
   DCHECK(!rl_result.reg.IsFloat());
-  NewLIR0(kThumb2Fmstat);
 
-  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
-  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
-          ModifiedImmediate(-default_result));  // Must not alter ccodes
-  OpEndIT(it);
+  // TODO(Arm64): should we do the following instead?
+  // csinc wD, wzr, wzr, eq
+  // csneg wD, wD, wD, le
+  // (which requires 2 instructions rather than 3)
 
-  it = OpIT(kCondEq, "");
-  LoadConstant(rl_result.reg, 0);
-  OpEndIT(it);
-
+  // Rd = if cond then Rd else -Rd.
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
+  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
+          kArmCondEq);
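+  // Net effect: 0 when the operands compared equal; otherwise the bias value
+  // loaded above is kept or negated according to the csneg condition.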
   StoreValue(rl_dest, rl_result);
 }
 
@@ -320,7 +283,7 @@
   RegLocation rl_result;
   rl_src = LoadValue(rl_src, kFPReg);
   rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   StoreValue(rl_dest, rl_result);
 }
 
@@ -328,31 +291,32 @@
   RegLocation rl_result;
   rl_src = LoadValueWide(rl_src, kFPReg);
   rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
 bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64";
+
+  DCHECK_EQ(cu_->instruction_set, kArm64);
   LIR *branch;
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = InlineTargetWide(info);  // double place for result
   rl_src = LoadValueWide(rl_src, kFPReg);
   RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg());
-  NewLIR0(kThumb2Fmstat);
-  branch = NewLIR2(kThumbBCond, 0, kArmCondEq);
+  NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  branch = NewLIR2(kA64B2ct, kArmCondEq, 0);
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
-  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt));
-  NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg());
-  NewLIR1(kThumbBlxR, r_tgt.GetReg());
-  NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg());
+  RegStorage r_tgt = LoadHelper(A64_QUICK_ENTRYPOINT_OFFSET(pSqrt));
+  // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg());
+  NewLIR1(kA64Blr1x, r_tgt.GetReg());
+  // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1);
   branch->target = NewLIR0(kPseudoTargetLabel);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
 
-
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index c5a3ab6..709f583 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -29,152 +29,55 @@
   return OpCondBranch(cond, target);
 }
 
-/*
- * Generate a Thumb2 IT instruction, which can nullify up to
- * four subsequent instructions based on a condition and its
- * inverse.  The condition applies to the first instruction, which
- * is executed if the condition is met.  The string "guide" consists
- * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
- * A "T" means the instruction is executed if the condition is
- * met, and an "E" means the instruction is executed if the condition
- * is not met.
- */
+// TODO(Arm64): remove this.
 LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
-  int mask;
-  int mask3 = 0;
-  int mask2 = 0;
-  int mask1 = 0;
-  ArmConditionCode code = ArmConditionEncoding(ccode);
-  int cond_bit = code & 1;
-  int alt_bit = cond_bit ^ 1;
-
-  // Note: case fallthroughs intentional
-  switch (strlen(guide)) {
-    case 3:
-      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
-    case 2:
-      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
-    case 1:
-      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
-      break;
-    case 0:
-      break;
-    default:
-      LOG(FATAL) << "OAT: bad case in OpIT";
-  }
-  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
-       (1 << (3 - strlen(guide)));
-  return NewLIR2(kThumb2It, code, mask);
+  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
+  return NULL;
 }
 
 void Arm64Mir2Lir::OpEndIT(LIR* it) {
-  // TODO: use the 'it' pointer to do some checks with the LIR, for example
-  //       we could check that the number of instructions matches the mask
-  //       in the IT instruction.
-  CHECK(it != nullptr);
-  GenBarrier();
+  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
 }
 
 /*
  * 64-bit 3way compare function.
- *     mov   rX, #-1
- *     cmp   op1hi, op2hi
- *     blt   done
- *     bgt   flip
- *     sub   rX, op1lo, op2lo (treat as unsigned)
- *     beq   done
- *     ite   hi
- *     mov(hi)   rX, #-1
- *     mov(!hi)  rX, #1
- * flip:
- *     neg   rX
- * done:
+ *     cmp   xA, xB
+ *     csinc wC, wzr, wzr, eq
+ *     csneg wC, wC, wC, le
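+ *
+ * This materializes the cmp-long contract, (a == b) ? 0 : ((a > b) ? 1 : -1),
+ * without branches: csinc yields 0 or 1, csneg then resolves the sign.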
  */
-void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  LIR* target1;
-  LIR* target2;
+void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2) {
+  RegLocation rl_result;
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  RegStorage t_reg = AllocTemp();
-  LoadConstant(t_reg, -1);
-  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
-  LIR* branch1 = OpCondBranch(kCondLt, NULL);
-  LIR* branch2 = OpCondBranch(kCondGt, NULL);
-  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
-  LIR* branch3 = OpCondBranch(kCondEq, NULL);
+  rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
-  LIR* it = OpIT(kCondHi, "E");
-  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
-  LoadConstant(t_reg, 1);
-  OpEndIT(it);
-
-  target2 = NewLIR0(kPseudoTargetLabel);
-  OpRegReg(kOpNeg, t_reg, t_reg);
-
-  target1 = NewLIR0(kPseudoTargetLabel);
-
-  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
-  rl_temp.reg.SetReg(t_reg.GetReg());
-  StoreValue(rl_dest, rl_temp);
-  FreeTemp(t_reg);
-
-  branch1->target = target1;
-  branch2->target = target2;
-  branch3->target = branch1->target;
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), kArmCondLe);
+  StoreValue(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
-                                          int64_t val, ConditionCode ccode) {
-  int32_t val_lo = Low32Bits(val);
-  int32_t val_hi = High32Bits(val);
-  DCHECK_GE(ModifiedImmediate(val_lo), 0);
-  DCHECK_GE(ModifiedImmediate(val_hi), 0);
+                                            int64_t val, ConditionCode ccode) {
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegStorage low_reg = rl_src1.reg.GetLow();
-  RegStorage high_reg = rl_src1.reg.GetHigh();
 
   if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
-    FreeTemp(t_reg);
+    ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
+    branch->target = taken;
+  } else {
+    OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true);
     OpCondBranch(ccode, taken);
-    return;
   }
-
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
-      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
 }
 
 void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
@@ -194,21 +97,21 @@
     if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
       OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
       DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-      LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
+      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
       LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
     } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
       OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
       DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-      LIR* it = OpIT(kCondLs, "");
+      OpIT(kCondLs, "");
       LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
     } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
       OpRegImm(kOpCmp, rl_src.reg, 0);
-      LIR* it = OpIT(ccode, "E");
+      OpIT(ccode, "E");
       LoadConstant(rl_result.reg, true_val);
       LoadConstant(rl_result.reg, false_val);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
     } else {
       // Unlikely case - could be tuned.
       RegStorage t_reg1 = AllocTemp();
@@ -216,10 +119,10 @@
       LoadConstant(t_reg1, true_val);
       LoadConstant(t_reg2, false_val);
       OpRegImm(kOpCmp, rl_src.reg, 0);
-      LIR* it = OpIT(ccode, "E");
+      OpIT(ccode, "E");
       OpRegCopy(rl_result.reg, t_reg1);
       OpRegCopy(rl_result.reg, t_reg2);
-      OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
     }
   } else {
     // MOVE case
@@ -229,24 +132,26 @@
     rl_false = LoadValue(rl_false, kCoreReg);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     OpRegImm(kOpCmp, rl_src.reg, 0);
-    LIR* it = nullptr;
     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
-      it = OpIT(NegateComparison(ccode), "");
+      OpIT(NegateComparison(ccode), "");
       OpRegCopy(rl_result.reg, rl_false.reg);
     } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
-      it = OpIT(ccode, "");
+      OpIT(ccode, "");
       OpRegCopy(rl_result.reg, rl_true.reg);
     } else {  // Normal - select between the two.
-      it = OpIT(ccode, "E");
+      OpIT(ccode, "E");
       OpRegCopy(rl_result.reg, rl_true.reg);
       OpRegCopy(rl_result.reg, rl_false.reg);
     }
-    OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
+    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
   }
   StoreValue(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   // Normalize such that if either operand is constant, src2 will be constant.
@@ -259,8 +164,8 @@
     RegLocation rl_temp = UpdateLocWide(rl_src2);
     // Do special compare/branch against simple const operand if not already in registers.
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    if ((rl_temp.location != kLocPhysReg) &&
-        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+    if ((rl_temp.location != kLocPhysReg)
+     /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) {
       GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
       return;
     }
@@ -308,56 +213,77 @@
  * Generate a register comparison to an immediate and branch.  Caller
  * is responsible for setting branch target field.
  */
-LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
+LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
+                                  LIR* target) {
   LIR* branch;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  /*
-   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
-   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
-   * branch forward to a slow path, they will frequently not reach - and thus have to
-   * be converted to a long form during assembly (which will trigger another assembly
-   * pass).  Here we estimate the branch distance for checks, and if large directly
-   * generate the long form in an attempt to avoid an extra assembly pass.
-   * TODO: consider interspersing slowpaths in code following unconditional branches.
-   */
-  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
-  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
-  if (!skip && reg.Low8() && (check_value == 0) &&
-     ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
-    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
-                     reg.GetReg(), 0);
+  if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
+    ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    branch = NewLIR2(opcode, reg.GetReg(), 0);
   } else {
     OpRegImm(kOpCmp, reg, check_value);
-    branch = NewLIR2(kThumbBCond, 0, arm_cond);
+    branch = NewLIR2(kA64B2ct, arm_cond, 0);
   }
   branch->target = target;
   return branch;
 }
 
 LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
+  bool dest_is_fp = r_dest.IsFloat();
+  bool src_is_fp = r_src.IsFloat();
+  ArmOpcode opcode = kA64Brk1d;
   LIR* res;
-  int opcode;
-  // If src or dest is a pair, we'll be using low reg.
-  if (r_dest.IsPair()) {
-    r_dest = r_dest.GetLow();
+
+  if (LIKELY(dest_is_fp == src_is_fp)) {
+    if (LIKELY(!dest_is_fp)) {
+      // Core/core copy.
+      // Copies involving the sp register require a different instruction.
+      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
+
+      // TODO(Arm64): kA64Add4RRdT formally takes 4 arguments, but is used here as a
+      //   2-argument instruction. This currently works because the other arguments are
+      //   set to 0 by default. We should rather introduce an alias kA64Mov2RR.
+
+      // Do an x/x copy only if both registers are x.
+      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
+        opcode = WIDE(opcode);
+      }
+    } else {
+      // Float/float copy.
+      bool dest_is_double = r_dest.IsDouble();
+      bool src_is_double = r_src.IsDouble();
+
+      // We do not do float/double or double/float casts here.
+      DCHECK_EQ(dest_is_double, src_is_double);
+
+      // Homogeneous float/float copy.
+      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
+    }
+  } else {
+    // Inhomogeneous register copy.
+    if (dest_is_fp) {
+      if (r_dest.IsDouble()) {
+        opcode = kA64Fmov2Sx;
+      } else {
+        DCHECK(r_src.IsSingle());
+        opcode = kA64Fmov2sw;
+      }
+    } else {
+      if (r_src.IsDouble()) {
+        opcode = kA64Fmov2xS;
+      } else {
+        DCHECK(r_dest.Is32Bit());
+        opcode = kA64Fmov2ws;
+      }
+    }
   }
-  if (r_src.IsPair()) {
-    r_src = r_src.GetLow();
-  }
-  if (r_dest.IsFloat() || r_src.IsFloat())
-    return OpFpRegCopy(r_dest, r_src);
-  if (r_dest.Low8() && r_src.Low8())
-    opcode = kThumbMovRR;
-  else if (!r_dest.Low8() && !r_src.Low8())
-     opcode = kThumbMovRR_H2H;
-  else if (r_dest.Low8())
-     opcode = kThumbMovRR_H2L;
-  else
-     opcode = kThumbMovRR_L2H;
+
   res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+
   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
   }
+
   return res;
 }
 
@@ -369,33 +295,7 @@
 }
 
 void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  if (r_dest != r_src) {
-    bool dest_fp = r_dest.IsFloat();
-    bool src_fp = r_src.IsFloat();
-    DCHECK(r_dest.Is64Bit());
-    DCHECK(r_src.Is64Bit());
-    if (dest_fp) {
-      if (src_fp) {
-        OpRegCopy(r_dest, r_src);
-      } else {
-        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
-      }
-    } else {
-      if (src_fp) {
-        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
-      } else {
-        // Handle overlap
-        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-          DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        } else {
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        }
-      }
-    }
-  }
+  OpRegCopy(r_dest, r_src);
 }
 
 // Table of magic divisors
@@ -427,6 +327,12 @@
 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
 bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
+  // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table.
+  //   It should be possible to subtract one from all its entries, and then use smaddl
+  //   to counteract this. The advantage is that integers should then be easier to
+  //   encode as logical immediates (0x55555555 rather than 0x55555556).
+  UNIMPLEMENTED(FATAL);
+
   if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
     return false;
   }
@@ -434,6 +340,10 @@
   if (pattern == DivideNone) {
     return false;
   }
+  // Tuning: add rem patterns
+  if (!is_div) {
+    return false;
+  }
 
   RegStorage r_magic = AllocTemp();
   LoadConstant(r_magic, magic_table[lit].magic);
@@ -441,182 +351,43 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage r_hi = AllocTemp();
   RegStorage r_lo = AllocTemp();
-
-  // rl_dest and rl_src might overlap.
-  // Reuse r_hi to save the div result for reminder case.
-  RegStorage r_div_result = is_div ? rl_result.reg : r_hi;
-
-  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
+  NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
   switch (pattern) {
     case Divide3:
-      OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
+      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
+               rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31));
       break;
     case Divide5:
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
-                       EncodeShift(kArmAsr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+               EncodeShift(kA64Asr, magic_table[lit].shift));
       break;
     case Divide7:
       OpRegReg(kOpAdd, r_hi, rl_src.reg);
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
-                       EncodeShift(kArmAsr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+               EncodeShift(kA64Asr, magic_table[lit].shift));
       break;
     default:
       LOG(FATAL) << "Unexpected pattern: " << pattern;
   }
-
-  if (!is_div) {
-    // div_result = src / lit
-    // tmp1 = div_result * lit
-    // dest = src - tmp1
-    RegStorage tmp1 = r_lo;
-    EasyMultiplyOp ops[2];
-
-    bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
-    DCHECK_NE(canEasyMultiply, false);
-
-    GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
-    OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
-  }
-
   StoreValue(rl_dest, rl_result);
   return true;
 }
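
For context, here is a minimal standalone sketch (not ART code) of the reciprocal-multiply scheme this function and the TODO above rely on, following Hacker's Delight 10-4; the `smaddl` comparison assumes the standard A64 semantics `Xd = Xa + Wn * Wm`:

```cpp
#include <cassert>
#include <cstdint>

// Divide by 3 via reciprocal multiply. The product is computed two ways:
// with the magic constant 0x55555556 directly, and as smaddl would with the
// logical-immediate-friendly 0x55555555 plus the source as addend.
int32_t DivideBy3(int32_t x) {
  int64_t prod = INT64_C(0x55555556) * x;             // magic * x
  int64_t prod_smaddl = INT64_C(0x55555555) * x + x;  // smaddl xd, wn, wm, xa
  assert(prod == prod_smaddl);                        // (magic - 1) * x + x == magic * x
  int32_t hi = static_cast<int32_t>(prod >> 32);      // high 32 bits of the product
  return hi - (x >> 31);                              // Divide3: sub q, hi, x, asr #31
}

int main() {
  for (int32_t x : {-1000000, -7, -1, 0, 1, 2, 3, 100, 1000000}) {
    assert(DivideBy3(x) == x / 3);
  }
  return 0;
}
```

The assert shows why the TODO works: (magic - 1) * x + x equals magic * x, and 0x55555555 is a repeating bit pattern encodable as a logical immediate while 0x55555556 is not.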
 
-// Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
-bool Arm64Mir2Lir::GetEasyMultiplyOp(int lit, Arm64Mir2Lir::EasyMultiplyOp* op) {
-  if (IsPowerOfTwo(lit)) {
-    op->op = kOpLsl;
-    op->shift = LowestSetBit(lit);
-    return true;
-  }
-
-  if (IsPowerOfTwo(lit - 1)) {
-    op->op = kOpAdd;
-    op->shift = LowestSetBit(lit - 1);
-    return true;
-  }
-
-  if (IsPowerOfTwo(lit + 1)) {
-    op->op = kOpRsub;
-    op->shift = LowestSetBit(lit + 1);
-    return true;
-  }
-
-  op->op = kOpInvalid;
-  op->shift = 0;
-  return false;
-}
-
-// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
-bool Arm64Mir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
-  GetEasyMultiplyOp(lit, &ops[0]);
-  if (GetEasyMultiplyOp(lit, &ops[0])) {
-    ops[1].op = kOpInvalid;
-    ops[1].shift = 0;
-    return true;
-  }
-
-  int lit1 = lit;
-  uint32_t shift = LowestSetBit(lit1);
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpLsl;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  lit1 = lit - 1;
-  shift = LowestSetBit(lit1);
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpAdd;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  lit1 = lit + 1;
-  shift = LowestSetBit(lit1);
-  if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
-    ops[1].op = kOpRsub;
-    ops[1].shift = shift;
-    return true;
-  }
-
-  return false;
-}
-
-// Generate instructions to do multiply.
-// Additional temporary register is required,
-// if it need to generate 2 instructions and src/dest overlap.
-void Arm64Mir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
-  // tmp1 = ( src << shift1) + [ src | -src | 0 ]
-  // dest = (tmp1 << shift2) + [ src | -src | 0 ]
-
-  RegStorage r_tmp1;
-  if (ops[1].op == kOpInvalid) {
-    r_tmp1 = r_dest;
-  } else if (r_dest.GetReg() != r_src.GetReg()) {
-    r_tmp1 = r_dest;
-  } else {
-    r_tmp1 = AllocTemp();
-  }
-
-  switch (ops[0].op) {
-    case kOpLsl:
-      OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
-      break;
-    case kOpAdd:
-      OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
-      break;
-    case kOpRsub:
-      OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
-      break;
-    default:
-      DCHECK_EQ(ops[0].op, kOpInvalid);
-      break;
-  }
-
-  switch (ops[1].op) {
-    case kOpInvalid:
-      return;
-    case kOpLsl:
-      OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
-      break;
-    case kOpAdd:
-      OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
-      break;
-    case kOpRsub:
-      OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
-      break;
-    default:
-      LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
-      break;
-  }
-}
-
 bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  EasyMultiplyOp ops[2];
-
-  if (!GetEasyMultiplyTwoOps(lit, ops)) {
-    return false;
-  }
-
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
-  StoreValue(rl_dest, rl_result);
-  return true;
+  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
+  return false;
 }
 
 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                       RegLocation rl_src2, bool is_div, bool check_zero) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
   return rl_dest;
 }
 
 RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
   return rl_dest;
 }
 
@@ -657,6 +428,9 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   RegLocation rl_src1 = info->args[0];
   RegLocation rl_src2 = info->args[1];
@@ -665,15 +439,18 @@
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
+  OpIT((is_min) ? kCondGt : kCondLt, "E");
   OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
   OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
-  OpEndIT(it);
+  GenBarrier();
   StoreValue(rl_dest, rl_result);
   return true;
 }
 
 bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_dest = InlineTarget(info);
@@ -682,23 +459,26 @@
   if (size == k64) {
     // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
     if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
-      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
-      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
     } else {
-      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
-      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
     }
     StoreValueWide(rl_dest, rl_result);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
     StoreValue(rl_dest, rl_result);
   }
   return true;
 }
 
 bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
@@ -718,14 +498,17 @@
 }
 
 void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
-  LOG(FATAL) << "Unexpected use of OpLea for Arm";
+  LOG(FATAL) << "Unexpected use of OpLea for Arm64";
 }
 
-void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
-  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
+void Arm64Mir2Lir::OpTlsCmp(A64ThreadOffset offset, int val) {
+  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64";
 }
 
 bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
@@ -745,10 +528,10 @@
   // around the potentially locked temp by using LR for r_ptr, unconditionally.
   // TODO: Pass information about the need for more temps to the stack frame generation
   // code so that we can rely on being able to allocate enough temps.
-  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
-  MarkTemp(rs_rARM_LR);
-  FreeTemp(rs_rARM_LR);
-  LockTemp(rs_rARM_LR);
+  DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
+  MarkTemp(rs_rA64_LR);
+  FreeTemp(rs_rA64_LR);
+  LockTemp(rs_rA64_LR);
   bool load_early = true;
   if (is_long) {
     RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
@@ -797,7 +580,7 @@
 
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
 
-  RegStorage r_ptr = rs_rARM_LR;
+  RegStorage r_ptr = rs_rA64_LR;
   OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
 
   // Free now unneeded rl_object and rl_offset to give more temps.
@@ -813,9 +596,9 @@
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
     // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
+    int low_reg = AllocTemp().GetReg();
+    int high_reg = AllocTemp().GetReg();
+    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
     rl_expected = rl_new_value;
   }
 
@@ -827,42 +610,37 @@
   RegStorage r_tmp = AllocTemp();
   LIR* target = NewLIR0(kPseudoTargetLabel);
 
-  LIR* it = nullptr;
   if (is_long) {
     RegStorage r_tmp_high = AllocTemp();
     if (!load_early) {
       LoadValueDirectWide(rl_src_expected, rl_expected.reg);
     }
-    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
     OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
     OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
     if (!load_early) {
       LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
     }
-    // Make sure we use ORR that sets the ccode
-    if (r_tmp.Low8() && r_tmp_high.Low8()) {
-      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
-    } else {
-      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
-    }
+
+    LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
+    LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
+    NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
+            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
+    LIR* target2 = NewLIR0(kPseudoTargetLabel);
+    branch1->target = target2;
+    branch2->target = target2;
     FreeTemp(r_tmp_high);  // Now unneeded
 
-    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-    it = OpIT(kCondEq, "T");
-    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
-
   } else {
-    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
     OpRegReg(kOpSub, r_tmp, rl_expected.reg);
     DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-    it = OpIT(kCondEq, "T");
-    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+    OpIT(kCondEq, "T");
+    NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
   }
 
   // Still one conditional left from OpIT(kCondEq, "T") from either branch
   OpRegImm(kOpCmp /* eq */, r_tmp, 1);
-  OpEndIT(it);
-
   OpCondBranch(kCondEq, target);
 
   if (!load_early) {
@@ -873,36 +651,37 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
   DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
-  it = OpIT(kCondUlt, "");
+  OpIT(kCondUlt, "");
   LoadConstant(rl_result.reg, 0); /* cc */
   FreeTemp(r_tmp);  // Now unneeded.
-  OpEndIT(it);     // Barrier to terminate OpIT.
 
   StoreValue(rl_dest, rl_result);
 
   // Now, restore lr to its non-temp status.
-  Clobber(rs_rARM_LR);
-  UnmarkTemp(rs_rARM_LR);
+  Clobber(rs_rA64_LR);
+  UnmarkTemp(rs_rA64_LR);
   return true;
 }
 
 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
-  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
 }
 
 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
+  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
+  return NULL;
 }
 
 LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
+  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
+  return NULL;
 }
 
 void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                RegLocation rl_result, int lit,
                                                int first_bit, int second_bit) {
-  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
-                   EncodeShift(kArmLsl, second_bit - first_bit));
+  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
+                   EncodeShift(kA64Lsl, second_bit - first_bit));
   if (first_bit != 0) {
     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
   }
@@ -910,15 +689,14 @@
 
 void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
   DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
-  RegStorage t_reg = AllocTemp();
-  NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
-  FreeTemp(t_reg);
+  OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true);
   GenDivZeroCheck(kCondEq);
 }
 
+// TODO(Arm64): the function below should go.
 // Test suspend flag, return target of taken suspend branch
 LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
-  NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
+  NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
 }
 
@@ -950,8 +728,8 @@
 
   // If the same barrier already exists, don't generate another.
   if (barrier == nullptr
-      || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
-    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
+    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
   }
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
@@ -979,136 +757,45 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
+                             RegLocation rl_src2) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(),
+                   ENCODE_NO_SHIFT, /*is_wide*/ true);
+  StoreValueWide(rl_dest, rl_result);
+}
+
 void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
-                            RegLocation rl_src1, RegLocation rl_src2) {
-    /*
-     * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
-     * dest     = src1.lo * src2.lo;
-     * tmp1    += src1.lo * src2.hi;
-     * dest.hi += tmp1;
-     *
-     * To pull off inline multiply, we have a worst-case requirement of 7 temporary
-     * registers.  Normally for Arm, we get 5.  We can get to 6 by including
-     * lr in the temp set.  The only problematic case is all operands and result are
-     * distinct, and none have been promoted.  In that case, we can succeed by aggressively
-     * freeing operand temp registers after they are no longer needed.  All other cases
-     * can proceed normally.  We'll just punt on the case of the result having a misaligned
-     * overlap with either operand and send that case to a runtime handler.
-     */
-    RegLocation rl_result;
-    if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
-      ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
-      FlushAllRegs();
-      CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(false);
-      StoreValueWide(rl_dest, rl_result);
-      return;
-    }
-
-    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-
-    int reg_status = 0;
-    RegStorage res_lo;
-    RegStorage res_hi;
-    bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
-        !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
-    bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
-    bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
-    // Check if rl_dest is *not* either operand and we have enough temp registers.
-    if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
-        (dest_promoted || src1_promoted || src2_promoted)) {
-      // In this case, we do not need to manually allocate temp registers for result.
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      res_lo = rl_result.reg.GetLow();
-      res_hi = rl_result.reg.GetHigh();
-    } else {
-      res_lo = AllocTemp();
-      if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
-        // In this case, we have enough temp registers to be allocated for result.
-        res_hi = AllocTemp();
-        reg_status = 1;
-      } else {
-        // In this case, all temps are now allocated.
-        // res_hi will be allocated after we can free src1_hi.
-        reg_status = 2;
-      }
-    }
-
-    // Temporarily add LR to the temp pool, and assign it to tmp1
-    MarkTemp(rs_rARM_LR);
-    FreeTemp(rs_rARM_LR);
-    RegStorage tmp1 = rs_rARM_LR;
-    LockTemp(rs_rARM_LR);
-
-    if (rl_src1.reg == rl_src2.reg) {
-      DCHECK(res_hi.Valid());
-      DCHECK(res_lo.Valid());
-      NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
-      NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
-              rl_src1.reg.GetLowReg());
-      OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
-    } else {
-      NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
-      if (reg_status == 2) {
-        DCHECK(!res_hi.Valid());
-        DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
-        DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
-        FreeTemp(rl_src1.reg.GetHigh());
-        res_hi = AllocTemp();
-      }
-      DCHECK(res_hi.Valid());
-      DCHECK(res_lo.Valid());
-      NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
-              rl_src1.reg.GetLowReg());
-      NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
-              tmp1.GetReg());
-      NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
-      if (reg_status == 2) {
-        // Clobber rl_src1 since it was corrupted.
-        FreeTemp(rl_src1.reg);
-        Clobber(rl_src1.reg);
-      }
-    }
-
-    // Now, restore lr to its non-temp status.
-    FreeTemp(tmp1);
-    Clobber(rs_rARM_LR);
-    UnmarkTemp(rs_rARM_LR);
-
-    if (reg_status != 0) {
-      // We had manually allocated registers for rl_result.
-      // Now construct a RegLocation.
-      rl_result = GetReturnWide(false);  // Just using as a template.
-      rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
-    }
-
-    StoreValueWide(rl_dest, rl_result);
+                              RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
 }
 
 void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
+                              RegLocation rl_src2) {
+  GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
 }
 
 void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
+  GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
 }
 
 void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
+  GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
 }
 
 void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
+  GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
 }
 
 void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of genXoLong for Arm";
+  GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
 }
 
 /*
@@ -1116,6 +803,9 @@
  */
 void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -1157,7 +847,8 @@
     } else {
       // No special indexed operation, lea + load w/ displacement
       reg_ptr = AllocTemp();
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+                       EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1170,7 +861,7 @@
       }
       FreeTemp(reg_len);
     }
-    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
+    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
     MarkPossibleNullPointerException(opt_flags);
     if (!constant_index) {
       FreeTemp(reg_ptr);
@@ -1204,6 +895,9 @@
  */
 void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   bool constant_index = rl_index.is_const;
@@ -1259,7 +953,8 @@
       rl_src = LoadValue(rl_src, reg_class);
     }
     if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+                       EncodeShift(kA64Lsl, scale));
     }
     if (needs_range_check) {
       if (constant_index) {
@@ -1294,6 +989,9 @@
 
 void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
   rl_src = LoadValueWide(rl_src, kCoreReg);
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
@@ -1320,8 +1018,8 @@
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else {
         OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
-                         EncodeShift(kArmLsr, 32 - shift_amount));
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(),
+                         EncodeShift(kA64Lsr, 32 - shift_amount));
         OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
       }
       break;
@@ -1336,8 +1034,8 @@
       } else {
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
-                         EncodeShift(kArmLsl, 32 - shift_amount));
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+                         EncodeShift(kA64Lsl, 32 - shift_amount));
         FreeTemp(t_reg);
         OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
       }
@@ -1353,8 +1051,8 @@
       } else {
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
-                         EncodeShift(kArmLsl, 32 - shift_amount));
+        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+                         EncodeShift(kA64Lsl, 32 - shift_amount));
         FreeTemp(t_reg);
         OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
       }
@@ -1365,8 +1063,11 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                     RegLocation rl_src1, RegLocation rl_src2) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
   if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
     if (!rl_src2.is_const) {
       // Don't bother with special handling for subtract from immediate.
@@ -1385,11 +1086,10 @@
     return;
   }
   DCHECK(rl_src2.is_const);
-  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-  uint32_t val_lo = Low32Bits(val);
-  uint32_t val_hi = High32Bits(val);
-  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
-  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+  // TODO(Arm64): implement this.
+  //  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t mod_imm_lo = -1;  // ModifiedImmediate(val_lo);
+  int32_t mod_imm_hi = -1;  // ModifiedImmediate(val_hi);
 
   // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
   switch (opcode) {
@@ -1409,6 +1109,7 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
   switch (opcode) {
+#if 0
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
@@ -1442,10 +1143,82 @@
       NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
       NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
       break;
+#endif
     default:
       LOG(FATAL) << "Unexpected opcode " << opcode;
   }
   StoreValueWide(rl_dest, rl_result);
 }
 
+/**
+ * @brief Split a register list into pairs of registers.
+ *
+ * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
+ * @code
+ *   int reg1 = -1, reg2 = -1;
+ *   while (reg_mask) {
+ *     reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
+ *     if (UNLIKELY(reg2 < 0)) {
+ *       // Single register in reg1.
+ *     } else {
+ *       // Pair in reg1, reg2.
+ *     }
+ *   }
+ * @endcode
+ */
+uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
+  // Find first register.
+  int first_bit_set = __builtin_ctz(reg_mask) + 1;
+  int reg = *reg1 + first_bit_set;
+  reg_mask >>= first_bit_set;
+
+  if (LIKELY(reg_mask)) {
+    // Save the first register, find the second and use the pair opcode.
+    int second_bit_set = __builtin_ctz(reg_mask) + 1;
+    *reg2 = reg;
+    reg_mask >>= second_bit_set;
+    *reg1 = reg + second_bit_set;
+    return reg_mask;
+  }
+
+  // Use the single opcode, as we just have one register.
+  *reg1 = reg;
+  *reg2 = -1;
+  return reg_mask;
+}
+
+void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int pop_log2_size = 3;
+
+  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int pop_log2_size = 3;
+
+  for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+              RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
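
To make the GenPairWise protocol concrete, here is a small standalone sketch (mirroring the helper above, minus the LIKELY/UNLIKELY hints; the printf calls stand in for the ldp/stp vs ldr/str selection done by the spill/unspill loops):

```cpp
#include <cstdint>
#include <cstdio>

// reg1 carries the running register number between calls: the mask is consumed
// by shifting, so absolute register numbers are recovered by accumulating the
// shifted-out bit counts rather than by rescanning the original mask.
static uint32_t GenPairWiseSketch(uint32_t reg_mask, int* reg1, int* reg2) {
  int first_bit_set = __builtin_ctz(reg_mask) + 1;
  int reg = *reg1 + first_bit_set;
  reg_mask >>= first_bit_set;
  if (reg_mask) {
    int second_bit_set = __builtin_ctz(reg_mask) + 1;
    *reg2 = reg;
    reg_mask >>= second_bit_set;
    *reg1 = reg + second_bit_set;
    return reg_mask;
  }
  *reg1 = reg;
  *reg2 = -1;
  return reg_mask;
}

int main() {
  uint32_t reg_mask = 0x14B;  // registers {0, 1, 3, 6, 8}
  int reg1 = -1, reg2 = -1;
  while (reg_mask) {
    reg_mask = GenPairWiseSketch(reg_mask, &reg1, &reg2);
    if (reg2 < 0) {
      printf("single: r%d\n", reg1);           // ldr/str
    } else {
      printf("pair: r%d, r%d\n", reg2, reg1);  // ldp/stp
    }
  }
  // Prints: pair: r0, r1 / pair: r3, r6 / single: r8
  return 0;
}
```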
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 233e9c2..7e07e15 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -27,39 +27,40 @@
 
 // TODO: rework this when c++11 support allows.
 static const RegStorage core_regs_arr[] =
-    {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
-     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
+     rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
+     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31};
 static const RegStorage sp_regs_arr[] =
-    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
-     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
-     rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
-     rs_fr31};
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
+     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
 static const RegStorage dp_regs_arr[] =
-    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
-     rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15};
 static const RegStorage reserved_regs_arr[] =
-    {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
+static const RegStorage core_temps_arr[] =
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12};
 static const RegStorage sp_temps_arr[] =
-    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
-     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15};
 static const RegStorage dp_temps_arr[] =
-    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7};
 
 static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
+    core_regs_arr + arraysize(core_regs_arr));
 static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
+    sp_regs_arr + arraysize(sp_regs_arr));
 static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
+    dp_regs_arr + arraysize(dp_regs_arr));
 static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
+    reserved_regs_arr + arraysize(reserved_regs_arr));
 static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
-    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
-    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+    core_temps_arr + arraysize(core_temps_arr));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
 
 RegLocation Arm64Mir2Lir::LocCReturn() {
   return arm_loc_c_return;
@@ -79,25 +80,26 @@
 
 // Return a target-dependent special register.
 RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+  // TODO(Arm64): this function doesn't work for hard-float ABI.
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
-    case kSelf: res_reg = rs_rARM_SELF; break;
-    case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
-    case kLr: res_reg =  rs_rARM_LR; break;
-    case kPc: res_reg =  rs_rARM_PC; break;
-    case kSp: res_reg =  rs_rARM_SP; break;
-    case kArg0: res_reg = rs_r0; break;
-    case kArg1: res_reg = rs_r1; break;
-    case kArg2: res_reg = rs_r2; break;
-    case kArg3: res_reg = rs_r3; break;
-    case kFArg0: res_reg = rs_r0; break;
-    case kFArg1: res_reg = rs_r1; break;
-    case kFArg2: res_reg = rs_r2; break;
-    case kFArg3: res_reg = rs_r3; break;
-    case kRet0: res_reg = rs_r0; break;
-    case kRet1: res_reg = rs_r1; break;
-    case kInvokeTgt: res_reg = rs_rARM_LR; break;
-    case kHiddenArg: res_reg = rs_r12; break;
+    case kSelf: res_reg = rs_rA64_SELF; break;
+    case kSuspend: res_reg = rs_rA64_SUSPEND; break;
+    case kLr: res_reg =  rs_rA64_LR; break;
+    case kPc: res_reg = RegStorage::InvalidReg(); break;
+    case kSp: res_reg =  rs_rA64_SP; break;
+    case kArg0: res_reg = rs_x0; break;
+    case kArg1: res_reg = rs_x1; break;
+    case kArg2: res_reg = rs_x2; break;
+    case kArg3: res_reg = rs_x3; break;
+    case kFArg0: res_reg = rs_f0; break;
+    case kFArg1: res_reg = rs_f1; break;
+    case kFArg2: res_reg = rs_f2; break;
+    case kFArg3: res_reg = rs_f3; break;
+    case kRet0: res_reg = rs_x0; break;
+    case kRet1: res_reg = rs_x0; break;
+    case kInvokeTgt: res_reg = rs_rA64_LR; break;
+    case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
   }
@@ -105,55 +107,37 @@
 }
 
 RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  switch (arg_num) {
-    case 0:
-      return rs_r1;
-    case 1:
-      return rs_r2;
-    case 2:
-      return rs_r3;
-    default:
-      return RegStorage::InvalidReg();
-  }
+  return RegStorage::InvalidReg();
 }
 
 /*
- * Decode the register id.
+ * Decode the register id. This routine makes assumptions about the encoding made by RegStorage.
  */
 uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
-  uint64_t seed;
-  int shift;
-  int reg_id = reg.GetRegNum();
-  /* Each double register is equal to a pair of single-precision FP registers */
-  if (reg.IsDouble()) {
-    seed = 0x3;
-    reg_id = reg_id << 1;
-  } else {
-    seed = 1;
+  // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
+
+  int reg_raw = reg.GetRawBits();
+  // Check for the zero register.
+  if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
+    // The zero register is not a true register. It is just an immediate zero.
+    return 0;
   }
-  /* FP register starts at bit position 16 */
-  shift = reg.IsFloat() ? kArmFPReg0 : 0;
-  /* Expand the double register id into single offset */
-  shift += reg_id;
-  return (seed << shift);
+
+  return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask);
 }
 
 uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
-  return ENCODE_ARM_REG_PC;
+  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
+  return 0ULL;
 }
 
-// Thumb2 specific setup.  TODO: inline?:
+// Arm64-specific setup.  TODO: inline?
 void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
-  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK_EQ(cu_->instruction_set, kArm64);
   DCHECK(!lir->flags.use_def_invalid);
 
-  int opcode = lir->opcode;
-
   // These flags are somewhat uncommon - bypass if we can.
-  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
-                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
-                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
     if (flags & REG_DEF_SP) {
       lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
     }
@@ -162,61 +146,6 @@
       lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
     }
 
-    if (flags & REG_DEF_LIST0) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-    }
-
-    if (flags & REG_DEF_LIST1) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-    }
-
-    if (flags & REG_DEF_FPCS_LIST0) {
-      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-    }
-
-    if (flags & REG_DEF_FPCS_LIST2) {
-      for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
-      }
-    }
-
-    if (flags & REG_USE_PC) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
-    }
-
-    /* Conservatively treat the IT block */
-    if (flags & IS_IT) {
-      lir->u.m.def_mask = ENCODE_ALL;
-    }
-
-    if (flags & REG_USE_LIST0) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-    }
-
-    if (flags & REG_USE_LIST1) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-    }
-
-    if (flags & REG_USE_FPCS_LIST0) {
-      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-    }
-
-    if (flags & REG_USE_FPCS_LIST2) {
-      for (int i = 0; i < lir->operands[2]; i++) {
-        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
-      }
-    }
-    /* Fixup for kThumbPush/lr and kThumbPop/pc */
-    if (opcode == kThumbPush || opcode == kThumbPop) {
-      uint64_t r8Mask = GetRegMaskCommon(rs_r8);
-      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
-        lir->u.m.use_mask &= ~r8Mask;
-        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
-      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
-        lir->u.m.def_mask &= ~r8Mask;
-        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
-      }
-    }
     if (flags & REG_DEF_LR) {
       lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
     }
@@ -251,92 +180,128 @@
   return res;
 }
 
-static const char* core_reg_names[16] = {
-  "r0",
-  "r1",
-  "r2",
-  "r3",
-  "r4",
-  "r5",
-  "r6",
-  "r7",
-  "r8",
-  "rSELF",
-  "r10",
-  "r11",
-  "r12",
-  "sp",
-  "lr",
-  "pc",
-};
-
-
-static const char* shift_names[4] = {
+static const char* shift_names[4] = {
   "lsl",
   "lsr",
   "asr",
-  "ror"};
+  "ror"
+};
 
-/* Decode and print a ARM register name */
-static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) {
-  int i;
-  bool printed = false;
-  buf[0] = 0;
-  for (i = 0; i < 16; i++, vector >>= 1) {
-    if (vector & 0x1) {
-      int reg_id = i;
-      if (opcode == kThumbPush && i == 8) {
-        reg_id = rs_rARM_LR.GetRegNum();
-      } else if (opcode == kThumbPop && i == 8) {
-        reg_id = rs_rARM_PC.GetRegNum();
-      }
-      if (printed) {
-        snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
-      } else {
-        printed = true;
-        snprintf(buf, buf_size, "r%d", reg_id);
+static const char* extend_names[8] = {
+  "uxtb",
+  "uxth",
+  "uxtw",
+  "uxtx",
+  "sxtb",
+  "sxth",
+  "sxtw",
+  "sxtx",
+};
+
+/* Decode and print a register extension (e.g. ", uxtb #1") */
+static void DecodeRegExtendOrShift(int operand, char* buf, size_t buf_size) {
+  if ((operand & (1 << 6)) == 0) {
+    const char* shift_name = shift_names[(operand >> 7) & 0x3];
+    int amount = operand & 0x3f;
+    snprintf(buf, buf_size, ", %s #%d", shift_name, amount);
+  } else {
+    const char* extend_name = extend_names[(operand >> 3) & 0x7];
+    int amount = operand & 0x7;
+    if (amount == 0) {
+      snprintf(buf, buf_size, ", %s", extend_name);
+    } else {
+      snprintf(buf, buf_size, ", %s #%d", extend_name, amount);
+    }
+  }
+}
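
A quick standalone illustration of the operand layout this decoder assumes (the encodings here are built by hand for the example; in the backend they would come from EncodeShift()/EncodeExtend()):

```cpp
#include <cstddef>
#include <cstdio>

// Trimmed copy of the decoder above, for illustration only: bit 6 of the
// operand selects shift (clear) vs extend (set); shift type lives in bits 8:7
// with a 6-bit amount, extend type in bits 5:3 with a 3-bit amount.
static const char* kShiftNames[4] = {"lsl", "lsr", "asr", "ror"};
static const char* kExtendNames[8] = {"uxtb", "uxth", "uxtw", "uxtx",
                                      "sxtb", "sxth", "sxtw", "sxtx"};

static void Decode(int operand, char* buf, size_t buf_size) {
  if ((operand & (1 << 6)) == 0) {
    snprintf(buf, buf_size, ", %s #%d", kShiftNames[(operand >> 7) & 0x3], operand & 0x3f);
  } else if ((operand & 0x7) == 0) {
    snprintf(buf, buf_size, ", %s", kExtendNames[(operand >> 3) & 0x7]);
  } else {
    snprintf(buf, buf_size, ", %s #%d", kExtendNames[(operand >> 3) & 0x7], operand & 0x7);
  }
}

int main() {
  char buf[32];
  Decode((1 << 7) | 4, buf, sizeof(buf));             // shift form
  printf("%s\n", buf);                                // -> ", lsr #4"
  Decode((1 << 6) | (2 << 3) | 1, buf, sizeof(buf));  // extend form
  printf("%s\n", buf);                                // -> ", uxtw #1"
  return 0;
}
```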
+
+#define BIT_MASK(w) ((UINT64_C(1) << (w)) - UINT64_C(1))
+
+static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) {
+  DCHECK_LE(width, 64U);
+  rotate &= 63;
+  value = value & BIT_MASK(width);
+  return ((value & BIT_MASK(rotate)) << (width - rotate)) | (value >> rotate);
+}
+
+static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) {
+  unsigned i;
+  unsigned reg_size = (is_wide) ? 64 : 32;
+  uint64_t result = value & BIT_MASK(width);
+  DCHECK_NE(width, reg_size);
+  for (i = width; i < reg_size; i *= 2) {
+    result |= (result << i);
+  }
+  DCHECK_EQ(i, reg_size);
+  return result;
+}
+
+/**
+ * @brief Decode an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate.
+ * @param value The encoded logical immediate that is to be decoded.
+ * @return The decoded logical immediate.
+ * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate().
+ */
+uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) {
+  unsigned n     = (value >> 12) & 0x01;
+  unsigned imm_r = (value >>  6) & 0x3f;
+  unsigned imm_s = (value >>  0) & 0x3f;
+
+  // An integer is constructed from the n, imm_s and imm_r bits according to
+  // the following table:
+  //
+  // N   imms immr  size S             R
+  // 1 ssssss rrrrrr 64  UInt(ssssss) UInt(rrrrrr)
+  // 0 0sssss xrrrrr 32  UInt(sssss)  UInt(rrrrr)
+  // 0 10ssss xxrrrr 16  UInt(ssss)   UInt(rrrr)
+  // 0 110sss xxxrrr 8   UInt(sss)    UInt(rrr)
+  // 0 1110ss xxxxrr 4   UInt(ss)     UInt(rr)
+  // 0 11110s xxxxxr 2   UInt(s)      UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+
+  if (n == 1) {
+    DCHECK_NE(imm_s, 0x3fU);
+    uint64_t bits = BIT_MASK(imm_s + 1);
+    return RotateRight(bits, imm_r, 64);
+  } else {
+    DCHECK_NE((imm_s >> 1), 0x1fU);
+    for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
+      if ((imm_s & width) == 0) {
+        unsigned mask = (unsigned)(width - 1);
+        DCHECK_NE((imm_s & mask), mask);
+        uint64_t bits = BIT_MASK((imm_s & mask) + 1);
+        return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width);
       }
     }
   }
-  return buf;
+  return 0;
 }
 
-static char*  DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) {
-  snprintf(buf, buf_size, "s%d", base);
-  for (int i = 1; i < count; i++) {
-    snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i);
-  }
-  return buf;
+/**
+ * @brief Decode an 8-bit immediate into a single-precision float, as encoded by EncodeImmSingle().
+ */
+static float DecodeImmSingle(uint8_t small_float) {
+  int mantissa = (small_float & 0x0f) + 0x10;
+  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
+  float signed_mantissa = static_cast<float>(sign * mantissa);
+  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
+  return signed_mantissa * static_cast<float>(1 << exponent) * 0.0078125f;
 }
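
A few spot checks for this format (a sketch mirroring the static helper above; the encodings follow the ARM VFPExpandImm scheme used by fmov #imm):

```cpp
#include <cassert>
#include <cstdint>

static float DecodeImmSingleSketch(uint8_t small_float) {
  int mantissa = (small_float & 0x0f) + 0x10;
  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
  return static_cast<float>(sign * mantissa) * static_cast<float>(1 << exponent) * 0.0078125f;
}

int main() {
  // 0x70: exponent field 7 -> ((7 + 4) & 7) = 3; 16 * 8 * 0.0078125 = 1.0.
  assert(DecodeImmSingleSketch(0x70) == 1.0f);
  // 0x00: exponent field 0 -> 4; 16 * 16 * 0.0078125 = 2.0.
  assert(DecodeImmSingleSketch(0x00) == 2.0f);
  // 0xf0: same as 0x70 with the sign bit set.
  assert(DecodeImmSingleSketch(0xf0) == -1.0f);
  return 0;
}
```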
 
-static int32_t ExpandImmediate(int value) {
-  int32_t mode = (value & 0xf00) >> 8;
-  uint32_t bits = value & 0xff;
-  switch (mode) {
-    case 0:
-      return bits;
-     case 1:
-      return (bits << 16) | bits;
-     case 2:
-      return (bits << 24) | (bits << 8);
-     case 3:
-      return (bits << 24) | (bits << 16) | (bits << 8) | bits;
-    default:
-      break;
-  }
-  bits = (bits | 0x80) << 24;
-  return bits >> (((value & 0xf80) >> 7) - 8);
-}
-
-const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
-                         "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+                                 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
 /*
  * Interpret a format string and build a string no longer than size
- * See format key in Assemble.c.
+ * See format key in assemble_arm64.cc.
  */
 std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
   std::string buf;
-  int i;
   const char* fmt_end = &fmt[strlen(fmt)];
   char tbuf[256];
   const char* name;
@@ -354,11 +319,24 @@
          DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
          operand = lir->operands[nc-'0'];
          switch (*fmt++) {
-           case 'H':
-             if (operand != 0) {
-               snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2);
-             } else {
+           case 'e':  {
+               // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and
+               // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0"
+               int omittable = ((IS_WIDE(lir->opcode)) ? EncodeExtend(kA64Uxtx, 0) :
+                                EncodeExtend(kA64Uxtw, 0));
+               if (LIKELY(operand == omittable)) {
+                 strcpy(tbuf, "");
+               } else {
+                 DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+               }
+             }
+             break;
+           case 'o':
+             // Omit ", lsl #0"
+             if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) {
                strcpy(tbuf, "");
+             } else {
+               DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
              }
              break;
            case 'B':
@@ -387,39 +365,60 @@
              }
              strcpy(tbuf, name);
              break;
-           case 'b':
-             strcpy(tbuf, "0000");
-             for (i = 3; i >= 0; i--) {
-               tbuf[i] += operand & 1;
-               operand >>= 1;
-             }
-             break;
-           case 'n':
-             operand = ~ExpandImmediate(operand);
-             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
-             break;
-           case 'm':
-             operand = ExpandImmediate(operand);
-             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
-             break;
            case 's':
-             snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand));
+             snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
              break;
            case 'S':
-             snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand));
+             snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK);
              break;
-           case 'h':
-             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
+           case 'f':
+             snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
+                      operand & ARM_FP_REG_MASK);
+             break;
+           case 'l': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               uint64_t imm = DecodeLogicalImmediate(is_wide, operand);
+               snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm);
+             }
+             break;
+           case 'I':
+             snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand));
              break;
            case 'M':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand);
+             break;
            case 'd':
              snprintf(tbuf, arraysize(tbuf), "%d", operand);
              break;
-           case 'C':
-             operand = RegStorage::RegNum(operand);
-             DCHECK_LT(operand, static_cast<int>(
-                 sizeof(core_reg_names)/sizeof(core_reg_names[0])));
-             snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
+           case 'w':
+             if (LIKELY(operand != rwzr))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wzr");
+             break;
+           case 'W':
+             if (LIKELY(operand != rwsp))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wsp");
+             break;
+           case 'x':
+             if (LIKELY(operand != rxzr))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "xzr");
+             break;
+           case 'X':
+             if (LIKELY(operand != rsp))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "sp");
+             break;
+           case 'D':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4));
              break;
            case 'E':
              snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
@@ -427,37 +426,51 @@
            case 'F':
              snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
+           case 'G':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               strcpy(tbuf, (IS_WIDE(lir->opcode)) ? ", lsl #3" : ", lsl #2");
+             break;
            case 'c':
              strcpy(tbuf, cc_names[operand]);
              break;
            case 't':
              snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2),
                  lir->target);
              break;
-           case 'u': {
-             int offset_1 = lir->operands[0];
-             int offset_2 = NEXT_LIR(lir)->operands[0];
-             uintptr_t target =
-                 (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) &
-                 ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
-                 0xfffffffc;
-             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target));
+           case 'r': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwzr && operand != rxzr)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "xzr" : "wzr");
+               }
+             }
              break;
-          }
-
-           /* Nothing to print for BLX_2 */
-           case 'v':
-             strcpy(tbuf, "see above");
+           case 'R': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwsp && operand != rsp)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "sp" : "wsp");
+               }
+             }
              break;
-           case 'R':
-             DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf));
+           case 'p':
+             snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand,
+                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand);
              break;
-           case 'P':
-             DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf));
-             break;
-           case 'Q':
-             DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf));
+           case 'T':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else if (operand == 1)
+               strcpy(tbuf, ", lsl #12");
+             else
+               strcpy(tbuf, ", DecodeError3");
              break;
            default:
              strcpy(tbuf, "DecodeError1");
@@ -519,14 +532,14 @@
 }
 
 bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
-  return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
+  return (lir->opcode == kA64B1t);
 }
 
 Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena) {
   // Sanity check - make sure encoding map lines up.
-  for (int i = 0; i < kArmLast; i++) {
-    if (Arm64Mir2Lir::EncodingMap[i].opcode != i) {
+  for (int i = 0; i < kA64Last; i++) {
+    if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
       LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
                  << " is wrong: expecting " << i << ", seeing "
                  << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
@@ -534,8 +547,8 @@
   }
 }
 
-Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena) {
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena) {
   return new Arm64Mir2Lir(cu, mir_graph, arena);
 }
 
@@ -584,7 +597,7 @@
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
   bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
   if (no_suspend) {
-    GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
+    GetRegInfo(rs_rA64_SUSPEND)->MarkFree();
   }
 
   // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
@@ -595,15 +608,7 @@
 }
 
 void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
+  LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64";
 }
 
 /*
@@ -613,7 +618,7 @@
  */
 
 void Arm64Mir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum());
+  core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum());
   num_core_spills_++;
 }
 
@@ -649,100 +654,96 @@
 
 /* Clobber all regs that might be used by an external C call */
 void Arm64Mir2Lir::ClobberCallerSave() {
-  // TODO: rework this - it's gotten even more ugly.
-  Clobber(rs_r0);
-  Clobber(rs_r1);
-  Clobber(rs_r2);
-  Clobber(rs_r3);
-  Clobber(rs_r12);
-  Clobber(rs_r14lr);
-  Clobber(rs_fr0);
-  Clobber(rs_fr1);
-  Clobber(rs_fr2);
-  Clobber(rs_fr3);
-  Clobber(rs_fr4);
-  Clobber(rs_fr5);
-  Clobber(rs_fr6);
-  Clobber(rs_fr7);
-  Clobber(rs_fr8);
-  Clobber(rs_fr9);
-  Clobber(rs_fr10);
-  Clobber(rs_fr11);
-  Clobber(rs_fr12);
-  Clobber(rs_fr13);
-  Clobber(rs_fr14);
-  Clobber(rs_fr15);
-  Clobber(rs_dr0);
-  Clobber(rs_dr1);
-  Clobber(rs_dr2);
-  Clobber(rs_dr3);
-  Clobber(rs_dr4);
-  Clobber(rs_dr5);
-  Clobber(rs_dr6);
-  Clobber(rs_dr7);
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  Clobber(rs_x0);
+  Clobber(rs_x1);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  Clobber(rs_x12);
+  Clobber(rs_x30);
+  Clobber(rs_f0);
+  Clobber(rs_f1);
+  Clobber(rs_f2);
+  Clobber(rs_f3);
+  Clobber(rs_f4);
+  Clobber(rs_f5);
+  Clobber(rs_f6);
+  Clobber(rs_f7);
+  Clobber(rs_f8);
+  Clobber(rs_f9);
+  Clobber(rs_f10);
+  Clobber(rs_f11);
+  Clobber(rs_f12);
+  Clobber(rs_f13);
+  Clobber(rs_f14);
+  Clobber(rs_f15);
 }
 
 RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
   RegLocation res = LocCReturnWide();
-  res.reg.SetLowReg(rs_r2.GetReg());
-  res.reg.SetHighReg(rs_r3.GetReg());
-  Clobber(rs_r2);
-  Clobber(rs_r3);
-  MarkInUse(rs_r2);
-  MarkInUse(rs_r3);
+  res.reg.SetReg(rx2);
+  res.reg.SetHighReg(rx3);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  MarkInUse(rs_x2);
+  MarkInUse(rs_x3);
   MarkWide(res.reg);
   return res;
 }
 
 RegLocation Arm64Mir2Lir::GetReturnAlt() {
   RegLocation res = LocCReturn();
-  res.reg.SetReg(rs_r1.GetReg());
-  Clobber(rs_r1);
-  MarkInUse(rs_r1);
+  res.reg.SetReg(rx1);
+  Clobber(rs_x1);
+  MarkInUse(rs_x1);
   return res;
 }
 
 /* To be used when explicitly managing register use */
 void Arm64Mir2Lir::LockCallTemps() {
-  LockTemp(rs_r0);
-  LockTemp(rs_r1);
-  LockTemp(rs_r2);
-  LockTemp(rs_r3);
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
 }
 
 /* To be used when explicitly managing register use */
 void Arm64Mir2Lir::FreeCallTemps() {
-  FreeTemp(rs_r0);
-  FreeTemp(rs_r1);
-  FreeTemp(rs_r2);
-  FreeTemp(rs_r3);
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
 }
 
-RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
-  LoadWordDisp(rs_rARM_SELF, offset.Int32Value(), rs_rARM_LR);
-  return rs_rARM_LR;
+RegStorage Arm64Mir2Lir::LoadHelper(A64ThreadOffset offset) {
+  // TODO(Arm64): use LoadWordDisp instead.
+  //   e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR);
+  LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64);
+  return rs_rA64_LR;
 }
 
 LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
-  RegStorage tmp = rs_r0;
-  Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
-  LIR* load2 = Load32Disp(tmp, 0, tmp);
+  RegStorage tmp = rs_x0;
+  LoadWordDisp(rs_rA64_SELF, A64_THREAD_SUSPEND_TRIGGER_OFFSET, tmp);
+  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
   return load2;
 }
 
 uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
   DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[opcode].flags;
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags;
 }
 
 const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
   DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[opcode].name;
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name;
 }
 
 const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
   DCHECK(!IsPseudoLirOp(opcode));
-  return Arm64Mir2Lir::EncodingMap[opcode].fmt;
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
 }
 
 /*
@@ -800,4 +801,140 @@
   return res;
 }
 
+// TODO(Arm64): reuse info in QuickArgumentVisitor?
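+// Given the number of core/FP argument registers already used, returns the
+// physical register that holds the next incoming argument, or an invalid
+// RegStorage when the argument arrives on the stack. With num_gpr_used
+// starting at 1 (x0 holds the incoming Method*), ins of types
+// (int, double, long) map to w1, d0 and x2 respectively.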
+static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
+                                    OpSize* op_size) {
+  if (loc->fp) {
+    int n = *num_fpr_used;
+    if (n < 8) {
+      *num_fpr_used = n + 1;
+      RegStorage::RegStorageKind reg_kind;
+      if (loc->wide) {
+        *op_size = kDouble;
+        reg_kind = RegStorage::k64BitSolo;
+      } else {
+        *op_size = kSingle;
+        reg_kind = RegStorage::k32BitSolo;
+      }
+      return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
+    }
+  } else {
+    int n = *num_gpr_used;
+    if (n < 7) {
+      *num_gpr_used = n + 1;
+      if (loc->wide) {
+        *op_size = k64;
+        return RegStorage::Solo64(n);
+      } else {
+        *op_size = k32;
+        return RegStorage::Solo32(n);
+      }
+    }
+  }
+
+  return RegStorage::InvalidReg();
+}
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+  int num_gpr_used = 1;
+  int num_fpr_used = 0;
+
+  /*
+   * Dummy up a RegLocation for the incoming Method*
+   * It will attempt to keep kArg0 live (or copy it to home location
+   * if promoted).
+   */
+  RegLocation rl_src = rl_method;
+  rl_src.location = kLocPhysReg;
+  rl_src.reg = TargetReg(kArg0);
+  rl_src.home = false;
+  MarkLive(rl_src);
+
+  // TODO(Arm64): compress the Method pointer?
+  StoreValueWide(rl_method, rl_src);
+
+  // If Method* has been promoted, explicitly flush
+  if (rl_method.location == kLocPhysReg) {
+    StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+  }
+
+  if (cu_->num_ins == 0) {
+    return;
+  }
+
+  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegLocation* t_loc = &ArgLocs[i];
+    OpSize op_size;
+    RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
+
+    if (reg.Valid()) {
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+      } else {
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size);
+        if (reg.Is64Bit()) {
+          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
+            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
+          }
+          i += 1;
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
+                     RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
+
+int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
+                              NextCallInsn next_call_insn,
+                              const MethodReference& target_method,
+                              uint32_t vtable_idx, uintptr_t direct_code,
+                              uintptr_t direct_method, InvokeType type, bool skip_this) {
+  int last_arg_reg = TargetReg(kArg3).GetReg();
+  int next_reg = TargetReg(kArg1).GetReg();
+  int next_arg = 0;
+  if (skip_this) {
+    next_reg++;
+    next_arg++;
+  }
+  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
+    RegLocation rl_arg = info->args[next_arg++];
+    rl_arg = UpdateRawLoc(rl_arg);
+    if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
+      RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
+      LoadValueDirectWideFixed(rl_arg, r_tmp);
+      next_reg++;
+      next_arg++;
+    } else {
+      if (rl_arg.wide) {
+        rl_arg = NarrowRegLoc(rl_arg);
+        rl_arg.is_const = false;
+      }
+      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
+    }
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                direct_code, direct_method, type);
+  }
+  return call_state;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 3782bc9..e46e201 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -20,133 +20,236 @@
 
 namespace art {
 
-/* This file contains codegen for the Thumb ISA. */
+/* This file contains codegen for the A64 ISA. */
 
-static int32_t EncodeImmSingle(int32_t value) {
-  int32_t res;
-  int32_t bit_a =  (value & 0x80000000) >> 31;
-  int32_t not_bit_b = (value & 0x40000000) >> 30;
-  int32_t bit_b =  (value & 0x20000000) >> 29;
-  int32_t b_smear =  (value & 0x3e000000) >> 25;
-  int32_t slice =   (value & 0x01f80000) >> 19;
-  int32_t zeroes =  (value & 0x0007ffff);
-  if (zeroes != 0)
+static int32_t EncodeImmSingle(uint32_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
+   *
+   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
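+   *
+   * For instance, 0.5f (bit pattern 0x3f000000) fits this form and encodes,
+   * per the steps below, to the 8-bit immediate 0x60.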
+   */
+
+  // bits[18..0] are cleared.
+  if ((bits & 0x0007ffff) != 0)
     return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0x1f))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
+
+  // bits[29..25] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 16) & 0x3e00;
+  if (b_pattern != 0 && b_pattern != 0x3e00)
+    return -1;
+
+  // bit[30] and bit[29] are opposite.
+  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
+    return -1;
+
+  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
 }
 
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate.  If not, return -1.  If so return encoded 8-bit value.
- */
-static int32_t EncodeImmDouble(int64_t value) {
-  int32_t res;
-  int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
-  int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
-  int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
-  int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
-  int32_t slice =  (value & INT64_C(0x003f000000000000)) >> 48;
-  uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
-  if (zeroes != 0ull)
+static int32_t EncodeImmDouble(uint64_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+   *   0000.0000.0000.0000.0000.0000.0000.0000
+   *
+   * where B = not(b).
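+   *
+   * For instance, 1.0 (bit pattern 0x3ff0000000000000) fits this form and
+   * encodes, per the steps below, to the 8-bit immediate 0x70.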
+   */
+
+  // bits[47..0] are cleared.
+  if ((bits & UINT64_C(0xffffffffffff)) != 0)
     return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0xff))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
+
+  // bits[61..54] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+  if (b_pattern != 0 && b_pattern != 0x3fc0)
+    return -1;
+
+  // bit[62] and bit[61] are opposite.
+  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
+    return -1;
+
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
 }
 
-LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int value) {
+LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) {
   DCHECK(RegStorage::IsSingle(r_dest));
   if (value == 0) {
-    // TODO: we need better info about the target CPU.  a vector exclusive or
-    //       would probably be better here if we could rely on its existance.
-    // Load an immediate +2.0 (which encodes to 0)
-    NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
-    // +0.0 = +2.0 - +2.0
-    return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
+    return NewLIR2(kA64Fmov2sw, r_dest, rwzr);
   } else {
-    int encoded_imm = EncodeImmSingle(value);
+    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
     if (encoded_imm >= 0) {
-      return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
+      return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm);
     }
   }
+
   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
   if (data_target == NULL) {
     data_target = AddWordData(&literal_list_, value);
   }
-  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
-                          r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
+
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
+                            r_dest, 0, 0, 0, 0, data_target);
   SetMemRefType(load_pc_rel, true, kLiteral);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
 
-static int LeadingZeros(uint32_t val) {
-  uint32_t alt;
-  int32_t n;
-  int32_t count;
-
-  count = 16;
-  n = 32;
-  do {
-    alt = val >> count;
-    if (alt != 0) {
-      n = n - count;
-      val = alt;
+LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) {
+  DCHECK(RegStorage::IsDouble(r_dest));
+  if (value == 0) {
+    return NewLIR2(kA64Fmov2Sx, r_dest, rwzr);
+  } else {
+    int32_t encoded_imm = EncodeImmDouble(value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm);
     }
-    count >>= 1;
-  } while (count);
-  return n - val;
+  }
+
+  // No short form - load from the literal pool.
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
+  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+  if (data_target == NULL) {
+    data_target = AddWideData(&literal_list_, val_lo, val_hi);
+  }
+
+  DCHECK(RegStorage::IsFloat(r_dest));
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
+                            r_dest, 0, 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
 }
 
-/*
- * Determine whether value can be encoded as a Thumb2 modified
- * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
- */
-int Arm64Mir2Lir::ModifiedImmediate(uint32_t value) {
-  int32_t z_leading;
-  int32_t z_trailing;
-  uint32_t b0 = value & 0xff;
+static int CountLeadingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
+}
 
-  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
-  if (value <= 0xFF)
-    return b0;  // 0:000:a:bcdefgh
-  if (value == ((b0 << 16) | b0))
-    return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
-  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
-    return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
-  b0 = (value >> 8) & 0xff;
-  if (value == ((b0 << 24) | (b0 << 8)))
-    return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
-  /* Can we do it with rotation? */
-  z_leading = LeadingZeros(value);
-  z_trailing = 32 - LeadingZeros(~value & (value - 1));
-  /* A run of eight or fewer active bits? */
-  if ((z_leading + z_trailing) < 24)
-    return -1;  /* No - bail */
-  /* left-justify the constant, discarding msb (known to be 1) */
-  value <<= z_leading + 1;
-  /* Create bcdefgh */
-  value >>= 25;
-  /* Put it all together */
-  return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
+static int CountTrailingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
+}
+
+static int CountSetBits(bool is_wide, uint64_t value) {
+  return ((is_wide) ?
+          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
+}
+
+/**
+ * @brief Try encoding an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
+ * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
+ *   32-bit if @p is_wide is false.
+ * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
+ * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
+ */
+int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
+  unsigned n, imm_s, imm_r;
+
+  // Logical immediates are encoded using parameters n, imm_s and imm_r using
+  // the following table:
+  //
+  //  N   imms    immr    size        S             R
+  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
+  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
+  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
+  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
+  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
+  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+  //
+  // To test if an arbitrary immediate can be encoded using this scheme, an
+  // iterative algorithm is used.
+  //
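+  // For example (32-bit case): value == 0xff has 24 leading zeros, no
+  // trailing zeros and 8 set bits, so step 3 below succeeds on the first
+  // pass, giving n = 0, imm_r = 0, imm_s = 0b000111, i.e. the encoding 0x007.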
+
+  // 1. If the value has all set or all clear bits, it can't be encoded.
+  if (value == 0 || value == ~UINT64_C(0) ||
+      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
+    return -1;
+  }
+
+  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
+  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
+  unsigned trail_zero = CountTrailingZeros(is_wide, value);
+  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
+  unsigned set_bits   = CountSetBits(is_wide, value);
+
+  // The fixed bits in the immediate s field.
+  // If width == 64 (X reg), start at 0xFFFFFF80.
+  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
+  // widths won't be executed.
+  unsigned width = (is_wide) ? 64 : 32;
+  int imm_s_fixed = (is_wide) ? -128 : -64;
+  int imm_s_mask = 0x3f;
+
+  for (;;) {
+    // 2. If the value is two bits wide, it can be encoded.
+    if (width == 2) {
+      n = 0;
+      imm_s = 0x3C;
+      imm_r = (value & 3) - 1;
+      break;
+    }
+
+    n = (width == 64) ? 1 : 0;
+    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
+    if ((lead_zero + set_bits) == width) {
+      imm_r = 0;
+    } else {
+      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
+    }
+
+    // 3. If the sum of leading zeros, trailing zeros and set bits is
+    //    equal to the bit width of the value, it can be encoded.
+    if (lead_zero + trail_zero + set_bits == width) {
+      break;
+    }
+
+    // 4. If the sum of leading ones, trailing ones and unset bits in the
+    //    value is equal to the bit width of the value, it can be encoded.
+    if (lead_one + trail_one + (width - set_bits) == width) {
+      break;
+    }
+
+    // 5. If the most-significant half of the bitwise value is equal to
+    //    the least-significant half, return to step 2 using the
+    //    least-significant half of the value.
+    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
+    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
+      width >>= 1;
+      set_bits >>= 1;
+      imm_s_fixed >>= 1;
+      continue;
+    }
+
+    // 6. Otherwise, the value can't be encoded.
+    return -1;
+  }
+
+  return (n << 12 | imm_r << 6 | imm_s);
 }
 
 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
-  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
 }
 
 bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
@@ -162,8 +265,8 @@
 }
 
 /*
- * Load a immediate using a shortcut if possible; otherwise
- * grab from the per-translation literal pool.
+ * Load an immediate using a single instruction when possible; otherwise
+ * use a pair of movz and movk instructions.
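+ *
+ * For example, 0 and -1 become "mov wX, wzr" and "mvn wX, wzr"; 0x12345678
+ * becomes "movz wX, #0x5678" plus "movk wX, #0x1234, lsl #16"; a logical
+ * immediate such as 0x00ff00ff becomes "orr wX, wzr, #0x00ff00ff".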
  *
  * No additional register clobbering operation performed. Use this version when
  * 1) r_dest is freshly returned from AllocTemp or
@@ -171,204 +274,163 @@
  */
 LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
   LIR* res;
-  int mod_imm;
 
   if (r_dest.IsFloat()) {
     return LoadFPConstantValue(r_dest.GetReg(), value);
   }
 
-  /* See if the value can be constructed cheaply */
-  if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
-    return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
+  // Loading SP/ZR with an immediate is not supported.
+  DCHECK_NE(r_dest.GetReg(), rwsp);
+  DCHECK_NE(r_dest.GetReg(), rwzr);
+
+  // Compute how many movk, movz instructions are needed to load the value.
+  uint16_t high_bits = High16Bits(value);
+  uint16_t low_bits = Low16Bits(value);
+
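+  // A half-word is "fast" when it is all zeros or all ones, i.e. when
+  // (uint16_t)(bits + 1) <= 1: such a half needs no movk of its own.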
+  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
+  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
+
+  if (LIKELY(low_fast || high_fast)) {
+    // 1 instruction is enough to load the immediate.
+    if (LIKELY(low_bits == high_bits)) {
+      // Value is either 0 or -1: we can just use wzr.
+      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
+      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
+    } else {
+      uint16_t uniform_bits, useful_bits;
+      int shift;
+
+      if (LIKELY(high_fast)) {
+        shift = 0;
+        uniform_bits = high_bits;
+        useful_bits = low_bits;
+      } else {
+        shift = 1;
+        uniform_bits = low_bits;
+        useful_bits = high_bits;
+      }
+
+      if (UNLIKELY(uniform_bits != 0)) {
+        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
+      } else {
+        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
+      }
+    }
+  } else {
+    // movk, movz require 2 instructions. Try detecting logical immediates.
+    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
+    if (log_imm >= 0) {
+      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
+    } else {
+      // Use 2 instructions.
+      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
+      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
+    }
   }
-  /* Check Modified immediate special cases */
-  mod_imm = ModifiedImmediate(value);
-  if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
-    return res;
-  }
-  mod_imm = ModifiedImmediate(~value);
-  if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
-    return res;
-  }
-  /* 16-bit immediate? */
-  if ((value & 0xffff) == value) {
-    res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
-    return res;
-  }
-  /* Do a low/high pair */
-  res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
-  NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
+
   return res;
 }
 
 LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched  during assembly */);
+  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched  during assembly */);
   res->target = target;
   return res;
 }
 
 LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly
-  // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is
-  // substantial.
-  LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */,
-                        ArmConditionEncoding(cc));
+  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
+                        0 /* offset to be patched */);
   branch->target = target;
   return branch;
 }
 
 LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
-  ArmOpcode opcode = kThumbBkpt;
+  ArmOpcode opcode = kA64Brk1d;
   switch (op) {
     case kOpBlx:
-      opcode = kThumbBlxR;
+      opcode = kA64Blr1x;
       break;
-    case kOpBx:
-      opcode = kThumbBx;
-      break;
+    // TODO(Arm64): port kThumbBx.
+    // case kOpBx:
+    //   opcode = kThumbBx;
+    //   break;
     default:
       LOG(FATAL) << "Bad opcode " << op;
   }
   return NewLIR1(opcode, r_dest_src.GetReg());
 }
 
-LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
-                               int shift) {
-  bool thumb_form =
-      ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
-  ArmOpcode opcode = kThumbBkpt;
-  switch (op) {
-    case kOpAdc:
-      opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
-      break;
-    case kOpAnd:
-      opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
-      break;
-    case kOpBic:
-      opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
-      break;
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2,
+                                 int shift, bool is_wide) {
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (OP_KIND_UNWIDE(op)) {
     case kOpCmn:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
+      opcode = kA64Cmn3Rro;
       break;
     case kOpCmp:
-      if (thumb_form)
-        opcode = kThumbCmpRR;
-      else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
-        opcode = kThumbCmpHH;
-      else if ((shift == 0) && r_dest_src1.Low8())
-        opcode = kThumbCmpLH;
-      else if (shift == 0)
-        opcode = kThumbCmpHL;
-      else
-        opcode = kThumb2CmpRR;
-      break;
-    case kOpXor:
-      opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
+      // TODO(Arm64): check the instruction above: "cmp w0, w1" is rendered as "cmp w0, w1, uxtb".
+      opcode = kA64Cmp3Rro;
       break;
     case kOpMov:
-      DCHECK_EQ(shift, 0);
-      if (r_dest_src1.Low8() && r_src2.Low8())
-        opcode = kThumbMovRR;
-      else if (!r_dest_src1.Low8() && !r_src2.Low8())
-        opcode = kThumbMovRR_H2H;
-      else if (r_dest_src1.Low8())
-        opcode = kThumbMovRR_H2L;
-      else
-        opcode = kThumbMovRR_L2H;
-      break;
-    case kOpMul:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
+      opcode = kA64Mov2rr;
       break;
     case kOpMvn:
-      opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
+      opcode = kA64Mvn2rr;
       break;
     case kOpNeg:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
-      break;
-    case kOpOr:
-      opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
-      break;
-    case kOpSbc:
-      opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
+      opcode = kA64Neg3rro;
       break;
     case kOpTst:
-      opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
-      break;
-    case kOpLsl:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
-      break;
-    case kOpLsr:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
-      break;
-    case kOpAsr:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
-      break;
-    case kOpRor:
-      DCHECK_EQ(shift, 0);
-      opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
-      break;
-    case kOpAdd:
-      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
-      break;
-    case kOpSub:
-      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+      opcode = kA64Tst3rro;
       break;
     case kOpRev:
       DCHECK_EQ(shift, 0);
-      if (!thumb_form) {
-        // Binary, but rm is encoded twice.
-        return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
-      }
-      opcode = kThumbRev;
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2);
       break;
     case kOpRevsh:
-      DCHECK_EQ(shift, 0);
-      if (!thumb_form) {
-        // Binary, but rm is encoded twice.
-        return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
-      }
-      opcode = kThumbRevsh;
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2);
       break;
     case kOp2Byte:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use sbfm directly.
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7);
     case kOp2Short:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // For now we use sbfm rather than its alias, sbfx.
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
     case kOp2Char:
-      DCHECK_EQ(shift, 0);
-      return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use ubfm directly.
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
     default:
-      LOG(FATAL) << "Bad opcode: " << op;
-      break;
+      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
   }
+
   DCHECK(!IsPseudoLirOp(opcode));
   if (EncodingMap[opcode].flags & IS_BINARY_OP) {
-    return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR2(opcode | wide, r_dest_src1, r_src2);
   } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
-    if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
-    } else {
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
+    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+    if (kind == kFmtExtend || kind == kFmtShift) {
+      DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift));
+      return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift);
     }
-  } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
-    return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
-  } else {
-    LOG(FATAL) << "Unexpected encoding operand count";
-    return NULL;
   }
+
+  LOG(FATAL) << "Unexpected encoding operand count";
+  return NULL;
 }
 
 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  return OpRegRegShift(op, r_dest_src1, r_src2, 0);
+  return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT,
+                       r_dest_src1.Is64Bit());
 }
 
 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -382,207 +444,162 @@
 }
 
 LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
-  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
+  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
   return NULL;
 }
 
-LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
-                                  RegStorage r_src2, int shift) {
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
-  switch (op) {
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1,
+                                    int r_src2, int shift, bool is_wide) {
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (OP_KIND_UNWIDE(op)) {
     case kOpAdd:
-      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
+      opcode = kA64Add4rrro;
       break;
     case kOpSub:
-      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+      opcode = kA64Sub4rrro;
       break;
-    case kOpRsub:
-      opcode = kThumb2RsubRRR;
-      break;
+    // case kOpRsub:
+    //   opcode = kA64RsubWWW;
+    //   break;
     case kOpAdc:
-      opcode = kThumb2AdcRRR;
+      opcode = kA64Adc3rrr;
       break;
     case kOpAnd:
-      opcode = kThumb2AndRRR;
-      break;
-    case kOpBic:
-      opcode = kThumb2BicRRR;
+      opcode = kA64And4rrro;
       break;
     case kOpXor:
-      opcode = kThumb2EorRRR;
+      opcode = kA64Eor4rrro;
       break;
     case kOpMul:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2MulRRR;
+      opcode = kA64Mul3rrr;
       break;
     case kOpDiv:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2SdivRRR;
+      opcode = kA64Sdiv3rrr;
       break;
     case kOpOr:
-      opcode = kThumb2OrrRRR;
+      opcode = kA64Orr4rrro;
       break;
     case kOpSbc:
-      opcode = kThumb2SbcRRR;
+      opcode = kA64Sbc3rrr;
       break;
     case kOpLsl:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2LslRRR;
+      opcode = kA64Lsl3rrr;
       break;
     case kOpLsr:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2LsrRRR;
+      opcode = kA64Lsr3rrr;
       break;
     case kOpAsr:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2AsrRRR;
+      opcode = kA64Asr3rrr;
       break;
     case kOpRor:
-      DCHECK_EQ(shift, 0);
-      opcode = kThumb2RorRRR;
+      opcode = kA64Ror3rrr;
       break;
     default:
       LOG(FATAL) << "Bad opcode: " << op;
       break;
   }
-  DCHECK(!IsPseudoLirOp(opcode));
+
+  // The instructions above belong to two kinds:
+  // - 4-operands instructions, where the last operand is a shift/extend immediate,
+  // - 3-operands instructions with no shift/extend.
+  ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode;
   if (EncodingMap[opcode].flags & IS_QUAD_OP) {
-    return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift);
   } else {
     DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
-    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR3(widened_opcode, r_dest, r_src1, r_src2);
   }
 }
 
 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
-  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
+  return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT);
 }
 
 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
   LIR* res;
   bool neg = (value < 0);
-  int32_t abs_value = (neg) ? -value : value;
-  ArmOpcode opcode = kThumbBkpt;
-  ArmOpcode alt_opcode = kThumbBkpt;
-  bool all_low_regs = r_dest.Low8() && r_src1.Low8();
-  int32_t mod_imm = ModifiedImmediate(value);
+  int64_t abs_value = (neg) ? -value : value;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int32_t log_imm = -1;
+  bool is_wide = OP_KIND_IS_WIDE(op);
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
 
-  switch (op) {
-    case kOpLsl:
-      if (all_low_regs)
-        return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+  switch (OP_KIND_UNWIDE(op)) {
+    case kOpLsl: {
+      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
+      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
+      // For now, we just use ubfm directly.
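+      // E.g. "lsl w1, w2, #4" is emitted as "ubfm w1, w2, #28, #27".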
+      int max_value = (is_wide) ? 64 : 32;
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
+                     (-value) & (max_value - 1), max_value - 1 - value);
+    }
     case kOpLsr:
-      if (all_low_regs)
-        return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
     case kOpAsr:
-      if (all_low_regs)
-        return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
-      else
-        return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
     case kOpRor:
-      return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
+      // For now, we just use extr directly.
+      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
+                     value);
     case kOpAdd:
-      if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
-        return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
-      } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
-          (value <= 1020) && ((value & 0x3) == 0)) {
-        return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
-      }
+      neg = !neg;
       // Note: intentional fallthrough
     case kOpSub:
-      if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
-        if (op == kOpAdd)
-          opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
-        else
-          opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
-        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
-      }
-      if (mod_imm < 0) {
-        mod_imm = ModifiedImmediate(-value);
-        if (mod_imm >= 0) {
-          op = (op == kOpAdd) ? kOpSub : kOpAdd;
-        }
-      }
-      if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) {
-        // This is deliberately used only if modified immediate encoding is inadequate since
-        // we sometimes actually use the flags for small values but not necessarily low regs.
-        if (op == kOpAdd)
-          opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
-        else
-          opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
-        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
-      }
-      if (op == kOpSub) {
-        opcode = kThumb2SubRRI8M;
-        alt_opcode = kThumb2SubRRR;
+      // Add and sub below read/write sp rather than xzr.
+      if (abs_value < 0x1000) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
+      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
-        opcode = kThumb2AddRRI8M;
-        alt_opcode = kThumb2AddRRR;
+        log_imm = -1;
+        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
       }
       break;
-    case kOpRsub:
-      opcode = kThumb2RsubRRI8M;
-      alt_opcode = kThumb2RsubRRR;
-      break;
+    // case kOpRsub:
+    //   opcode = kThumb2RsubRRI8M;
+    //   alt_opcode = kThumb2RsubRRR;
+    //   break;
     case kOpAdc:
-      opcode = kThumb2AdcRRI8M;
-      alt_opcode = kThumb2AdcRRR;
+      log_imm = -1;
+      alt_opcode = kA64Adc3rrr;
       break;
     case kOpSbc:
-      opcode = kThumb2SbcRRI8M;
-      alt_opcode = kThumb2SbcRRR;
+      log_imm = -1;
+      alt_opcode = kA64Sbc3rrr;
       break;
     case kOpOr:
-      opcode = kThumb2OrrRRI8M;
-      alt_opcode = kThumb2OrrRRR;
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Orr3Rrl;
+      alt_opcode = kA64Orr4rrro;
       break;
     case kOpAnd:
-      if (mod_imm < 0) {
-        mod_imm = ModifiedImmediate(~value);
-        if (mod_imm >= 0) {
-          return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
-        }
-      }
-      opcode = kThumb2AndRRI8M;
-      alt_opcode = kThumb2AndRRR;
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64And3Rrl;
+      alt_opcode = kA64And4rrro;
       break;
     case kOpXor:
-      opcode = kThumb2EorRRI8M;
-      alt_opcode = kThumb2EorRRR;
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Eor3Rrl;
+      alt_opcode = kA64Eor4rrro;
       break;
     case kOpMul:
       // TUNING: power of 2, shift & add
-      mod_imm = -1;
-      alt_opcode = kThumb2MulRRR;
+      log_imm = -1;
+      alt_opcode = kA64Mul3rrr;
       break;
-    case kOpCmp: {
-      LIR* res;
-      if (mod_imm >= 0) {
-        res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
-      } else {
-        mod_imm = ModifiedImmediate(-value);
-        if (mod_imm >= 0) {
-          res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
-        } else {
-          RegStorage r_tmp = AllocTemp();
-          res = LoadConstant(r_tmp, value);
-          OpRegReg(kOpCmp, r_src1, r_tmp);
-          FreeTemp(r_tmp);
-        }
-      }
-      return res;
-    }
     default:
       LOG(FATAL) << "Bad opcode: " << op;
   }
 
-  if (mod_imm >= 0) {
-    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
+  if (log_imm >= 0) {
+    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
     RegStorage r_scratch = AllocTemp();
     LoadConstant(r_scratch, value);
@@ -595,226 +612,209 @@
   }
 }
 
-/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
 LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
+  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false);
+}
+
+LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) {
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode neg_opcode = kA64Brk1d;
+  bool shift;
   bool neg = (value < 0);
-  int32_t abs_value = (neg) ? -value : value;
-  bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
-  ArmOpcode opcode = kThumbBkpt;
-  switch (op) {
+  uint64_t abs_value = (neg) ? -value : value;
+
+  if (LIKELY(abs_value < 0x1000)) {
+    // abs_value is a 12-bit immediate.
+    shift = false;
+  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+    // abs_value is a shifted 12-bit immediate.
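+    // E.g. 0x2000 is encoded as the 12-bit immediate 2 with "lsl #12" set.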
+    shift = true;
+    abs_value >>= 12;
+  } else {
+    RegStorage r_tmp = AllocTemp();
+    LIR* res = LoadConstant(r_tmp, value);
+    OpRegReg(op, r_dest_src1, r_tmp);
+    FreeTemp(r_tmp);
+    return res;
+  }
+
+  switch (OP_KIND_UNWIDE(op)) {
     case kOpAdd:
-      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
-        DCHECK_EQ((value & 0x3), 0);
-        return NewLIR1(kThumbAddSpI7, value >> 2);
-      } else if (short_form) {
-        opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
-      }
+      neg_opcode = kA64Sub4RRdT;
+      opcode = kA64Add4RRdT;
       break;
     case kOpSub:
-      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
-        DCHECK_EQ((value & 0x3), 0);
-        return NewLIR1(kThumbSubSpI7, value >> 2);
-      } else if (short_form) {
-        opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
-      }
+      neg_opcode = kA64Add4RRdT;
+      opcode = kA64Sub4RRdT;
       break;
     case kOpCmp:
-      if (!neg && short_form) {
-        opcode = kThumbCmpRI8;
-      } else {
-        short_form = false;
-      }
+      neg_opcode = kA64Cmn3RdT;
+      opcode = kA64Cmp3RdT;
       break;
     default:
-      /* Punt to OpRegRegImm - if bad case catch it there */
-      short_form = false;
+      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
       break;
   }
-  if (short_form) {
-    return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
-  } else {
-    return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
-  }
+
+  if (UNLIKELY(neg))
+    opcode = neg_opcode;
+
+  if (EncodingMap[opcode].flags & IS_QUAD_OP)
+    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
+                   (shift) ? 1 : 0);
+  else
+    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
 }
 
 LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-  LIR* res = NULL;
-  int32_t val_lo = Low32Bits(value);
-  int32_t val_hi = High32Bits(value);
   if (r_dest.IsFloat()) {
-    DCHECK(!r_dest.IsPair());
-    if ((val_lo == 0) && (val_hi == 0)) {
-      // TODO: we need better info about the target CPU.  a vector exclusive or
-      //       would probably be better here if we could rely on its existance.
-      // Load an immediate +2.0 (which encodes to 0)
-      NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
-      // +0.0 = +2.0 - +2.0
-      res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
-    } else {
-      int encoded_imm = EncodeImmDouble(value);
-      if (encoded_imm >= 0) {
-        res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
-      }
-    }
+    return LoadFPConstantValueWide(r_dest.GetReg(), value);
   } else {
-    // NOTE: Arm32 assumption here.
-    DCHECK(r_dest.IsPair());
-    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
-      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
-      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
-    }
-  }
-  if (res == NULL) {
+    // TODO(Arm64): check whether we can load the immediate with a short form.
+    //   e.g. via movz, movk or via logical immediate.
+
     // No short form - load from the literal pool.
+    int32_t val_lo = Low32Bits(value);
+    int32_t val_hi = High32Bits(value);
     LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
     if (data_target == NULL) {
       data_target = AddWideData(&literal_list_, val_lo, val_hi);
     }
-    if (r_dest.IsFloat()) {
-      res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
-                   r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
-    } else {
-      DCHECK(r_dest.IsPair());
-      res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
-                   r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
-    }
+
+    LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
+                      r_dest.GetReg(), 0, 0, 0, 0, data_target);
     SetMemRefType(res, true, kLiteral);
     AppendLIR(res);
+    return res;
   }
-  return res;
 }
 
-int Arm64Mir2Lir::EncodeShift(int code, int amount) {
-  return ((amount & 0x1f) << 2) | code;
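+// The encodings below pack a shift/extend operand into a single int: shifts
+// use bits [8:7] for the type and [4:0] for the amount, leaving bit 6 clear;
+// extends set bit 6 and use bits [5:3] for the type and [2:0] for the amount.
+// Bit 6 alone therefore tells the two forms apart (see IsExtendEncoding()).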
+int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
+  return ((shift_type & 0x3) << 7) | (amount & 0x1f);
+}
+
+int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
+  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
+}
+
+bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
+  return ((1 << 6) & encoded_value) != 0;
 }
 
 LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                 int scale, OpSize size) {
-  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
+                                   int scale, OpSize size) {
   LIR* load;
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (all_low_regs && (scale == 0));
-  RegStorage reg_ptr;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
 
   if (r_dest.IsFloat()) {
-    if (r_dest.IsSingle()) {
-      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-      opcode = kThumb2Vldrs;
-      size = kSingle;
-    } else {
-      DCHECK(r_dest.IsDouble());
-      DCHECK((size == k64) || (size == kDouble));
-      opcode = kThumb2Vldrd;
-      size = kDouble;
-    }
-  } else {
-    if (size == kSingle)
-      size = k32;
+    bool is_double = r_dest.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_dest.IsSingle());
+
+    // If r_dest is a single, then size must be either k32 or kSingle.
+    // If r_dest is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG,
+                   r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
   }
 
   switch (size) {
-    case kDouble:  // fall-through
-    // Intentional fall-though.
+    case kDouble:
+    case kWord:
+    case k64:
+      wide = kA64Wide;
+      // Intentional fall-through.
     case kSingle:
-      reg_ptr = AllocTemp();
-      if (scale) {
-        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                EncodeShift(kArmLsl, scale));
-      } else {
-        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
-      }
-      load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
-      FreeTemp(reg_ptr);
-      return load;
     case k32:
-    // Intentional fall-though.
     case kReference:
-      opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
+      opcode = kA64Ldr4rXxG;
       break;
     case kUnsignedHalf:
-      opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
+      opcode = kA64Ldrh4wXxd;
       break;
     case kSignedHalf:
-      opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
+      opcode = kA64Ldrsh4rXxd;
       break;
     case kUnsignedByte:
-      opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
+      opcode = kA64Ldrb3wXx;
       break;
     case kSignedByte:
-      opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
+      opcode = kA64Ldrsb3rXx;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
-  if (thumb_form)
-    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
-  else
-    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. ldrb, ldrsb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
+    DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2));
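+    // The last operand (the "G" in the opcode name) appears to be a flag:
+    // 1 applies the size-appropriate scale to the index register, 0 leaves
+    // it unscaled.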
+    load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
+                   (scale != 0) ? 1 : 0);
+  }
 
   return load;
 }
 
 LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                  int scale, OpSize size) {
-  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
-  LIR* store = NULL;
-  ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (all_low_regs && (scale == 0));
-  RegStorage reg_ptr;
+                                    int scale, OpSize size) {
+  LIR* store;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
 
   if (r_src.IsFloat()) {
-    if (r_src.IsSingle()) {
-      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
-      opcode = kThumb2Vstrs;
-      size = kSingle;
-    } else {
-      DCHECK(r_src.IsDouble());
-      DCHECK((size == k64) || (size == kDouble));
-      DCHECK_EQ((r_src.GetReg() & 0x1), 0);
-      opcode = kThumb2Vstrd;
-      size = kDouble;
-    }
-  } else {
-    if (size == kSingle)
-      size = k32;
+    bool is_double = r_src.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_src.IsSingle());
+
+    // If r_src is a single, then size must be either k32 or kSingle.
+    // If r_src is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG,
+                   r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
   }
 
   switch (size) {
-    case kDouble:  // fall-through
-    // Intentional fall-though.
-    case kSingle:
-      reg_ptr = AllocTemp();
-      if (scale) {
-        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
-                EncodeShift(kArmLsl, scale));
-      } else {
-        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
-      }
-      store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
-      FreeTemp(reg_ptr);
-      return store;
-    case k32:
-    // Intentional fall-though.
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      opcode = kA64Str4rXxG;
+      wide = kA64Wide;
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
     case kReference:
-      opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
+      opcode = kA64Str4rXxG;
       break;
     case kUnsignedHalf:
-    // Intentional fall-though.
     case kSignedHalf:
-      opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
+      opcode = kA64Strh4wXxd;
       break;
     case kUnsignedByte:
-    // Intentional fall-though.
     case kSignedByte:
-      opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
+      opcode = kA64Strb3wXx;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
-  if (thumb_form)
-    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
-  else
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. strb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
-    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+    store = NewLIR4(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
 
   return store;
 }
@@ -827,311 +827,245 @@
 LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                     OpSize size) {
   LIR* load = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  ArmOpcode opcode = kA64Brk1d;
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
-  bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
   int encoded_disp = displacement;
-  bool already_generated = false;
-  bool null_pointer_safepoint = false;
   switch (size) {
-    case kDouble:
-    // Intentional fall-though.
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
     case k64:
+      DCHECK_EQ(encoded_disp & 0x3, 0);
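+      // For reference (A64 addressing): the scaled form (ldr with unsigned
+      // offset) encodes displacement/8 in 12 bits, covering 0..32760 in
+      // steps of 8; the unscaled form (ldur) takes a signed 9-bit byte
+      // offset, covering -256..255.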
       if (r_dest.IsFloat()) {
-        DCHECK(!r_dest.IsPair());
-        opcode = kThumb2Vldrd;
-        if (displacement <= 1020) {
+        // Currently double values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled load.
+          opcode = FWIDE(kA64Ldr3fXD);
+          encoded_disp >>= 3;
           short_form = true;
-          encoded_disp >>= 2;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled load.
+          opcode = FWIDE(kA64Ldur3fXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
       } else {
-        if (displacement <= 1020) {
-          load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
-                         displacement >> 2);
-        } else {
-          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32);
-          null_pointer_safepoint = true;
-          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32);
-        }
-        already_generated = true;
+        // Currently long values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled load.
+          opcode = FWIDE(kA64Ldr3rXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled load.
+          opcode = FWIDE(kA64Ldur3rXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
       }
       break;
-    case kSingle:
-    // Intentional fall-though.
-    case k32:
-    // Intentional fall-though.
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
     case kReference:
       if (r_dest.IsFloat()) {
-        opcode = kThumb2Vldrs;
+        opcode = kA64Ldr3fXD;
         if (displacement <= 1020) {
           short_form = true;
           encoded_disp >>= 2;
         }
         break;
       }
-      if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
-          (displacement >= 0)) {
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrPcRel;
-      } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
-                 (displacement >= 0)) {
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
+      if (displacement <= 16380 && displacement >= 0) {
         DCHECK_EQ((displacement & 0x3), 0);
         short_form = true;
         encoded_disp >>= 2;
-        opcode = kThumbLdrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrRRI12;
+        opcode = kA64Ldr3rXD;
       }
       break;
     case kUnsignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
+      if (displacement < 64 && displacement >= 0) {
         DCHECK_EQ((displacement & 0x1), 0);
         short_form = true;
         encoded_disp >>= 1;
-        opcode = kThumbLdrhRRI5;
+        opcode = kA64Ldrh3wXF;
       } else if (displacement < 4092 && displacement >= 0) {
+        DCHECK_EQ((displacement & 0x1), 0);
         short_form = true;
+        encoded_disp >>= 1;
-        opcode = kThumb2LdrhRRI12;
+        opcode = kA64Ldrh3wXF;
       }
       break;
     case kSignedHalf:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrshRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      short_form = true;
+      encoded_disp >>= 1;
+      opcode = kA64Ldrsh3rXF;
       break;
     case kUnsignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbLdrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrbRRI12;
-      }
+      short_form = true;
+      opcode = kA64Ldrb3wXd;
       break;
     case kSignedByte:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrsbRRI12;
-      }
+      short_form = true;
+      opcode = kA64Ldrsb3rXd;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
 
-  if (!already_generated) {
-    if (short_form) {
-      load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+  if (short_form) {
+    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+  } else {
+    RegStorage reg_offset = AllocTemp();
+    LoadConstant(reg_offset, encoded_disp);
+    if (r_dest.IsFloat()) {
+      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+      OpRegReg(kOpAdd, reg_offset, r_base);
+      load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
     } else {
-      RegStorage reg_offset = AllocTemp();
-      LoadConstant(reg_offset, encoded_disp);
-      if (r_dest.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, reg_offset, r_base);
-        load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
-      } else {
-        load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
-      }
-      FreeTemp(reg_offset);
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
     }
+    FreeTemp(reg_offset);
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
-  if (r_base == rs_rARM_SP) {
+  if (r_base == rs_rA64_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-  } else {
-     // We might need to generate a safepoint if we have two store instructions (wide or double).
-     if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-       MarkSafepointPC(load);
-     }
   }
   return load;
 }
 
 LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                 OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return LoadBaseDispBody(r_base, displacement, r_dest, size);
 }
 
 
 LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
-                                   OpSize size) {
+                                     OpSize size) {
   LIR* store = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  ArmOpcode opcode = kA64Brk1d;
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
-  bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
   int encoded_disp = displacement;
-  bool already_generated = false;
-  bool null_pointer_safepoint = false;
   switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
     case k64:
-    case kDouble:
-      if (!r_src.IsFloat()) {
-        if (displacement <= 1020) {
-          store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
-                          displacement >> 2);
-        } else {
-          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
-          null_pointer_safepoint = true;
-          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
-        }
-        already_generated = true;
-      } else {
-        DCHECK(!r_src.IsPair());
-        opcode = kThumb2Vstrd;
-        if (displacement <= 1020) {
+      DCHECK_EQ(encoded_disp & 0x3, 0);
+      if (r_src.IsFloat()) {
+        // Currently double values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled store.
+          opcode = FWIDE(kA64Str3fXD);
+          encoded_disp >>= 3;
           short_form = true;
-          encoded_disp >>= 2;
-        }
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled store.
+          opcode = FWIDE(kA64Stur3fXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
+      } else {
+        // Currently long values may be misaligned.
+        if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+          // Can use scaled store.
+          opcode = FWIDE(kA64Str3rXD);
+          encoded_disp >>= 3;
+          short_form = true;
+        } else if (IS_SIGNED_IMM9(displacement)) {
+          // Can use unscaled store.
+          opcode = FWIDE(kA64Stur3rXd);
+          short_form = true;
+        }  // else: use long sequence (short_form = false).
       }
       break;
-    case kSingle:
-    // Intentional fall-through.
-    case k32:
-    // Intentional fall-through.
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
     case kReference:
       if (r_src.IsFloat()) {
         DCHECK(r_src.IsSingle());
-        opcode = kThumb2Vstrs;
+        DCHECK_EQ(encoded_disp & 0x3, 0);
+        opcode = kA64Str3fXD;
         if (displacement <= 1020) {
           short_form = true;
           encoded_disp >>= 2;
         }
         break;
       }
-      if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbStrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
+
+      if (displacement <= 16380 && displacement >= 0) {
         DCHECK_EQ((displacement & 0x3), 0);
         short_form = true;
         encoded_disp >>= 2;
-        opcode = kThumbStrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrRRI12;
+        opcode = kA64Str3rXD;
       }
       break;
     case kUnsignedHalf:
     case kSignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kThumbStrhRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrhRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      short_form = true;
+      encoded_disp >>= 1;
+      opcode = kA64Strh3wXF;
       break;
     case kUnsignedByte:
     case kSignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbStrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrbRRI12;
-      }
+      short_form = true;
+      opcode = kA64Strb3wXd;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
-  if (!already_generated) {
-    if (short_form) {
-      store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+
+  if (short_form) {
+    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+  } else {
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, encoded_disp);
+    if (r_src.IsFloat()) {
+      // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+      OpRegReg(kOpAdd, r_scratch, r_base);
+      store = StoreBaseDispBody(r_scratch, 0, r_src, size);
     } else {
-      RegStorage r_scratch = AllocTemp();
-      LoadConstant(r_scratch, encoded_disp);
-      if (r_src.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, r_scratch, r_base);
-        store = StoreBaseDispBody(r_scratch, 0, r_src, size);
-      } else {
-        store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
-      }
-      FreeTemp(r_scratch);
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
     }
+    FreeTemp(r_scratch);
   }
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses
-  if (r_base == rs_rARM_SP) {
+  if (r_base == rs_rA64_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-  } else {
-    // We might need to generate a safepoint if we have two store instructions (wide or double).
-    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-      MarkSafepointPC(store);
-    }
   }
   return store;
 }
 
 LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return StoreBaseDispBody(r_base, displacement, r_src, size);
 }
 
 LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  int opcode;
-  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
-  if (r_dest.IsDouble()) {
-    opcode = kThumb2Vmovd;
-  } else {
-    if (r_dest.IsSingle()) {
-      opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
-    } else {
-      DCHECK(r_src.IsSingle());
-      opcode = kThumb2Fmrs;
-    }
-  }
-  LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
-  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
-    res->flags.is_nop = true;
-  }
-  return res;
+  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
+  return NULL;
 }
 
-LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
-  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, A64ThreadOffset thread_offset) {
+  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm64";
   return NULL;
 }
 
 LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
-  LOG(FATAL) << "Unexpected use of OpMem for Arm";
+  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
   return NULL;
 }
 
 LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, OpSize size) {
-  LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
+                                        int displacement, RegStorage r_src, OpSize size) {
+  LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm64";
   return NULL;
 }
 
 LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
-  LOG(FATAL) << "Unexpected use of OpRegMem for Arm";
+  LOG(FATAL) << "Unexpected use of OpRegMem for Arm64";
   return NULL;
 }
 
 LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
                                        int displacement, RegStorage r_dest, OpSize size) {
-  LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
+  LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm64";
   return NULL;
 }
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fbf8a0c..784dfaf 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -497,6 +497,7 @@
       case kX86_64:
         bx_offset = 0;
         break;
+      case kArm64:
       case kMips:
         bx_offset = tab_rec->anchor->offset;
         break;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d51f2e0..d321b00 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -346,7 +346,7 @@
 
 /*
  * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform intial
+ * to a callee-save register, flush them to the frame.  Perform initial
  * assignment of promoted arguments.
  *
  * ArgLocs is an array of location records describing the incoming arguments
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4891d8c..4b1de4b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -91,6 +91,7 @@
 
 // Common combo register usage patterns.
 #define REG_DEF01            (REG_DEF0 | REG_DEF1)
+#define REG_DEF012           (REG_DEF0 | REG_DEF1 | REG_DEF2)
 #define REG_DEF01_USE2       (REG_DEF0 | REG_DEF1 | REG_USE2)
 #define REG_DEF0_USE01       (REG_DEF0 | REG_USE01)
 #define REG_DEF0_USE0        (REG_DEF0 | REG_USE0)
@@ -167,6 +168,8 @@
 // Target-specific initialization.
 Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena);
 Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
@@ -783,7 +786,7 @@
                                                             bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
-    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
@@ -830,7 +833,7 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    int LoadArgRegs(CallInfo* info, int call_state,
+    virtual int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
                     uint32_t vtable_idx,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 7436e39..b8481e2 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -742,7 +742,7 @@
   EmitPrefixAndOpcode(entry);
   EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
-  EmitImm(entry, static_cast<int16_t>(imm));
+  EmitImm(entry, imm);
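+  // Note: the cast removed above truncated immediates to 16 bits; presumably
+  // EmitImm should emit the full width implied by the encoding entry.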
 }
 
 void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index f5d6299..a0659e7 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -64,8 +64,8 @@
 
 static constexpr bool kProtectFromSpace = true;
 static constexpr bool kStoreStackTraces = false;
-static constexpr bool kUseBytesPromoted = true;
 static constexpr size_t kBytesPromotedThreshold = 4 * MB;
+static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
@@ -104,8 +104,8 @@
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
       bytes_promoted_since_last_whole_heap_collection_(0),
+      large_object_bytes_allocated_at_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0),
       collector_name_(name_),
       swap_semi_spaces_(true) {
 }
@@ -187,12 +187,8 @@
     if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
         clear_soft_references_) {
       // If an explicit, native allocation-triggered, or last attempt
-      // collection, collect the whole heap (and reset the interval
-      // counter to be consistent.)
+      // collection, collect the whole heap.
       whole_heap_collection_ = true;
-      if (!kUseBytesPromoted) {
-        whole_heap_collection_interval_counter_ = 0;
-      }
     }
     if (whole_heap_collection_) {
       VLOG(heap) << "Whole heap collection";
@@ -798,32 +794,27 @@
     // only space collection at the next collection by updating
     // whole_heap_collection.
     if (!whole_heap_collection_) {
-      if (!kUseBytesPromoted) {
-        // Enable whole_heap_collection once every
-        // kDefaultWholeHeapCollectionInterval collections.
-        --whole_heap_collection_interval_counter_;
-        DCHECK_GE(whole_heap_collection_interval_counter_, 0);
-        if (whole_heap_collection_interval_counter_ == 0) {
-          whole_heap_collection_ = true;
-        }
-      } else {
-        // Enable whole_heap_collection if the bytes promoted since
-        // the last whole heap collection exceeds a threshold.
-        bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
-        if (bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold) {
-          whole_heap_collection_ = true;
-        }
+      // Enable whole_heap_collection if the bytes promoted since the
+      // last whole heap collection or the large object bytes
+      // allocated exceeds a threshold.
+      bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
+      bool bytes_promoted_threshold_exceeded =
+          bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold;
+      uint64_t current_los_bytes_allocated = GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      uint64_t last_los_bytes_allocated =
+          large_object_bytes_allocated_at_last_whole_heap_collection_;
+      bool large_object_bytes_threshold_exceeded =
+          current_los_bytes_allocated >=
+          last_los_bytes_allocated + kLargeObjectBytesAllocatedThreshold;
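+      // Worked example (hypothetical numbers): if the LOS held 40 MB at the
+      // last whole-heap collection, the threshold is crossed once current
+      // LOS allocation reaches 56 MB (40 MB plus the 16 MB
+      // kLargeObjectBytesAllocatedThreshold), even if few bytes were promoted.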
+      if (bytes_promoted_threshold_exceeded || large_object_bytes_threshold_exceeded) {
+        whole_heap_collection_ = true;
       }
     } else {
-      if (!kUseBytesPromoted) {
-        DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
-        whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
-        whole_heap_collection_ = false;
-      } else {
-        // Reset it.
-        bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
-        whole_heap_collection_ = false;
-      }
+      // Reset the counters.
+      bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
+      large_object_bytes_allocated_at_last_whole_heap_collection_ =
+          GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      whole_heap_collection_ = false;
     }
   }
   // Clear all of the spaces' mark bitmaps.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3b3e1b1..9fdf471 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -234,14 +234,14 @@
   // the non-moving space, since the last whole heap collection.
   uint64_t bytes_promoted_since_last_whole_heap_collection_;
 
+  // Used for the generational mode. Keeps track of how many bytes of
+  // large objects were allocated at the last whole heap collection.
+  uint64_t large_object_bytes_allocated_at_last_whole_heap_collection_;
+
   // Used for the generational mode. When true, collect the whole
   // heap. When false, collect only the bump pointer spaces.
   bool whole_heap_collection_;
 
-  // Used for the generational mode. A counter used to enable
-  // whole_heap_collection_ once per interval.
-  int whole_heap_collection_interval_counter_;
-
   // How many objects and bytes we moved, used so that we don't need to get the size of the
   // to_space_ when calculating how many objects and bytes we freed.
   size_t bytes_moved_;
diff --git a/runtime/globals.h b/runtime/globals.h
index eb52a46..07fadb9 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -55,9 +55,8 @@
 // but ARM ELF requires 8..
 static constexpr size_t kArmAlignment = 8;
 
-// ARM64 instruction alignment. AArch64 require code to be 4-byte aligned.
-// AArch64 ELF requires at least 4.
-static constexpr size_t kArm64Alignment = 4;
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
 
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?