Revert "Revert "Enable Load Store Elimination for ARM and ARM64""

This patch refactors the implementation of the LoadStoreElimination
optimisation pass. Please note that this pass was disabled and not
functional for any of the backends.

The current implementation tracks aliases and handles DalvikRegs as well
as Heap memory regions. It has been tested and it is known to optimise
out the following:
  * Load - Load
  * Store - Load
  * Store - Store
  * Load Literals

Change-Id: I3aadb12a787164146a95bc314e85fa73ad91e12b
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 346fbb8..dcc67c3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -467,8 +467,13 @@
   kIsQuinOp,
   kIsSextupleOp,
   kIsIT,
+  kIsMoveOp,
   kMemLoad,
   kMemStore,
+  kMemVolatile,
+  kMemScaledx0,
+  kMemScaledx2,
+  kMemScaledx4,
   kPCRelFixup,  // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
   kRegDef0,
   kRegDef1,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 51446f6..bff7dff 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -42,7 +42,7 @@
 
 /* Default optimizer/debug setting for the compiler. */
 static uint32_t kCompilerOptimizerDisableFlags = 0 |  // Disable specific optimizations
-  (1 << kLoadStoreElimination) |  // TODO: this pass has been broken for awhile - fix or delete.
+  // (1 << kLoadStoreElimination) |
   // (1 << kLoadHoisting) |
   // (1 << kSuppressLoads) |
   // (1 << kNullCheckElimination) |
@@ -96,12 +96,12 @@
     ~0U,
     // 1 = kArm, unused (will use kThumb2).
     ~0U,
-    // 2 = kArm64.     TODO(Arm64): enable optimizations once backend is mature enough.
-    (1 << kLoadStoreElimination) |
+    // 2 = kArm64.
     0,
     // 3 = kThumb2.
     0,
     // 4 = kX86.
+    (1 << kLoadStoreElimination) |
     0,
     // 5 = kX86_64.
     (1 << kLoadStoreElimination) |
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 5083bbc..35c3597 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -217,7 +217,7 @@
                  "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRI5,      0x6800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF4,
                  "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRR,       0x5800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -226,14 +226,14 @@
     ENCODING_MAP(kThumbLdrPcRel,    0x4800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
+                 | IS_LOAD_OFF4 | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
     ENCODING_MAP(kThumbLdrSpRel,    0x9800,
                  kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
-                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
+                 | IS_LOAD_OFF4, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -241,7 +241,7 @@
                  "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF2,
                  "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -283,19 +283,19 @@
     ENCODING_MAP(kThumbMovRR,        0x1c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES |IS_MOVE,
                  "movs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMul,           0x4340,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
@@ -354,7 +354,7 @@
                  "stmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRI5,      0x6000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
                  "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRR,       0x5000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -363,10 +363,10 @@
     ENCODING_MAP(kThumbStrSpRel,    0x9000,
                  kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
-                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
+                 | IS_STORE_OFF4, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRI5,     0x7000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRR,      0x5400,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -374,7 +374,7 @@
                  "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRI5,     0x8000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF2,
                  "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRR,      0x5200,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
@@ -423,11 +423,11 @@
      */
     ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF4 |
                  REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF |
                  REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
@@ -440,11 +440,11 @@
                  "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
                  "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF4,
                  "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
@@ -520,19 +520,19 @@
                  "mov", "!0C, #!1M", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
@@ -549,15 +549,15 @@
                  "add", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
@@ -613,59 +613,59 @@
                  "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
-                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
@@ -841,7 +841,7 @@
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF | NEEDS_FIXUP,
                  "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
@@ -899,19 +899,19 @@
                  "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOADX,
                  "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldrexd,      0xe8d0007f,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOADX,
                  "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Strex,       0xe8400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
-                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
+                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STOREX,
                  "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Strexd,      0xe8c00070,
                  kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
-                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE,
+                 kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STOREX,
                  "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
@@ -927,12 +927,12 @@
                  "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
                  kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
                  "dmb", "#!0B", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD_OFF,
                  "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Stm,          0xe9000000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
@@ -1023,17 +1023,17 @@
     ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD_OFF4 | NEEDS_FIXUP,
                  "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD_OFF4,
                  "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
-                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
+                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE_OFF4,
                  "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
 };
 
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 9652192..e0b8ec6 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -198,6 +198,7 @@
     }
 
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
+    size_t GetInstructionOffset(LIR* lir);
 
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index cf21da7..bba1a8c 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1169,4 +1169,17 @@
   return OpReg(op, r_tgt);
 }
 
+size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
+  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
+  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
+  size_t offset = (check_flags & IS_TERTIARY_OP) ? lir->operands[2] : 0;
+
+  if (check_flags & SCALED_OFFSET_X2) {
+    offset = offset * 2;
+  } else if (check_flags & SCALED_OFFSET_X4) {
+    offset = offset * 4;
+  }
+  return offset;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 462be54..1d7cdab 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -214,7 +214,7 @@
                  "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
     ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
                  kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_VOLATILE,
                  "dmb", "#!0B", kFixupNone),
     ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
                  kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
@@ -274,7 +274,7 @@
                  "fmin", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "fmov", "!0f, !1f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
                  kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
@@ -318,7 +318,7 @@
                  "fsub", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -326,7 +326,7 @@
                  "ldrb", "!0w, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -334,19 +334,19 @@
                  "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
-                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD_OFF,
                  "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
                  kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
@@ -360,11 +360,11 @@
                  "ldr", "!0r, !1p", kFixupLoad),
     ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0f, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD_OFF,
                  "ldr", "!0r, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -380,11 +380,11 @@
                  "ldr", "!0r, [!1X], #!2d", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
                  "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD_OFF,
                  "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -400,11 +400,11 @@
                  "ldur", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
                  "ldxr", "!0r, [!1X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOADX,
                  "ldaxr", "!0r, [!1X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
@@ -432,7 +432,7 @@
                  "movz", "!0r, #!1d!2M", kFixupNone),
     ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_MOVE,
                  "mov", "!0r, !1r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
@@ -508,11 +508,11 @@
                  "smulh", "!0x, !1x, !2x", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
-                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE_OFF,
                  "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
                  kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -524,7 +524,7 @@
                  "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0f, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
                  kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -532,7 +532,7 @@
                  "str", "!0f, [!1X, !2x!3G]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "str", "!0r, [!1X, #!2D]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
                  kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -540,7 +540,7 @@
                  "str", "!0r, [!1X, !2x!3G]", kFixupNone),
     ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strb", "!0w, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -548,7 +548,7 @@
                  "strb", "!0w, [!1X, !2x]", kFixupNone),
     ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE_OFF,
                  "strh", "!0w, [!1X, #!2F]", kFixupNone),
     ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
                  kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
@@ -568,11 +568,11 @@
                  "stur", "!0r, [!1X, #!2d]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
                  kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
                  "stxr", "!0w, !1r, [!2X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00),
                  kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STOREX,
                  "stlxr", "!0w, !1r, [!2X]", kFixupNone),
     ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
                  kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index ac36519..fd2f541 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -297,6 +297,7 @@
     bool WideFPRsAreAliases() OVERRIDE {
       return true;  // 64b architecture.
     }
+    size_t GetInstructionOffset(LIR* lir);
 
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
@@ -381,6 +382,7 @@
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                           bool is_div, bool check_zero);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+    size_t GetLoadStoreSize(LIR* lir);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index f9f85f4..360acd5 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -788,6 +788,7 @@
 }
 
 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
 }
 
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index f6c140f..221dbfa 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -87,6 +87,26 @@
   return (bit7 | bit6 | bit5_to_0);
 }
 
+size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
+  bool opcode_is_wide = IS_WIDE(lir->opcode);
+  ArmOpcode opcode = UNWIDE(lir->opcode);
+  DCHECK(!IsPseudoLirOp(opcode));
+  const ArmEncodingMap *encoder = &EncodingMap[opcode];
+  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+  return (bits >> 30);
+}
+
+size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
+  size_t offset = lir->operands[2];
+  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
+  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
+  if (check_flags & SCALED_OFFSET_X0) {
+    DCHECK(check_flags & IS_TERTIARY_OP);
+    offset = offset * (1 << GetLoadStoreSize(lir));
+  }
+  return offset;
+}
+
 LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
   DCHECK(r_dest.IsSingle());
   if (value == 0) {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 2893157..eec2b32 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -15,15 +15,43 @@
  */
 
 #include "dex/compiler_internals.h"
+#include "dex/quick/mir_to_lir-inl.h"
 
 namespace art {
 
 #define DEBUG_OPT(X)
 
+#define LOAD_STORE_CHECK_REG_DEP(mask, check) (mask.Intersects(*check->u.m.def_mask))
+
 /* Check RAW, WAR, and RAW dependency on the register operands */
 #define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \
                                        (use.Union(def).Intersects(*check->u.m.def_mask))
 
+/* Load Store Elimination filter:
+ *  - Wide Load/Store
+ *  - Exclusive Load/Store
+ *  - Quad operand Load/Store
+ *  - List Load/Store
+ *  - IT blocks
+ *  - Branch
+ *  - Dmb
+ */
+#define LOAD_STORE_FILTER(flags) ((flags & (IS_QUAD_OP|IS_STORE)) == (IS_QUAD_OP|IS_STORE) || \
+                                 (flags & (IS_QUAD_OP|IS_LOAD)) == (IS_QUAD_OP|IS_LOAD) || \
+                                 (flags & REG_USE012) == REG_USE012 || \
+                                 (flags & REG_DEF01) == REG_DEF01 || \
+                                 (flags & REG_DEF_LIST0) || \
+                                 (flags & REG_DEF_LIST1) || \
+                                 (flags & REG_USE_LIST0) || \
+                                 (flags & REG_USE_LIST1) || \
+                                 (flags & REG_DEF_FPCS_LIST0) || \
+                                 (flags & REG_DEF_FPCS_LIST2) || \
+                                 (flags & REG_USE_FPCS_LIST0) || \
+                                 (flags & REG_USE_FPCS_LIST2) || \
+                                 (flags & IS_VOLATILE) || \
+                                 (flags & IS_BRANCH) || \
+                                 (flags & IS_IT))
+
 /* Scheduler heuristics */
 #define MAX_HOIST_DISTANCE 20
 #define LDLD_DISTANCE 4
@@ -43,6 +71,7 @@
   /* Insert a move to replace the load */
   LIR* move_lir;
   move_lir = OpRegCopyNoInsert(dest, src);
+  move_lir->dalvik_offset = orig_lir->dalvik_offset;
   /*
    * Insert the converted instruction after the original since the
    * optimization is scannng in the top-down order and the new instruction
@@ -52,8 +81,53 @@
   InsertLIRAfter(orig_lir, move_lir);
 }
 
+void Mir2Lir::DumpDependentInsnPair(LIR* check_lir, LIR* this_lir, const char* type) {
+  LOG(INFO) << type;
+  LOG(INFO) << "Check LIR:";
+  DumpLIRInsn(check_lir, 0);
+  LOG(INFO) << "This LIR:";
+  DumpLIRInsn(this_lir, 0);
+}
+
+inline void Mir2Lir::EliminateLoad(LIR* lir, int reg_id) {
+  DCHECK(RegStorage::SameRegType(lir->operands[0], reg_id));
+  RegStorage dest_reg, src_reg;
+
+  /* Same Register - Nop */
+  if (lir->operands[0] == reg_id) {
+    NopLIR(lir);
+    return;
+  }
+
+  /* different Regsister - Move + Nop */
+  switch (reg_id & RegStorage::kShapeTypeMask) {
+    case RegStorage::k32BitSolo | RegStorage::kCoreRegister:
+      dest_reg = RegStorage::Solo32(lir->operands[0]);
+      src_reg = RegStorage::Solo32(reg_id);
+      break;
+    case RegStorage::k64BitSolo | RegStorage::kCoreRegister:
+      dest_reg = RegStorage::Solo64(lir->operands[0]);
+      src_reg = RegStorage::Solo64(reg_id);
+      break;
+    case RegStorage::k32BitSolo | RegStorage::kFloatingPoint:
+      dest_reg = RegStorage::FloatSolo32(lir->operands[0]);
+      src_reg = RegStorage::FloatSolo32(reg_id);
+      break;
+    case RegStorage::k64BitSolo | RegStorage::kFloatingPoint:
+      dest_reg = RegStorage::FloatSolo64(lir->operands[0]);
+      src_reg = RegStorage::FloatSolo64(reg_id);
+      break;
+    default:
+      LOG(INFO) << "Load Store: Unsuported register type!";
+      return;
+  }
+  ConvertMemOpIntoMove(lir, dest_reg, src_reg);
+  NopLIR(lir);
+  return;
+}
+
 /*
- * Perform a pass of top-down walk, from the second-last instruction in the
+ * Perform a pass of top-down walk, from the first to the last instruction in the
  * superblock, to eliminate redundant loads and stores.
  *
  * An earlier load can eliminate a later load iff
@@ -66,213 +140,172 @@
  *   2) The native register is not clobbered in between
  *   3) The memory location is not written to in between
  *
- * A later store can be eliminated by an earlier store iff
+ * An earlier store can eliminate a later store iff
  *   1) They are must-aliases
  *   2) The memory location is not written to in between
  */
 void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) {
-  LIR* this_lir;
+  LIR* this_lir, *check_lir;
+  std::vector<int> alias_list;
 
   if (head_lir == tail_lir) {
     return;
   }
 
-  for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
-    if (IsPseudoLirOp(this_lir->opcode)) {
+  for (this_lir = head_lir; this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
+    if (this_lir->flags.is_nop || IsPseudoLirOp(this_lir->opcode)) {
       continue;
     }
 
-    int sink_distance = 0;
-
     uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
+    /* Target LIR - skip if instr is:
+     *  - NOP
+     *  - Branch
+     *  - Load and store
+     *  - Wide load
+     *  - Wide store
+     *  - Exclusive load/store
+     */
+    if (LOAD_STORE_FILTER(target_flags) ||
+        ((target_flags & (IS_LOAD | IS_STORE)) == (IS_LOAD | IS_STORE)) ||
+        !(target_flags & (IS_LOAD | IS_STORE))) {
+      continue;
+    }
+    int native_reg_id = this_lir->operands[0];
+    int dest_reg_id = this_lir->operands[1];
+    bool is_this_lir_load = target_flags & IS_LOAD;
+    ResourceMask this_mem_mask = kEncodeMem.Intersection(this_lir->u.m.use_mask->Union(
+                                                        *this_lir->u.m.def_mask));
 
-    /* Skip non-interesting instructions */
-    if ((this_lir->flags.is_nop == true) ||
-        (target_flags & IS_BRANCH) ||
-        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||  // Skip wide loads.
-        ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
-         (REG_USE0 | REG_USE1 | REG_USE2)) ||  // Skip wide stores.
-        // Skip instructions that are neither loads or stores.
-        !(target_flags & (IS_LOAD | IS_STORE)) ||
-        // Skip instructions that do both load and store.
-        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
+    /* Memory region */
+    if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg)) &&
+      (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeHeapRef)))) {
       continue;
     }
 
-    int native_reg_id;
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-      // If x86, location differs depending on whether memory/reg operation.
-      native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0];
-    } else {
-      native_reg_id = this_lir->operands[0];
-    }
-    bool is_this_lir_load = target_flags & IS_LOAD;
-    LIR* check_lir;
-    /* Use the mem mask to determine the rough memory location */
-    ResourceMask this_mem_mask = kEncodeMem.Intersection(
-        this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask));
-
-    /*
-     * Currently only eliminate redundant ld/st for constant and Dalvik
-     * register accesses.
-     */
-    if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) {
+    /* Does not redefine the address */
+    if (this_lir->u.m.def_mask->Intersects(*this_lir->u.m.use_mask)) {
       continue;
     }
 
     ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);
+    ResourceMask stop_use_reg_mask = this_lir->u.m.use_mask->Without(kEncodeMem);
 
-    /*
-     * Add pc to the resource mask to prevent this instruction
-     * from sinking past branch instructions. Also take out the memory
-     * region bits since stop_mask is used to check data/control
-     * dependencies.
-     *
-     * Note: on x86(-64) and Arm64 we use the IsBranch bit, as the PC is not exposed.
-     */
-    ResourceMask pc_encoding = GetPCUseDefEncoding();
-    if (pc_encoding == kEncodeNone) {
-      // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask.
-      pc_encoding = ResourceMask::Bit(kIsBranch);
+    /* The ARM backend can load/store PC */
+    ResourceMask uses_pc = GetPCUseDefEncoding();
+    if (uses_pc.Intersects(this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask))) {
+      continue;
     }
-    ResourceMask  stop_use_reg_mask = pc_encoding.Union(*this_lir->u.m.use_mask).
-        Without(kEncodeMem);
 
+    /* Initialize alias list */
+    alias_list.clear();
+    ResourceMask alias_reg_list_mask = kEncodeNone;
+    if (!this_mem_mask.Intersects(kEncodeLiteral)) {
+      alias_list.push_back(dest_reg_id);
+      SetupRegMask(&alias_reg_list_mask, dest_reg_id);
+    }
+
+    /* Scan through the BB for posible elimination candidates */
     for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
-      /*
-       * Skip already dead instructions (whose dataflow information is
-       * outdated and misleading).
-       */
       if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) {
         continue;
       }
 
-      ResourceMask check_mem_mask = kEncodeMem.Intersection(
-          check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask));
-      ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask);
-      bool stop_here = false;
+      if (uses_pc.Intersects(check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask))) {
+        break;
+      }
 
-      /*
-       * Potential aliases seen - check the alias relations
-       */
+      ResourceMask check_mem_mask = kEncodeMem.Intersection(check_lir->u.m.use_mask->Union(
+                                                          *check_lir->u.m.def_mask));
+      ResourceMask alias_mem_mask = this_mem_mask.Intersection(check_mem_mask);
       uint64_t check_flags = GetTargetInstFlags(check_lir->opcode);
-      // TUNING: Support instructions with multiple register targets.
-      if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) {
+      bool stop_here = false;
+      bool pass_over = false;
+
+      /* Check LIR - skip if instr is:
+       *  - Wide Load
+       *  - Wide Store
+       *  - Branch
+       *  - Dmb
+       *  - Exclusive load/store
+       *  - IT blocks
+       *  - Quad loads
+       */
+      if (LOAD_STORE_FILTER(check_flags)) {
         stop_here = true;
-      } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
-        bool is_check_lir_load = check_flags & IS_LOAD;
-        if  (alias_condition.Equals(kEncodeLiteral)) {
-          /*
-           * Should only see literal loads in the instruction
-           * stream.
-           */
-          DCHECK(!(check_flags & IS_STORE));
-          /* Same value && same register type */
-          if (check_lir->flags.alias_info == this_lir->flags.alias_info &&
-              RegStorage::SameRegType(check_lir->operands[0], native_reg_id)) {
-            /*
-             * Different destination register - insert
-             * a move
-             */
-            if (check_lir->operands[0] != native_reg_id) {
-              // TODO: update for 64-bit regs.
-              ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]),
-                                   RegStorage::Solo32(native_reg_id));
-            }
-            NopLIR(check_lir);
-          }
-        } else if (alias_condition.Equals(kEncodeDalvikReg)) {
-          /* Must alias */
-          if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
-            /* Only optimize compatible registers */
-            bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
-            if ((is_this_lir_load && is_check_lir_load) ||
-                (!is_this_lir_load && is_check_lir_load)) {
-              /* RAR or RAW */
-              if (reg_compatible) {
-                /*
-                 * Different destination register -
-                 * insert a move
-                 */
-                if (check_lir->operands[0] != native_reg_id) {
-                  // TODO: update for 64-bit regs.
-                  ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]),
-                                       RegStorage::Solo32(native_reg_id));
-                }
-                NopLIR(check_lir);
-              } else {
-                /*
-                 * Destinaions are of different types -
-                 * something complicated going on so
-                 * stop looking now.
-                 */
-                stop_here = true;
-              }
-            } else if (is_this_lir_load && !is_check_lir_load) {
-              /* WAR - register value is killed */
-              stop_here = true;
-            } else if (!is_this_lir_load && !is_check_lir_load) {
-              /* WAW - nuke the earlier store */
-              NopLIR(this_lir);
-              stop_here = true;
-            }
-          /* Partial overlap */
-          } else if (IsDalvikRegisterClobbered(this_lir, check_lir)) {
-            /*
-             * It is actually ok to continue if check_lir
-             * is a read. But it is hard to make a test
-             * case for this so we just stop here to be
-             * conservative.
-             */
-            stop_here = true;
+        /* Possible alias or result of earlier pass */
+      } else if (check_flags & IS_MOVE) {
+        for (auto &reg : alias_list) {
+          if (RegStorage::RegNum(check_lir->operands[1]) == RegStorage::RegNum(reg)) {
+            pass_over = true;
+            alias_list.push_back(check_lir->operands[0]);
+            SetupRegMask(&alias_reg_list_mask, check_lir->operands[0]);
           }
         }
-        /* Memory content may be updated. Stop looking now. */
+      /* Memory regions */
+      } else if (!alias_mem_mask.Equals(kEncodeNone)) {
+        DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
+        bool is_check_lir_load = check_flags & IS_LOAD;
+        bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
+
+        if (alias_mem_mask.Equals(kEncodeLiteral)) {
+          DCHECK(check_flags & IS_LOAD);
+          /* Same value && same register type */
+          if (reg_compatible && (this_lir->target == check_lir->target)) {
+            DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "LITERAL"));
+            EliminateLoad(check_lir, native_reg_id);
+          }
+        } else if (((alias_mem_mask.Equals(kEncodeDalvikReg)) || (alias_mem_mask.Equals(kEncodeHeapRef))) &&
+                   alias_reg_list_mask.Intersects((check_lir->u.m.use_mask)->Without(kEncodeMem))) {
+          bool same_offset = (GetInstructionOffset(this_lir) == GetInstructionOffset(check_lir));
+          if (same_offset && !is_check_lir_load) {
+            if (check_lir->operands[0] != native_reg_id) {
+              DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "STORE STOP"));
+              stop_here = true;
+              break;
+            }
+          }
+
+          if (reg_compatible && same_offset &&
+              ((is_this_lir_load && is_check_lir_load)  /* LDR - LDR */ ||
+              (!is_this_lir_load && is_check_lir_load)  /* STR - LDR */ ||
+              (!is_this_lir_load && !is_check_lir_load) /* STR - STR */)) {
+            DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "LOAD STORE"));
+            EliminateLoad(check_lir, native_reg_id);
+          }
+        } else {
+          /* Unsupported memory region */
+        }
+      }
+
+      if (pass_over) {
+        continue;
+      }
+
+      if (stop_here == false) {
+        bool stop_alias = LOAD_STORE_CHECK_REG_DEP(alias_reg_list_mask, check_lir);
+        if (stop_alias) {
+          /* Scan through alias list and if alias remove from alias list. */
+          for (auto &reg : alias_list) {
+            stop_alias = false;
+            ResourceMask alias_reg_mask = kEncodeNone;
+            SetupRegMask(&alias_reg_mask, reg);
+            stop_alias = LOAD_STORE_CHECK_REG_DEP(alias_reg_mask, check_lir);
+            if (stop_alias) {
+              ClearRegMask(&alias_reg_list_mask, reg);
+              alias_list.erase(std::remove(alias_list.begin(), alias_list.end(),
+                                           reg), alias_list.end());
+            }
+          }
+        }
+        ResourceMask stop_search_mask = stop_def_reg_mask.Union(stop_use_reg_mask);
+        stop_search_mask = stop_search_mask.Union(alias_reg_list_mask);
+        stop_here = LOAD_STORE_CHECK_REG_DEP(stop_search_mask, check_lir);
         if (stop_here) {
           break;
-        /* The check_lir has been transformed - check the next one */
-        } else if (check_lir->flags.is_nop) {
-          continue;
         }
-      }
-
-
-      /*
-       * this and check LIRs have no memory dependency. Now check if
-       * their register operands have any RAW, WAR, and WAW
-       * dependencies. If so, stop looking.
-       */
-      if (stop_here == false) {
-        stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask, check_lir);
-      }
-
-      if (stop_here == true) {
-        if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-          // Prevent stores from being sunk between ops that generate ccodes and
-          // ops that use them.
-          uint64_t flags = GetTargetInstFlags(check_lir->opcode);
-          if (sink_distance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) {
-            check_lir = PREV_LIR(check_lir);
-            sink_distance--;
-          }
-        }
-        DEBUG_OPT(dump_dependent_insn_pair(this_lir, check_lir, "REG CLOBBERED"));
-        /* Only sink store instructions */
-        if (sink_distance && !is_this_lir_load) {
-          LIR* new_store_lir =
-              static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
-          *new_store_lir = *this_lir;
-          /*
-           * Stop point found - insert *before* the check_lir
-           * since the instruction list is scanned in the
-           * top-down order.
-           */
-          InsertLIRBefore(check_lir, new_store_lir);
-          NopLIR(this_lir);
-        }
+      } else {
         break;
-      } else if (!check_lir->flags.is_nop) {
-        sink_distance++;
       }
     }
   }
@@ -385,7 +418,7 @@
 
       /* Found a new place to put the load - move it here */
       if (stop_here == true) {
-        DEBUG_OPT(dump_dependent_insn_pair(check_lir, this_lir "HOIST STOP"));
+        DEBUG_OPT(DumpDependentInsnPair(check_lir, this_lir, "HOIST STOP"));
         break;
       }
     }
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 9ce5bb7..ff5a46f 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -147,6 +147,15 @@
 }
 
 /*
+ * Clear the corresponding bit(s).
+ */
+inline void Mir2Lir::ClearRegMask(ResourceMask* mask, int reg) {
+  DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0);
+  DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg;
+  *mask = mask->ClearBits(reginfo_map_.Get(reg)->DefUseMask());
+}
+
+/*
  * Set up the proper fields in the resource mask
  */
 inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index ed7fcdd..4d8b91e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1311,4 +1311,9 @@
       rl.ref ? RefCheck::kCheckRef : RefCheck::kCheckNotRef, FPCheck::kIgnoreFP, fail, report);
 }
 
+size_t Mir2Lir::GetInstructionOffset(LIR* lir) {
+  UNIMPLEMENTED(FATAL) << "Unsuppored GetInstructionOffset()";
+  return 0;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index b832223..d03b859 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -51,6 +51,7 @@
 #define IS_BINARY_OP         (1ULL << kIsBinaryOp)
 #define IS_BRANCH            (1ULL << kIsBranch)
 #define IS_IT                (1ULL << kIsIT)
+#define IS_MOVE              (1ULL << kIsMoveOp)
 #define IS_LOAD              (1ULL << kMemLoad)
 #define IS_QUAD_OP           (1ULL << kIsQuadOp)
 #define IS_QUIN_OP           (1ULL << kIsQuinOp)
@@ -58,6 +59,7 @@
 #define IS_STORE             (1ULL << kMemStore)
 #define IS_TERTIARY_OP       (1ULL << kIsTertiaryOp)
 #define IS_UNARY_OP          (1ULL << kIsUnaryOp)
+#define IS_VOLATILE          (1ULL << kMemVolatile)
 #define NEEDS_FIXUP          (1ULL << kPCRelFixup)
 #define NO_OPERAND           (1ULL << kNoOperand)
 #define REG_DEF0             (1ULL << kRegDef0)
@@ -94,6 +96,20 @@
 #define REG_USE_HI           (1ULL << kUseHi)
 #define REG_DEF_LO           (1ULL << kDefLo)
 #define REG_DEF_HI           (1ULL << kDefHi)
+#define SCALED_OFFSET_X0     (1ULL << kMemScaledx0)
+#define SCALED_OFFSET_X2     (1ULL << kMemScaledx2)
+#define SCALED_OFFSET_X4     (1ULL << kMemScaledx4)
+
+// Special load/stores
+#define IS_LOADX             (IS_LOAD | IS_VOLATILE)
+#define IS_LOAD_OFF          (IS_LOAD | SCALED_OFFSET_X0)
+#define IS_LOAD_OFF2         (IS_LOAD | SCALED_OFFSET_X2)
+#define IS_LOAD_OFF4         (IS_LOAD | SCALED_OFFSET_X4)
+
+#define IS_STOREX            (IS_STORE | IS_VOLATILE)
+#define IS_STORE_OFF         (IS_STORE | SCALED_OFFSET_X0)
+#define IS_STORE_OFF2        (IS_STORE | SCALED_OFFSET_X2)
+#define IS_STORE_OFF4        (IS_STORE | SCALED_OFFSET_X4)
 
 // Common combo register usage patterns.
 #define REG_DEF01            (REG_DEF0 | REG_DEF1)
@@ -552,6 +568,12 @@
 
     virtual ~Mir2Lir() {}
 
+    /**
+     * @brief Decodes the LIR offset.
+     * @return Returns the scaled offset of LIR.
+     */
+    virtual size_t GetInstructionOffset(LIR* lir);
+
     int32_t s4FromSwitchData(const void* switch_data) {
       return *reinterpret_cast<const int32_t*>(switch_data);
     }
@@ -641,7 +663,10 @@
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
     void SetupRegMask(ResourceMask* mask, int reg);
+    void ClearRegMask(ResourceMask* mask, int reg);
     void DumpLIRInsn(LIR* arg, unsigned char* base_addr);
+    void EliminateLoad(LIR* lir, int reg_id);
+    void DumpDependentInsnPair(LIR* check_lir, LIR* this_lir, const char* type);
     void DumpPromotionMap();
     void CodegenDump();
     LIR* RawLIR(DexOffset dalvik_offset, int opcode, int op0 = 0, int op1 = 0,
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index addd628..706933a 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -192,6 +192,18 @@
     return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
   }
 
+  static constexpr bool Is32Bit(uint16_t reg) {
+    return ((reg & kShapeMask) == k32BitSolo);
+  }
+
+  static constexpr bool Is64Bit(uint16_t reg) {
+    return ((reg & k64BitMask) == k64Bits);
+  }
+
+  static constexpr bool Is64BitSolo(uint16_t reg) {
+    return ((reg & kShapeMask) == k64BitSolo);
+  }
+
   // Used to retrieve either the low register of a pair, or the only register.
   int GetReg() const {
     DCHECK(!IsPair()) << "reg_ = 0x" << std::hex << reg_;
@@ -265,11 +277,11 @@
   }
 
   static constexpr bool SameRegType(RegStorage reg1, RegStorage reg2) {
-    return (reg1.IsDouble() == reg2.IsDouble()) && (reg1.IsSingle() == reg2.IsSingle());
+    return ((reg1.reg_ & kShapeTypeMask) == (reg2.reg_ & kShapeTypeMask));
   }
 
   static constexpr bool SameRegType(int reg1, int reg2) {
-    return (IsDouble(reg1) == IsDouble(reg2)) && (IsSingle(reg1) == IsSingle(reg2));
+    return ((reg1 & kShapeTypeMask) == (reg2 & kShapeTypeMask));
   }
 
   // Create a 32-bit solo.