Thumb/Thumb2 instruction selection rework.

Change-id: I7428278f07f49e675d0271c58b3cbf1f6a4e9da1
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index 7f6d284..eeee00b 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -79,9 +79,13 @@
                 strcpy(tbuf, "!");
             } else {
                assert(fmt < fmtEnd);
-               assert((unsigned)(nc-'0') < 3);
+               assert((unsigned)(nc-'0') < 4);
                operand = lir->operands[nc-'0'];
                switch(*fmt++) {
+                   case 'n':
+                       operand = ~expandImmediate(operand);
+                       sprintf(tbuf,"%d [0x%x]", operand, operand);
+                       break;
                    case 'm':
                        operand = expandImmediate(operand);
                        sprintf(tbuf,"%d [0x%x]", operand, operand);
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 1ecbbf1..7d7fcab 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -27,7 +27,7 @@
  * r6 (rGLUE) is reserved [holds current &interpState]
  * r7 (rINST) is scratch for Jit
  * r8 (rIBASE) is scratch for Jit, but must be restored when resuming interp
- * r9 is always scratch
+ * r9 is reserved
  * r10 is always scratch
  * r11 (fp) used by gcc unless -fomit-frame-pointer set [available for jit?]
  * r12 is always scratch
@@ -35,21 +35,30 @@
  * r14 (lr) is scratch for Jit
  * r15 (pc) is reserved
  *
+ * Preserved across C calls: r4, r5, r6, r7, r8, r10, r11
+ * Trashed across C calls: r0, r1, r2, r3, r12, r14
+ *
+ * Floating point registers
+ * s0-s31
+ * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ * s16-s31 (d8-d15) preserved across C calls
+ * s0-s15 (d0-d7) trashed across C calls
+ *
  * For Thumb code use:
- *       r0, r1, r2, r3 to hold operands/results via scoreboard
+ *       r0, r1, r2, r3 to hold operands/results
  *       r4, r7 for temps
  *
  * For Thumb2 code use:
- *       r0, r1, r2, r3, r8, r9, r10, r11 for operands/results via scoreboard
- *       r4, r7, r14 for temps
+ *       r0, r1, r2, r3, r8, r9, r10, r11, r12, r14 for operands/results
+ *       r4, r7 for temps
+ *       s16-s31/d8-d15 for operands/results
+ *       s0-s15/d0-d7 for temps
  *
  * When transitioning from code cache to interp:
  *       restore rIBASE
  *       restore rPC
- *       restore r11 (fp)?
- *
- * Double precision values are stored in consecutive single precision registers
- * such that dr0 -> (sr0,sr1), dr1 -> (sr2,sr3) ... dr16 -> (sr30,sr31)
+ *       restore r11?
  */
 
 /* Offset to distingish FP regs */
@@ -65,7 +74,56 @@
 #define FP_REG_MASK (FP_REG_OFFSET-1)
 /* Mask to convert high reg to low for Thumb */
 #define THUMB_REG_MASK 0x7
+/* non-existent Dalvik register */
+#define vNone   (-1)
+/* non-existent physical register */
+#define rNone   (-1)
 
+typedef enum OpSize {
+    WORD,
+    LONG,
+    SINGLE,
+    DOUBLE,
+    UNSIGNED_HALF,
+    SIGNED_HALF,
+    UNSIGNED_BYTE,
+    SIGNED_BYTE,
+} OpSize;
+
+typedef enum OpKind {
+    OP_MOV,
+    OP_MVN,
+    OP_CMP,
+    OP_LSL,
+    OP_LSR,
+    OP_ASR,
+    OP_ROR,
+    OP_NOT,
+    OP_AND,
+    OP_OR,
+    OP_XOR,
+    OP_NEG,
+    OP_ADD,
+    OP_ADC,
+    OP_SUB,
+    OP_SBC,
+    OP_RSUB,
+    OP_MUL,
+    OP_DIV,
+    OP_REM,
+    OP_BIC,
+    OP_CMN,
+    OP_TST,
+    OP_BKPT,
+    OP_BLX,
+    OP_PUSH,
+    OP_POP,
+    OP_2CHAR,
+    OP_2SHORT,
+    OP_2BYTE,
+    OP_COND_BR,
+    OP_UNCOND_BR,
+} OpKind;
 
 typedef enum NativeRegisterPool {
     r0 = 0,
@@ -189,7 +247,7 @@
     THUMB_BLX_2,          /* blx(1)  [111] H[01] offset_11[10..0] */
     THUMB_BL_1,           /* blx(1)  [111] H[10] offset_11[10..0] */
     THUMB_BL_2,           /* blx(1)  [111] H[11] offset_11[10..0] */
-    THUMB_BLX_R,          /* blx(2)  [010001111] H2[6..6] rm[5..3] SBZ[000] */
+    THUMB_BLX_R,          /* blx(2)  [010001111] rm[6..3] [000] */
     THUMB_BX,             /* bx      [010001110] H2[6..6] rm[5..3] SBZ[000] */
     THUMB_CMN,            /* cmn     [0100001011] rm[5..3] rd[2..0] */
     THUMB_CMP_RI8,        /* cmp(1)  [00101] rn[10..8] imm_8[7..0] */
@@ -224,7 +282,7 @@
     THUMB_ORR,            /* orr     [0100001100] rm[5..3] rd[2..0] */
     THUMB_POP,            /* pop     [1011110] r[8..8] rl[7..0] */
     THUMB_PUSH,           /* push    [1011010] r[8..8] rl[7..0] */
-    THUMB_ROR,            /* ror     [0100000111] rs[5..3] rd[2..0] */
+    THUMB_RORV,           /* ror     [0100000111] rs[5..3] rd[2..0] */
     THUMB_SBC,            /* sbc     [0100000110] rm[5..3] rd[2..0] */
     THUMB_STMIA,          /* stmia   [11000] rn[10..8] reglist [7.. 0] */
     THUMB_STR_RRI5,       /* str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0] */
@@ -292,18 +350,123 @@
                                        rn[19..16] rt[15..12] [1100] imm[7..0]*/
     THUMB2_LDR_RRI8_PREDEC, /* ldr(Imm,T4) rd,[rn,#-imm8] [111110000101]
                                        rn[19..16] rt[15..12] [1100] imm[7..0]*/
-    THUMB2_CBNZ,            /* cbnz rd,<label> [101110] i [1] imm5[7..3]
+    THUMB2_CBNZ,          /* cbnz rd,<label> [101110] i [1] imm5[7..3]
                                        rn[2..0] */
-    THUMB2_CBZ,             /* cbn rd,<label> [101100] i [1] imm5[7..3]
+    THUMB2_CBZ,           /* cbz rd,<label> [101100] i [1] imm5[7..3]
                                        rn[2..0] */
-    THUMB2_ADD_RRI12,       /* add rd, rn, #imm12 [11110] i [100000] rn[19..16]
+    THUMB2_ADD_RRI12,     /* add rd, rn, #imm12 [11110] i [100000] rn[19..16]
                                        [0] imm3[14..12] rd[11..8] imm8[7..0] */
-    THUMB2_MOV_RR,          /* mov rd, rm [11101010010011110000] rd[11..8]
+    THUMB2_MOV_RR,        /* mov rd, rm [11101010010011110000] rd[11..8]
                                        [0000] rm[3..0] */
-    THUMB2_VMOVS,           /* vmov.f32 vd, vm [111011101] D [110000]
+    THUMB2_VMOVS,         /* vmov.f32 vd, vm [111011101] D [110000]
                                        vd[15..12] 101001] M [0] vm[3..0] */
-    THUMB2_VMOVD,           /* vmov.f64 vd, vm [111011101] D [110000]
+    THUMB2_VMOVD,         /* vmov.f64 vd, vm [111011101] D [110000]
                                        vd[15..12] 101101] M [0] vm[3..0] */
+    THUMB2_LDMIA,         /* ldmia  [111010001001] rn[19..16] mask[15..0] */
+    THUMB2_STMIA,         /* stmia  [111010001000] rn[19..16] mask[15..0] */
+    THUMB2_ADD_RRR,       /* add [111010110000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_SUB_RRR,       /* sub [111010111010] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_SBC_RRR,       /* sbc [111010110110] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_CMP_RR,        /* cmp [111010111011] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    THUMB2_SUB_RRI12,     /* sub rd, rn, #imm12 [11110] i [101010] rn[19..16]
+                                       [0] imm3[14..12] rd[11..8] imm8[7..0] */
+    THUMB2_MVN_IMM_SHIFT, /* mvn(T1) rd, #<const> [11110] i [00011011110]
+                                       imm3 rd[11..8] imm8 */
+    THUMB2_SEL,           /* sel rd, rn, rm [111110101010] rn[19-16] rd[11-8]
+                                       rm[3-0] */
+    THUMB2_UBFX,          /* ubfx rd,rn,#lsb,#width [111100111100] rn[19..16]
+                                       [0] imm3[14-12] rd[11-8] w[4-0] */
+    THUMB2_SBFX,          /* sbfx rd,rn,#lsb,#width [111100110100] rn[19..16]
+                                       [0] imm3[14-12] rd[11-8] w[4-0] */
+    THUMB2_LDR_RRR,       /* ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_LDRH_RRR,      /* ldrh rt,[rn,rm,LSL #imm] [111110000011] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_LDRSH_RRR,     /* ldrsh rt,[rn,rm,LSL #imm] [111110010011] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_LDRB_RRR,      /* ldrb rt,[rn,rm,LSL #imm] [111110000001] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_LDRSB_RRR,     /* ldrsb rt,[rn,rm,LSL #imm] [111110010001] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_STR_RRR,       /* str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_STRH_RRR,      /* strh rt,[rn,rm,LSL #imm] [111110000010] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_STRB_RRR,      /* strb rt,[rn,rm,LSL #imm] [111110000000] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    THUMB2_LDRH_RRI12,    /* ldrh rt,[rn,#imm12] [111110001011]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_LDRSH_RRI12,   /* ldrsh rt,[rn,#imm12] [111110011011]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_LDRB_RRI12,    /* ldrb rt,[rn,#imm12] [111110001001]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_LDRSB_RRI12,   /* ldrsb rt,[rn,#imm12] [111110011001]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_STRH_RRI12,    /* strh rt,[rn,#imm12] [111110001010]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_STRB_RRI12,    /* strb rt,[rn,#imm12] [111110001000]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    THUMB2_POP,           /* pop     [1110100010111101] list[15-0]*/
+    THUMB2_PUSH,          /* push    [1110100010101101] list[15-0]*/
+    THUMB2_CMP_RI8,       /* cmp rn, #<const> [11110] i [011011] rn[19-16] [0]
+                                       imm3 [1111] imm8[7..0] */
+    THUMB2_ADC_RRR,       /* adc [111010110101] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_AND_RRR,       /* and [111010100000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_BIC_RRR,       /* bic [111010100010] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_CMN_RR,        /* cmn [111010110001] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    THUMB2_EOR_RRR,       /* eor [111010101000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_MUL_RRR,       /* mul [111110110000] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_MVN_RR,        /* mvn [11101010011011110] rd[11-8] [0000]
+                                   rm[3..0] */
+    THUMB2_RSUB_RRI8,     /* rsub [111100011100] rn[19..16] [0000] rd[11..8]
+                                   imm8[7..0] */
+    THUMB2_NEG_RR,        /* actually rsub rd, rn, #0 */
+    THUMB2_ORR_RRR,       /* orr [111010100100] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_TST_RR,        /* tst [111010100001] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    THUMB2_LSLV_RRR,      /* lsl [111110100000] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_LSRV_RRR,      /* lsr [111110100010] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_ASRV_RRR,      /* asr [111110100100] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_RORV_RRR,      /* ror [111110100110] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    THUMB2_LSL_RRI5,      /* lsl [11101010010011110] imm[14.12] rd[11..8]
+                                   [00] rm[3..0] */
+    THUMB2_LSR_RRI5,      /* lsr [11101010010011110] imm[14.12] rd[11..8]
+                                   [01] rm[3..0] */
+    THUMB2_ASR_RRI5,      /* asr [11101010010011110] imm[14.12] rd[11..8]
+                                   [10] rm[3..0] */
+    THUMB2_ROR_RRI5,      /* ror [11101010010011110] imm[14.12] rd[11..8]
+                                   [11] rm[3..0] */
+    THUMB2_BIC_RRI8,      /* bic [111100000010] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_AND_RRI8,      /* and [111100000000] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_ORR_RRI8,      /* orr [111100000100] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_EOR_RRI8,      /* eor [111100001000] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_ADD_RRI8,      /* add [111100010001] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_ADC_RRI8,      /* adc [111100010101] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_SUB_RRI8,      /* sub [111100011011] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    THUMB2_SBC_RRI8,      /* sbc [111100010111] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
     ARM_LAST,
 } ArmOpCode;
 
@@ -316,6 +479,9 @@
     IS_UNARY_OP =         1 << 5,
     IS_BINARY_OP =        1 << 6,
     IS_TERTIARY_OP =      1 << 7,
+    IS_QUAD_OP =          1 << 8,
+    SETS_CCODES =         1 << 9,
+    USES_CCODES =         1 << 10,
 } ArmOpFeatureFlags;
 
 /* Instruction assembly fieldLoc kind */
@@ -328,6 +494,10 @@
     IMM16,         /* Zero-extended immediate using [26,19..16,14..12,7..0] */
     IMM6,          /* Encoded branch target using [9,7..3]0 */
     IMM12,         /* Zero-extended immediate using [26,14..12,7..0] */
+    SHIFT,         /* Shift descriptor, [14..12,7..4] */
+    LSB,           /* least significant bit using [14..12][7..6] */
+    BWIDTH,        /* bit-field width, encoded as width-1 */
+    SHIFT5,        /* Shift count, [14..12,7..6] */
 } ArmEncodingKind;
 
 /* Struct used to define the snippet positions for each Thumb opcode */
@@ -337,7 +507,7 @@
         ArmEncodingKind kind;
         int end;   /* end for BITBLT, 1-bit slice end for FP regs */
         int start; /* start for BITBLT, 4-bit slice end for FP regs */
-    } fieldLoc[3];
+    } fieldLoc[4];
     ArmOpCode opCode;
     int flags;
     char *name;
@@ -355,7 +525,7 @@
 typedef struct ArmLIR {
     LIR generic;
     ArmOpCode opCode;
-    int operands[3];    // [0..2] = [dest, src1, src2]
+    int operands[4];    // [0..3] = [dest, src1, src2, extra]
     bool isNop;         // LIR is optimized away
     int age;            // default is 0, set lazily by the optimizer
     int size;           // 16-bit unit size (1 for thumb, 1 or 2 for thumb2)
@@ -393,4 +563,6 @@
 
 #define CHAIN_CELL_OFFSET_TAG   0xcdab
 
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+
 #endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMLIR_H */
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index fc740b6..144a416 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -39,9 +39,9 @@
  * fmt: for pretty-prining
  */
 #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     operands, name, fmt, size) \
-        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}}, \
-         opcode, operands, name, fmt, size}
+                     k3, k3s, k3e, operands, name, fmt, size) \
+        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
+                    {k3, k3s, k3e}}, opcode, operands, name, fmt, size}
 
 /* Instruction dump string format keys: !pf, where "!" is the start
  * of the key, "p" is which numeric operand to use and "f" is the
@@ -51,6 +51,7 @@
  *     0 -> operands[0] (dest)
  *     1 -> operands[1] (src1)
  *     2 -> operands[2] (src2)
+ *     3 -> operands[3] (extra)
  *
  * [f]ormats:
  *     h -> 4-digit hex
@@ -66,6 +67,7 @@
  *     s -> single precision floating point register
  *     S -> double precision floating point register
  *     m -> Thumb2 modified immediate
+ *     n -> complemented Thumb2 modified immediate
  *     M -> Thumb2 16-bit zero-extended immediate
  *
  *  [!] escape.  To insert "!", use "!!"
@@ -73,428 +75,648 @@
 /* NOTE: must be kept in sync with enum ArmOpcode from ArmLIR.h */
 ArmEncodingMap EncodingMap[ARM_LAST] = {
     ENCODING_MAP(ARM_16BIT_DATA,    0x0000,
-                 BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP,
                  "data", "0x!0h(!0d)", 1),
     ENCODING_MAP(THUMB_ADC,           0x4140,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "adc", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES | USES_CCODES,
+                 "adcs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ADD_RRI3,      0x1c00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "add", "r!0d, r!1d, #!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "adds", "r!0d, r!1d, #!2d", 1),
     ENCODING_MAP(THUMB_ADD_RI8,       0x3000,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "add", "r!0d, r!0d, #!1d", 1),
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "adds", "r!0d, r!0d, #!1d", 1),
     ENCODING_MAP(THUMB_ADD_RRR,       0x1800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "add", "r!0d, r!1d, r!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "adds", "r!0d, r!1d, r!2d", 1),
     ENCODING_MAP(THUMB_ADD_RR_LH,     0x4440,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
-                 "add",
-                 "r!0d, r!1d", 1),
+                 "add", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ADD_RR_HL,     0x4480,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "add", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ADD_RR_HH,     0x44c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "add", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ADD_PC_REL,    0xa000,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "add", "r!0d, pc, #!1E", 1),
     ENCODING_MAP(THUMB_ADD_SP_REL,    0xa800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "add", "r!0d, sp, #!1E", 1),
     ENCODING_MAP(THUMB_ADD_SPI7,      0xb000,
-                 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | CLOBBER_DEST,
                  "add", "sp, #!0d*4", 1),
     ENCODING_MAP(THUMB_AND_RR,        0x4000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "and", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "ands", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ASR,           0x1000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "asr", "r!0d, r!1d, #!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "asrs", "r!0d, r!1d, #!2d", 1),
     ENCODING_MAP(THUMB_ASRV,          0x4100,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "asr", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "asrs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_B_COND,        0xd000,
-                 BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1,
-                 IS_BINARY_OP | IS_BRANCH,
+                 BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | IS_BRANCH | USES_CCODES,
                  "!1c", "!0t", 1),
     ENCODING_MAP(THUMB_B_UNCOND,      0xe000,
-                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  NO_OPERAND | IS_BRANCH,
                  "b", "!0t", 1),
     ENCODING_MAP(THUMB_BIC,           0x4380,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "bic", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "bics", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_BKPT,          0xbe00,
-                 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "bkpt", "!0d", 1),
     ENCODING_MAP(THUMB_BLX_1,         0xf000,
-                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | IS_BRANCH,
                  "blx_1", "!0u", 1),
     ENCODING_MAP(THUMB_BLX_2,         0xe800,
-                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | IS_BRANCH,
                  "blx_2", "!0v", 1),
     ENCODING_MAP(THUMB_BL_1,          0xf000,
-                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "bl_1", "!0u", 1),
     ENCODING_MAP(THUMB_BL_2,          0xf800,
-                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "bl_2", "!0v", 1),
     ENCODING_MAP(THUMB_BLX_R,         0x4780,
-                 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "blx", "r!0d", 1),
     ENCODING_MAP(THUMB_BX,            0x4700,
-                 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "bx", "r!0d", 1),
     ENCODING_MAP(THUMB_CMN,           0x42c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmn", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_CMP_RI8,       0x2800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmp", "r!0d, #!1d", 1),
     ENCODING_MAP(THUMB_CMP_RR,        0x4280,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmp", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_CMP_LH,        0x4540,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmp", "r!0d, r!1D", 1),
     ENCODING_MAP(THUMB_CMP_HL,        0x4580,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmp", "r!0D, r!1d", 1),
     ENCODING_MAP(THUMB_CMP_HH,        0x45c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES,
                  "cmp", "r!0D, r!1D", 1),
     ENCODING_MAP(THUMB_EOR,           0x4040,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "eor", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "eors", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_LDMIA,         0xc800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
                  "ldmia", "r!0d!!, <!1R>", 1),
     ENCODING_MAP(THUMB_LDR_RRI5,      0x6800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d, [r!1d, #!2E]", 1),
     ENCODING_MAP(THUMB_LDR_RRR,       0x5800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_LDR_PC_REL,    0x4800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d, [pc, #!1E]", 1),
     ENCODING_MAP(THUMB_LDR_SP_REL,    0x9800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d, [sp, #!1E]", 1),
     ENCODING_MAP(THUMB_LDRB_RRI5,     0x7800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrb", "r!0d, [r!1d, #2d]", 1),
     ENCODING_MAP(THUMB_LDRB_RRR,      0x5c00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrb", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_LDRH_RRI5,     0x8800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrh", "r!0d, [r!1d, #!2F]", 1),
     ENCODING_MAP(THUMB_LDRH_RRR,      0x5a00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrh", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_LDRSB_RRR,     0x5600,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrsb", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_LDRSH_RRR,     0x5e00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldrsh", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_LSL,           0x0000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "lsl", "r!0d, r!1d, #!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "lsls", "r!0d, r!1d, #!2d", 1),
     ENCODING_MAP(THUMB_LSLV,          0x4080,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "lsl", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "lsls", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_LSR,           0x0800,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "lsr", "r!0d, r!1d, #!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "lsrs", "r!0d, r!1d, #!2d", 1),
     ENCODING_MAP(THUMB_LSRV,          0x40c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "lsr", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "lsrs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_MOV_IMM,       0x2000,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "mov", "r!0d, #!1d", 1),
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "movs", "r!0d, #!1d", 1),
     ENCODING_MAP(THUMB_MOV_RR,        0x1c00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "mov", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "movs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_MOV_RR_H2H,    0x46c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0D, r!1D", 1),
     ENCODING_MAP(THUMB_MOV_RR_H2L,    0x4640,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0d, r!1D", 1),
     ENCODING_MAP(THUMB_MOV_RR_L2H,    0x4680,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0D, r!1d", 1),
     ENCODING_MAP(THUMB_MUL,           0x4340,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "mul", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "muls", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_MVN,           0x43c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "mvn", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "mvns", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_NEG,           0x4240,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "neg", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "negs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_ORR,           0x4300,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "orr", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "orrs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_POP,           0xbc00,
-                 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP,
                  "pop", "<!0R>", 1),
     ENCODING_MAP(THUMB_PUSH,          0xb400,
-                 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP,
                  "push", "<!0R>", 1),
-    ENCODING_MAP(THUMB_ROR,           0x41c0,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "ror", "r!0d, r!1d", 1),
+    ENCODING_MAP(THUMB_RORV,           0x41c0,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "rors", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_SBC,           0x4180,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "sbc", "r!0d, r!1d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | USES_CCODES | SETS_CCODES,
+                 "sbcs", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB_STMIA,         0xc000,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_SRC1,
                  "stmia", "r!0d!!, <!1R>", 1),
     ENCODING_MAP(THUMB_STR_RRI5,      0x6000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "str", "r!0d, [r!1d, #!2E]", 1),
     ENCODING_MAP(THUMB_STR_RRR,       0x5000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "str", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_STR_SP_REL,    0x9000,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP,
                  "str", "r!0d, [sp, #!1E]", 1),
     ENCODING_MAP(THUMB_STRB_RRI5,     0x7000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "strb", "r!0d, [r!1d, #!2d]", 1),
     ENCODING_MAP(THUMB_STRB_RRR,      0x5400,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "strb", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_STRH_RRI5,     0x8000,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "strh", "r!0d, [r!1d, #!2F]", 1),
     ENCODING_MAP(THUMB_STRH_RRR,      0x5200,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "strh", "r!0d, [r!1d, r!2d]", 1),
     ENCODING_MAP(THUMB_SUB_RRI3,      0x1e00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "sub", "r!0d, r!1d, #!2d]", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "subs", "r!0d, r!1d, #!2d", 1), /* no '[' to close here */
     ENCODING_MAP(THUMB_SUB_RI8,       0x3800,
-                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
-                 IS_BINARY_OP | CLOBBER_DEST,
-                 "sub", "r!0d, #!1d", 1),
+                 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "subs", "r!0d, #!1d", 1),
     ENCODING_MAP(THUMB_SUB_RRR,       0x1a00,
-                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
-                 "sub", "r!0d, r!1d, r!2d", 1),
+                 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "subs", "r!0d, r!1d, r!2d", 1),
     ENCODING_MAP(THUMB_SUB_SPI7,      0xb080,
-                 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | CLOBBER_DEST,
                  "sub", "sp, #!0d", 1),
     ENCODING_MAP(THUMB_SWI,           0xdf00,
-                 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_UNARY_OP | IS_BRANCH,
                  "swi", "!0d", 1),
     ENCODING_MAP(THUMB_TST,           0x4200,
-                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
-                 IS_UNARY_OP,
+                 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_UNARY_OP | SETS_CCODES,
                  "tst", "r!0d, r!1d", 1),
     ENCODING_MAP(THUMB2_VLDRS,       0xed900a00,
-                 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
+                 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vldr", "!0s, [r!1d, #!2E]", 2),
     ENCODING_MAP(THUMB2_VLDRD,       0xed900b00,
-                 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
+                 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vldr", "!0S, [r!1d, #!2E]", 2),
     ENCODING_MAP(THUMB2_VMULS,        0xee200a00,
-                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
+                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vmuls", "!0s, !1s, !2s", 2),
     ENCODING_MAP(THUMB2_VMULD,        0xee200b00,
-                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
+                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vmuld", "!0S, !1S, !2S", 2),
     ENCODING_MAP(THUMB2_VSTRS,       0xed800a00,
-                 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
+                 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "vstr", "!0s, [r!1d, #!2E]", 2),
     ENCODING_MAP(THUMB2_VSTRD,       0xed800b00,
-                 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
+                 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "vstr", "!0S, [r!1d, #!2E]", 2),
     ENCODING_MAP(THUMB2_VSUBS,        0xee300a40,
-                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
+                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vsub", "!0s, !1s, !2s", 2),
     ENCODING_MAP(THUMB2_VSUBD,        0xee300b40,
-                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
+                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vsub", "!0S, !1S, !2S", 2),
     ENCODING_MAP(THUMB2_VADDS,        0xee300a00,
-                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
+                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vadd", "!0s, !1s, !2s", 2),
     ENCODING_MAP(THUMB2_VADDD,        0xee300b00,
-                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
+                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vadd", "!0S, !1S, !2S", 2),
     ENCODING_MAP(THUMB2_VDIVS,        0xee800a00,
-                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
+                 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vdivs", "!0s, !1s, !2s", 2),
     ENCODING_MAP(THUMB2_VDIVD,        0xee800b00,
-                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
+                 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "vdivs", "!0S, !1S, !2S", 2),
     ENCODING_MAP(THUMB2_VCVTIF,       0xeeb80ac0,
-                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.f32", "!0s, !1s", 2),
     ENCODING_MAP(THUMB2_VCVTID,       0xeeb80bc0,
-                 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.f64", "!0S, !1s", 2),
     ENCODING_MAP(THUMB2_VCVTFI,       0xeebd0ac0,
-                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.s32.f32 ", "!0s, !1s", 2),
     ENCODING_MAP(THUMB2_VCVTDI,       0xeebd0bc0,
-                 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.s32.f64 ", "!0s, !1S", 2),
     ENCODING_MAP(THUMB2_VCVTFD,       0xeeb70ac0,
-                 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.f64.f32 ", "!0S, !1s", 2),
     ENCODING_MAP(THUMB2_VCVTDF,       0xeeb70bc0,
-                 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vcvt.f32.f64 ", "!0s, !1S", 2),
     ENCODING_MAP(THUMB2_VSQRTS,       0xeeb10ac0,
-                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vsqrt.f32 ", "!0s, !1s", 2),
     ENCODING_MAP(THUMB2_VSQRTD,       0xeeb10bc0,
-                 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
+                 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vsqrt.f64 ", "!0S, !1S", 2),
-    ENCODING_MAP(THUMB2_MOV_IMM_SHIFT,       0xf04f0000,
-                 BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1,
+    ENCODING_MAP(THUMB2_MOV_IMM_SHIFT, 0xf04f0000, /* no setflags encoding */
+                 BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0d, #!1m", 2),
     ENCODING_MAP(THUMB2_MOV_IMM16,       0xf2400000,
-                 BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1,
+                 BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0d, #!1M", 2),
     ENCODING_MAP(THUMB2_STR_RRI12,       0xf8c00000,
-                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "str", "r!0d,[r!1d, #!2d", 2),
     ENCODING_MAP(THUMB2_LDR_RRI12,       0xf8d00000,
-                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d,[r!1d, #!2d", 2),
     ENCODING_MAP(THUMB2_STR_RRI8_PREDEC,       0xf8400c00,
-                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP,
                  "str", "r!0d,[r!1d, #-!2d]", 2),
     ENCODING_MAP(THUMB2_LDR_RRI8_PREDEC,       0xf8500c00,
-                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d,[r!1d, #-!2d]", 2),
     ENCODING_MAP(THUMB2_CBNZ,       0xb900,
-                 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP,  /* Note: does not affect flags */
                  "cbnz", "r!0d,!1t", 1),
     ENCODING_MAP(THUMB2_CBZ,       0xb100,
-                 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
-                 IS_BINARY_OP,
+                 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP, /* Note: does not affect flags */
                  "cbz", "r!0d,!1t", 1),
-    ENCODING_MAP(THUMB2_ADD_RRI12,       0xf1000000,
-                 BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1,
-                 IS_TERTIARY_OP | CLOBBER_DEST,
+    ENCODING_MAP(THUMB2_ADD_RRI12,       0xf2000000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,/* Note: doesn't affect flags */
                  "add", "r!0d,r!1d,#!2d", 2),
-    ENCODING_MAP(THUMB2_MOV_RR,       0xea4f0000,
-                 BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1,
+    ENCODING_MAP(THUMB2_MOV_RR,       0xea4f0000, /* no setflags encoding */
+                 BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "mov", "r!0d, r!1d", 2),
     ENCODING_MAP(THUMB2_VMOVS,       0xeeb00a40,
-                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vmov.f32 ", "!0s, !1s", 2),
     ENCODING_MAP(THUMB2_VMOVD,       0xeeb00b40,
-                 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
+                 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
                  "vmov.f64 ", "!0s, !1s", 2),
+    ENCODING_MAP(THUMB2_LDMIA,         0xe8900000,
+                 BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
+                 "ldmia", "r!0d!!, <!1R>", 2),
+    ENCODING_MAP(THUMB2_STMIA,         0xe8800000,
+                 BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_SRC1,
+                 "stmia", "r!0d!!, <!1R>", 2),
+    ENCODING_MAP(THUMB2_ADD_RRR,  0xeb100000, /* setflags encoding */
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST | SETS_CCODES,
+                 "adds", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_SUB_RRR,       0xebb00000, /* setflags encoding */
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST | SETS_CCODES,
+                 "subs", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_SBC_RRR,       0xeb700000, /* setflags encoding */
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST | USES_CCODES | SETS_CCODES,
+                 "sbcs", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_CMP_RR,       0xebb00f00,
+                 BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | SETS_CCODES,
+                 "cmp", "r!0d, r!1d", 2),
+    ENCODING_MAP(THUMB2_SUB_RRI12,       0xf2a00000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,/* Note: doesn't affect flags */
+                 "sub", "r!0d,r!1d,#!2d", 2),
+    ENCODING_MAP(THUMB2_MVN_IMM_SHIFT,  0xf06f0000, /* no setflags encoding */
+                 BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST,
+                 "mvn", "r!0d, #!1n", 2),
+    ENCODING_MAP(THUMB2_SEL,       0xfaa0f080,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | USES_CCODES,
+                 "sel", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_UBFX,       0xf3c00000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, LSB, -1, -1, BWIDTH, 4, 0,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ubfx", "r!0d, r!1d, #!2d, #!3d", 2),
+    ENCODING_MAP(THUMB2_SBFX,       0xf3400000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, LSB, -1, -1, BWIDTH, 4, 0,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "sbfx", "r!0d, r!1d, #!2d, #!3d", 2),
+    ENCODING_MAP(THUMB2_LDR_RRR,    0xf8500000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ldr", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_LDRH_RRR,    0xf8300000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ldrh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_LDRSH_RRR,    0xf9300000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ldrsh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_LDRB_RRR,    0xf8100000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ldrb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_LDRSB_RRR,    0xf9100000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "ldrsb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_STR_RRR,    0xf8400000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP, /* store: operand 0 is read, not written */
+                 "str", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_STRH_RRR,    0xf8200000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP, /* store: operand 0 is read, not written */
+                 "strh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_STRB_RRR,    0xf8000000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 3, 0, BITBLT, 5, 4,
+                 IS_QUAD_OP, /* store: operand 0 is read, not written */
+                 "strb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2),
+    ENCODING_MAP(THUMB2_LDRH_RRI12,       0xf8b00000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ldrh", "r!0d,[r!1d, #!2d]", 2), /* 12-bit unsigned offset */
+    ENCODING_MAP(THUMB2_LDRSH_RRI12,       0xf9b00000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ldrsh", "r!0d,[r!1d, #!2d]", 2),
+    ENCODING_MAP(THUMB2_LDRB_RRI12,       0xf8900000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ldrb", "r!0d,[r!1d, #!2d]", 2),
+    ENCODING_MAP(THUMB2_LDRSB_RRI12,       0xf9900000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ldrsb", "r!0d,[r!1d, #!2d]", 2),
+    ENCODING_MAP(THUMB2_STRH_RRI12,       0xf8a00000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP, /* store: operand 0 is read, not written */
+                 "strh", "r!0d,[r!1d, #!2d]", 2),
+    ENCODING_MAP(THUMB2_STRB_RRI12,       0xf8800000,
+                 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP, /* store: operand 0 is read, not written */
+                 "strb", "r!0d,[r!1d, #!2d]", 2),
+    ENCODING_MAP(THUMB2_POP,           0xe8bd0000,
+                 BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_UNARY_OP,
+                 "pop", "<!0R>", 2),
+    ENCODING_MAP(THUMB2_PUSH,          0xe92d0000, /* stmdb sp!, <regs> */
+                 BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_UNARY_OP,
+                 "push", "<!0R>", 2),
+    ENCODING_MAP(THUMB2_CMP_RI8, 0xf1b00f00,
+                 BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | SETS_CCODES, /* writes no register */
+                 "cmp", "r!0d, #!1m", 2),
+    ENCODING_MAP(THUMB2_ADC_RRR,  0xeb500000, /* setflags encoding */
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST | USES_CCODES | SETS_CCODES,
+                 "adcs", "r!0d, r!1d, r!2d, shift !3d", 2),
+    ENCODING_MAP(THUMB2_AND_RRR,  0xea000000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "and", "r!0d, r!1d, r!2d, shift !3d", 2),
+    ENCODING_MAP(THUMB2_BIC_RRR,  0xea200000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "bic", "r!0d, r!1d, r!2d, shift !3d", 2),
+    ENCODING_MAP(THUMB2_CMN_RR,  0xeb100f00, /* adds with Rd == 0b1111 */
+                 BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | SETS_CCODES,
+                 "cmn", "r!0d, r!1d, shift !2d", 2),
+    ENCODING_MAP(THUMB2_EOR_RRR,  0xea800000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "eor", "r!0d, r!1d, r!2d, shift !3d", 2),
+    ENCODING_MAP(THUMB2_MUL_RRR,  0xfb00f000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "mul", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_MVN_RR,  0xea6f0000,
+                 BITBLT, 11, 8, BITBLT, 3, 0, SHIFT, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "mvn", "r!0d, r!1d, shift !2d", 2),
+    ENCODING_MAP(THUMB2_RSUB_RRI8,       0xf1d00000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "rsb", "r!0d,r!1d,#!2m", 2),
+    ENCODING_MAP(THUMB2_NEG_RR,       0xf1d00000, /* instance of rsub */
+                 BITBLT, 11, 8, BITBLT, 19, 16, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "neg", "r!0d,r!1d", 2),
+    ENCODING_MAP(THUMB2_ORR_RRR,  0xea400000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
+                 IS_QUAD_OP | CLOBBER_DEST,
+                 "orr", "r!0d, r!1d, r!2d, shift !3d", 2),
+    ENCODING_MAP(THUMB2_TST_RR,       0xea100f00,
+                 BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | SETS_CCODES,
+                 "tst", "r!0d, r!1d, shift !2d", 2),
+    ENCODING_MAP(THUMB2_LSLV_RRR,  0xfa00f000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "lsl", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_LSRV_RRR,  0xfa20f000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "lsr", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_ASRV_RRR,  0xfa40f000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "asr", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_RORV_RRR,  0xfa60f000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ror", "r!0d, r!1d, r!2d", 2),
+    ENCODING_MAP(THUMB2_LSL_RRI5,  0xea4f0000,
+                 BITBLT, 11, 8, BITBLT, 3, 0, SHIFT5, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "lsl", "r!0d, r!1d, #!2d", 2),
+    ENCODING_MAP(THUMB2_LSR_RRI5,  0xea4f0010,
+                 BITBLT, 11, 8, BITBLT, 3, 0, SHIFT5, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "lsr", "r!0d, r!1d, #!2d", 2),
+    ENCODING_MAP(THUMB2_ASR_RRI5,  0xea4f0020,
+                 BITBLT, 11, 8, BITBLT, 3, 0, SHIFT5, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "asr", "r!0d, r!1d, #!2d", 2),
+    ENCODING_MAP(THUMB2_ROR_RRI5,  0xea4f0030,
+                 BITBLT, 11, 8, BITBLT, 3, 0, SHIFT5, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "ror", "r!0d, r!1d, #!2d", 2),
+    ENCODING_MAP(THUMB2_BIC_RRI8,  0xf0200000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "bic", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_AND_RRI8,  0xf0000000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "and", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_ORR_RRI8,  0xf0400000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "orr", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_EOR_RRI8,  0xf0800000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "eor", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_ADD_RRI8,  0xf1100000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "adds", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_ADC_RRI8,  0xf1500000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES | USES_CCODES,
+                 "adcs", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_SUB_RRI8,  0xf1b00000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES,
+                 "subs", "r!0d, r!1d, #!2m", 2),
+    ENCODING_MAP(THUMB2_SBC_RRI8,  0xf1700000,
+                 BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES | USES_CCODES,
+                 "sbcs", "r!0d, r!1d, #!2m", 2),
 };
 
+
 #define PADDING_MOV_R0_R0               0x1C00
 
 /* Write the numbers in the literal pool to the codegen stream */
@@ -606,11 +828,30 @@
         ArmEncodingMap *encoder = &EncodingMap[lir->opCode];
         u4 bits = encoder->skeleton;
         int i;
-        for (i = 0; i < 3; i++) {
+        for (i = 0; i < 4; i++) {
             u4 value;
             switch(encoder->fieldLoc[i].kind) {
                 case UNUSED:
                     break;
+                case SHIFT5:
+                    value = ((lir->operands[i] & 0x1c) >> 2) << 12;
+                    value |= (lir->operands[i] & 0x03) << 6;
+                    bits |= value;
+                    break;
+                case SHIFT:
+                    value = ((lir->operands[i] & 0x70) >> 4) << 12;
+                    value |= (lir->operands[i] & 0x0f) << 4;
+                    bits |= value;
+                    break;
+                case BWIDTH:
+                    value = lir->operands[i] - 1;
+                    bits |= value;
+                    break;
+                case LSB:
+                    value = ((lir->operands[i] & 0x1c) >> 2) << 12;
+                    value |= (lir->operands[i] & 0x03) << 6;
+                    bits |= value;
+                    break;
                 case IMM6:
                     value = ((lir->operands[i] & 0x20) >> 5) << 9;
                     value |= (lir->operands[i] & 0x1f) << 3;
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index 83b9b24..ff6a3a6 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -481,10 +481,8 @@
 }
 #endif
 
-/*****************************************************************************/
-
 /*
- * The following are building blocks to construct low-level IRs with 0 - 3
+ * The following are building blocks to construct low-level IRs with 0 - 4
  * operands.
  */
 static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpCode opCode)
@@ -534,18 +532,21 @@
     return insn;
 }
 
-static ArmLIR *newLIR23(CompilationUnit *cUnit, ArmOpCode opCode,
-                            int srcdest, int src2)
+static ArmLIR *newLIR4(CompilationUnit *cUnit, ArmOpCode opCode,
+                           int dest, int src1, int src2, int info)
 {
-    assert(!isPseudoOpCode(opCode));
-    if (EncodingMap[opCode].flags & IS_BINARY_OP)
-        return newLIR2(cUnit, opCode, srcdest, src2);
-    else
-        return newLIR3(cUnit, opCode, srcdest, srcdest, src2);
+    ArmLIR *insn = dvmCompilerNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpCode(opCode) ||
+           (EncodingMap[opCode].flags & IS_QUAD_OP));
+    insn->opCode = opCode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    insn->operands[2] = src2;
+    insn->operands[3] = info;
+    dvmCompilerAppendLIR(cUnit, (LIR *) insn);
+    return insn;
 }
 
-/*****************************************************************************/
-
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
@@ -607,34 +608,6 @@
     branch->generic.target = (LIR *) pcrLabel;
 }
 
-/*
- * Perform a binary operation on 64-bit operands and leave the results in the
- * r0/r1 pair.
- */
-static void genBinaryOpWide(CompilationUnit *cUnit, int vDest,
-                            ArmOpCode preinst, ArmOpCode inst,
-                            int reg0, int reg2)
-{
-    int reg1 = NEXT_REG(reg0);
-    int reg3 = NEXT_REG(reg2);
-    newLIR23(cUnit, preinst, reg0, reg2);
-    newLIR23(cUnit, inst, reg1, reg3);
-    storeValuePair(cUnit, reg0, reg1, vDest, reg2);
-}
-
-/* Perform a binary operation on 32-bit operands and leave the results in r0. */
-static void genBinaryOp(CompilationUnit *cUnit, int vDest, ArmOpCode inst,
-                        int reg0, int reg1, int regDest)
-{
-    if (EncodingMap[inst].flags & IS_BINARY_OP) {
-        newLIR2(cUnit, inst, reg0, reg1);
-        storeValue(cUnit, reg0, vDest, reg1);
-    } else {
-        newLIR3(cUnit, inst, regDest, reg0, reg1);
-        storeValue(cUnit, regDest, vDest, reg1);
-    }
-}
-
 /* Create the PC reconstruction slot if not already done */
 static inline ArmLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset,
                                          ArmLIR *branch,
@@ -660,14 +633,16 @@
  * Perform a "reg cmp reg" operation and jump to the PCR region if condition
  * satisfies.
  */
-static inline ArmLIR *inertRegRegCheck(CompilationUnit *cUnit,
+static inline ArmLIR *insertRegRegCheck(CompilationUnit *cUnit,
                                            ArmConditionCode cond,
                                            int reg1, int reg2, int dOffset,
                                            ArmLIR *pcrLabel)
 {
-    newLIR2(cUnit, THUMB_CMP_RR, reg1, reg2);
-    ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
-    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+    ArmLIR *res;
+    res = opRegReg(cUnit, OP_CMP, reg1, reg2);
+    ArmLIR *branch = opImmImm(cUnit, OP_COND_BR, 0, cond);
+    genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+    return res;
 }
 
 /*
@@ -700,7 +675,7 @@
 static ArmLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
                                   int rBound, int dOffset, ArmLIR *pcrLabel)
 {
-    return inertRegRegCheck(cUnit, ARM_COND_CS, rIndex, rBound, dOffset,
+    return insertRegRegCheck(cUnit, ARM_COND_CS, rIndex, rBound, dOffset,
                             pcrLabel);
 }
 
@@ -708,7 +683,7 @@
 static inline ArmLIR *genTrap(CompilationUnit *cUnit, int dOffset,
                                   ArmLIR *pcrLabel)
 {
-    ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND);
+    ArmLIR *branch = opNone(cUnit, OP_UNCOND_BR);
     return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
 }
 
@@ -727,23 +702,21 @@
     /*
      * Ping reg0 to the first register of the alternate register pair
      */
-    reg0 = (reg2 + 2) & 0x2;
+    reg0 = (reg2 + 2) & 0xa;
     reg1 = NEXT_REG(reg0);
 
     loadValue(cUnit, dInsn->vB, reg2);
     loadConstant(cUnit, reg3, fieldOffset);
     genNullCheck(cUnit, dInsn->vB, reg2, mir->offset, NULL); /* null object? */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg2, reg2, reg3);
+    opRegReg(cUnit, OP_ADD, reg2, reg3);
 #if !defined(WITH_SELF_VERIFICATION)
-    newLIR2(cUnit, THUMB_LDMIA, reg2, (1<<reg0 | 1<<reg1));
-    storeValuePair(cUnit, reg0, reg1, dInsn->vA, reg3);
+    loadMultiple(cUnit, reg2, (1<<reg0 | 1<<reg1));
 #else
     int regMap = reg1 << 8 | reg0 << 4 | reg2;
     selfVerificationMemOpWrapper(cUnit, regMap,
         &selfVerificationLoadDoubleword);
-
-    storeValuePair(cUnit, reg0, reg1, dInsn->vA, reg3);
 #endif
+    storeValuePair(cUnit, reg0, reg1, dInsn->vA, reg3);
 }
 
 /* Store a wide field to an object instance */
@@ -761,7 +734,7 @@
     /*
      * Ping reg0 to the first register of the alternate register pair
      */
-    reg0 = (reg2 + 2) & 0x2;
+    reg0 = (reg2 + 2) & 0xa;
     reg1 = NEXT_REG(reg0);
 
 
@@ -770,9 +743,9 @@
     updateLiveRegisterPair(cUnit, dInsn->vA, reg0, reg1);
     loadConstant(cUnit, reg3, fieldOffset);
     genNullCheck(cUnit, dInsn->vB, reg2, mir->offset, NULL); /* null object? */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg2, reg2, reg3);
+    opRegReg(cUnit, OP_ADD, reg2, reg3);
 #if !defined(WITH_SELF_VERIFICATION)
-    newLIR2(cUnit, THUMB_STMIA, reg2, (1<<reg0 | 1<<reg1));
+    storeMultiple(cUnit, reg2, (1<<reg0 | 1<<reg1));
 #else
     int regMap = reg1 << 8 | reg0 << 4 | reg2;
     selfVerificationMemOpWrapper(cUnit, regMap,
@@ -783,14 +756,8 @@
 /*
  * Load a field from an object instance
  *
- * Inst should be one of:
- *      THUMB_LDR_RRR
- *      THUMB_LDRB_RRR
- *      THUMB_LDRH_RRR
- *      THUMB_LDRSB_RRR
- *      THUMB_LDRSH_RRR
  */
-static void genIGet(CompilationUnit *cUnit, MIR *mir, ArmOpCode inst,
+static void genIGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
                     int fieldOffset)
 {
     DecodedInstruction *dInsn = &mir->dalvikInsn;
@@ -798,33 +765,26 @@
 
     reg0 = selectFirstRegister(cUnit, dInsn->vB, false);
     reg1 = NEXT_REG(reg0);
-    /* TUNING: write a utility routine to load via base + constant offset */
     loadValue(cUnit, dInsn->vB, reg0);
-    loadConstant(cUnit, reg1, fieldOffset);
-    genNullCheck(cUnit, dInsn->vB, reg0, mir->offset, NULL); /* null object? */
 #if !defined(WITH_SELF_VERIFICATION)
-    newLIR3(cUnit, inst, reg0, reg0, reg1);
-    storeValue(cUnit, reg0, dInsn->vA, reg1);
+    loadBaseDisp(cUnit, mir, reg0, fieldOffset, reg1, size, true, dInsn->vB);
 #else
+    genNullCheck(cUnit, dInsn->vB, reg0, mir->offset, NULL); /* null object? */
     /* Combine address and offset */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg0, reg0, reg1);
+    loadConstant(cUnit, reg1, fieldOffset);
+    opRegReg(cUnit, OP_ADD, reg0, reg1);
 
-    int regMap = reg0 << 4 | reg0;
+    int regMap = reg1 << 4 | reg0;
     selfVerificationMemOpWrapper(cUnit, regMap, &selfVerificationLoad);
-
-    storeValue(cUnit, reg0, dInsn->vA, reg1);
 #endif
+    storeValue(cUnit, reg1, dInsn->vA, reg0);
 }
 
 /*
  * Store a field to an object instance
  *
- * Inst should be one of:
- *      THUMB_STR_RRR
- *      THUMB_STRB_RRR
- *      THUMB_STRH_RRR
  */
-static void genIPut(CompilationUnit *cUnit, MIR *mir, ArmOpCode inst,
+static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
                     int fieldOffset)
 {
     DecodedInstruction *dInsn = &mir->dalvikInsn;
@@ -834,39 +794,30 @@
     reg1 = NEXT_REG(reg0);
     reg2 = NEXT_REG(reg1);
 
-    /* TUNING: write a utility routine to load via base + constant offset */
     loadValue(cUnit, dInsn->vB, reg0);
-    loadConstant(cUnit, reg1, fieldOffset);
     loadValue(cUnit, dInsn->vA, reg2);
     updateLiveRegister(cUnit, dInsn->vA, reg2);
     genNullCheck(cUnit, dInsn->vB, reg0, mir->offset, NULL); /* null object? */
 #if !defined(WITH_SELF_VERIFICATION)
-    newLIR3(cUnit, inst, reg2, reg0, reg1);
+    storeBaseDisp(cUnit, reg0, fieldOffset, reg2, size, reg1);
 #else
     /* Combine address and offset */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg0, reg0, reg1);
+    loadConstant(cUnit, reg1, fieldOffset);
+    opRegReg(cUnit, OP_ADD, reg0, reg1);
 
     int regMap = reg2 << 4 | reg0;
     selfVerificationMemOpWrapper(cUnit, regMap, &selfVerificationStore);
 
-    newLIR3(cUnit, THUMB_SUB_RRR, reg0, reg0, reg1);
+    opRegReg(cUnit, OP_SUB, reg0, reg1);
 #endif
 }
 
 
-/* TODO: This should probably be done as an out-of-line instruction handler. */
-
 /*
  * Generate array load
  *
- * Inst should be one of:
- *      THUMB_LDR_RRR
- *      THUMB_LDRB_RRR
- *      THUMB_LDRH_RRR
- *      THUMB_LDRSB_RRR
- *      THUMB_LDRSH_RRR
  */
-static void genArrayGet(CompilationUnit *cUnit, MIR *mir, ArmOpCode inst,
+static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
                         int vArray, int vIndex, int vDest, int scale)
 {
     int lenOffset = offsetof(ArrayObject, length);
@@ -882,77 +833,71 @@
     loadValue(cUnit, vIndex, reg3);
 
     /* null object? */
-    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset,
-                                         NULL);
-    newLIR3(cUnit, THUMB_LDR_RRI5, reg0, reg2, lenOffset >> 2);  /* Get len */
-    newLIR2(cUnit, THUMB_ADD_RI8, reg2, dataOffset); /* reg2 -> array data */
+    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
+    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
+    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
     genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
-    if (scale) {
-        newLIR3(cUnit, THUMB_LSL, reg3, reg3, scale);
-    }
 #if !defined(WITH_SELF_VERIFICATION)
-    if (scale==3) {
-        newLIR3(cUnit, inst, reg0, reg2, reg3);
-        newLIR2(cUnit, THUMB_ADD_RI8, reg2, 4);
-        newLIR3(cUnit, inst, reg1, reg2, reg3);
+    if ((size == LONG) || (size == DOUBLE)) {
+        //TUNING: redo.  Make specific wide routine, perhaps use ldmia/fp regs
+        opRegRegImm(cUnit, OP_LSL, reg3, reg3, scale, rNone);
+        loadBaseIndexed(cUnit, reg2, reg3, reg0, 0, WORD);
+        opRegImm(cUnit, OP_ADD, reg2, 4, rNone);
+        loadBaseIndexed(cUnit, reg2, reg3, reg1, 0, WORD);
         storeValuePair(cUnit, reg0, reg1, vDest, reg3);
     } else {
-        newLIR3(cUnit, inst, reg0, reg2, reg3);
+        loadBaseIndexed(cUnit, reg2, reg3, reg0, scale, size);
         storeValue(cUnit, reg0, vDest, reg3);
     }
 #else
-    void* funct;
-    switch (scale) {
-        case 0:
-            if (inst == THUMB_LDRSB_RRR)
-                funct = (void*) &selfVerificationLoadSignedByte;
-            else
-                funct = (void*) &selfVerificationLoadByte;
-            break;
-        case 1:
-            if (inst == THUMB_LDRSH_RRR)
-                funct = (void*) &selfVerificationLoadSignedHalfword;
-            else
-                funct = (void*) &selfVerificationLoadHalfword;
-            break;
-        case 2:
-            funct = (void*) &selfVerificationLoad;
-            break;
-        case 3:
+    //TODO: probably want to move this into loadBaseIndexed
+    void *funct = NULL;
+    switch(size) {
+        case LONG:
+        case DOUBLE:
             funct = (void*) &selfVerificationLoadDoubleword;
             break;
-        default:
-            LOGE("ERROR: bad scale value in genArrayGet: %d", scale);
+        case WORD:
             funct = (void*) &selfVerificationLoad;
             break;
+        case UNSIGNED_HALF:
+            funct = (void*) &selfVerificationLoadHalfword;
+            break;
+        case SIGNED_HALF:
+            funct = (void*) &selfVerificationLoadSignedHalfword;
+            break;
+        case UNSIGNED_BYTE:
+            funct = (void*) &selfVerificationLoadByte;
+            break;
+        case SIGNED_BYTE:
+            funct = (void*) &selfVerificationLoadSignedByte;
+            break;
+        default:
+            assert(0);
+            dvmAbort();
     }
-
-    /* Combine address and offset */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg2, reg2, reg3);
+    /* Combine address and index */
+    if (scale)
+        opRegRegImm(cUnit, OP_LSL, reg3, reg3, scale, rNone);
+    opRegReg(cUnit, OP_ADD, reg2, reg3);
 
     int regMap = reg1 << 8 | reg0 << 4 | reg2;
     selfVerificationMemOpWrapper(cUnit, regMap, funct);
 
-    newLIR3(cUnit, THUMB_SUB_RRR, reg2, reg2, reg3);
+    opRegReg(cUnit, OP_SUB, reg2, reg3);
 
-    if (scale==3)
+    if ((size == LONG) || (size == DOUBLE))
         storeValuePair(cUnit, reg0, reg1, vDest, reg3);
     else
         storeValue(cUnit, reg0, vDest, reg3);
 #endif
 }
 
-/* TODO: This should probably be done as an out-of-line instruction handler. */
-
 /*
  * Generate array store
  *
- * Inst should be one of:
- *      THUMB_STR_RRR
- *      THUMB_STRB_RRR
- *      THUMB_STRH_RRR
  */
-static void genArrayPut(CompilationUnit *cUnit, MIR *mir, ArmOpCode inst,
+static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
                         int vArray, int vIndex, int vSrc, int scale)
 {
     int lenOffset = offsetof(ArrayObject, length);
@@ -970,60 +915,65 @@
     /* null object? */
     ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset,
                                          NULL);
-    newLIR3(cUnit, THUMB_LDR_RRI5, reg0, reg2, lenOffset >> 2);  /* Get len */
-    newLIR2(cUnit, THUMB_ADD_RI8, reg2, dataOffset); /* reg2 -> array data */
+    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
+    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
     genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
     /* at this point, reg2 points to array, reg3 is unscaled index */
-    if (scale==3) {
+#if !defined(WITH_SELF_VERIFICATION)
+    if ((size == LONG) || (size == DOUBLE)) {
+        //TUNING: redo.  Make specific wide routine, perhaps use stmia/fp regs
+        loadValuePair(cUnit, vSrc, reg0, reg1);
+        updateLiveRegisterPair(cUnit, vSrc, reg0, reg1);
+        if (scale)
+            opRegRegImm(cUnit, OP_LSL, reg3, reg3, scale, rNone);
+        storeBaseIndexed(cUnit, reg2, reg3, reg0, 0, WORD);
+        opRegImm(cUnit, OP_ADD, reg2, 4, rNone);
+        storeBaseIndexed(cUnit, reg2, reg3, reg1, 0, WORD);
+    } else {
+        loadValue(cUnit, vSrc, reg0);
+        updateLiveRegister(cUnit, vSrc, reg0);
+        storeBaseIndexed(cUnit, reg2, reg3, reg0, scale, size);
+    }
+#else
+    //TODO: probably want to move this into storeBaseIndexed
+    void *funct = NULL;
+    switch(size) {
+        case LONG:
+        case DOUBLE:
+            funct = (void*) &selfVerificationStoreDoubleword;
+            break;
+        case WORD:
+            funct = (void*) &selfVerificationStore;
+            break;
+        case SIGNED_HALF:
+        case UNSIGNED_HALF:
+            funct = (void*) &selfVerificationStoreHalfword;
+            break;
+        case SIGNED_BYTE:
+        case UNSIGNED_BYTE:
+            funct = (void*) &selfVerificationStoreByte;
+            break;
+        default:
+            assert(0);
+            dvmAbort();
+    }
+
+    /* Load the source value, then combine address and index */
+    if ((size == LONG) || (size == DOUBLE)) {
         loadValuePair(cUnit, vSrc, reg0, reg1);
         updateLiveRegisterPair(cUnit, vSrc, reg0, reg1);
     } else {
         loadValue(cUnit, vSrc, reg0);
         updateLiveRegister(cUnit, vSrc, reg0);
     }
-    if (scale) {
-        newLIR3(cUnit, THUMB_LSL, reg3, reg3, scale);
-    }
-    /*
-     * at this point, reg2 points to array, reg3 is scaled index, and
-     * reg0[reg1] is data
-     */
-#if !defined(WITH_SELF_VERIFICATION)
-    if (scale==3) {
-        newLIR3(cUnit, inst, reg0, reg2, reg3);
-        newLIR2(cUnit, THUMB_ADD_RI8, reg2, 4);
-        newLIR3(cUnit, inst, reg1, reg2, reg3);
-    } else {
-        newLIR3(cUnit, inst, reg0, reg2, reg3);
-    }
-#else
-    void *funct;
-    switch (scale) {
-        case 0:
-            funct = (void*) &selfVerificationStoreByte;
-            break;
-        case 1:
-            funct = (void*) &selfVerificationStoreHalfword;
-            break;
-        case 2:
-            funct = (void*) &selfVerificationStore;
-            break;
-        case 3:
-            funct = (void*) &selfVerificationStoreDoubleword;
-            break;
-        default:
-            LOGE("ERROR: bad scale value in genArrayPut: %d", scale);
-            funct = (void*) &selfVerificationStore;
-            break;
-    }
-
-    /* Combine address and offset */
-    newLIR3(cUnit, THUMB_ADD_RRR, reg2, reg2, reg3);
+    if (scale)
+        opRegRegImm(cUnit, OP_LSL, reg3, reg3, scale, rNone);
+    opRegReg(cUnit, OP_ADD, reg2, reg3);
 
     int regMap = reg1 << 8 | reg0 << 4 | reg2;
     selfVerificationMemOpWrapper(cUnit, regMap, funct);
 
-    newLIR3(cUnit, THUMB_SUB_RRR, reg2, reg2, reg3);
+    opRegReg(cUnit, OP_SUB, reg2, reg3);
 #endif
 }
 
@@ -1098,8 +1048,7 @@
             break;
         case OP_NEG_FLOAT: {
             loadValue(cUnit, vSrc2, reg0);
-            loadConstant(cUnit, reg1, 0x80000000);
-            newLIR3(cUnit, THUMB_ADD_RRR, reg0, reg0, reg1);
+            opRegImm(cUnit, OP_ADD, reg0, 0x80000000, reg1);
             storeValue(cUnit, reg0, vDest, reg1);
             return false;
         }
@@ -1109,7 +1058,7 @@
     loadConstant(cUnit, r2, (int)funct);
     loadValue(cUnit, vSrc1, r0);
     loadValue(cUnit, vSrc2, r1);
-    newLIR1(cUnit, THUMB_BLX_R, r2);
+    opReg(cUnit, OP_BLX, r2);
     storeValue(cUnit, r0, vDest, r1);
     return false;
 }
@@ -1154,8 +1103,7 @@
             break;
         case OP_NEG_DOUBLE: {
             loadValuePair(cUnit, vSrc2, reg0, reg1);
-            loadConstant(cUnit, reg2, 0x80000000);
-            newLIR3(cUnit, THUMB_ADD_RRR, reg1, reg1, reg2);
+            opRegImm(cUnit, OP_ADD, reg1, 0x80000000, reg2);
             storeValuePair(cUnit, reg0, reg1, vDest, reg2);
             return false;
         }
@@ -1169,7 +1117,7 @@
     loadConstant(cUnit, r4PC, (int)funct);
     loadValuePair(cUnit, vSrc1, r0, r1);
     loadValuePair(cUnit, vSrc2, r2, r3);
-    newLIR1(cUnit, THUMB_BLX_R, r4PC);
+    opReg(cUnit, OP_BLX, r4PC);
     storeValuePair(cUnit, r0, r1, vDest, r2);
     return false;
 }
@@ -1177,8 +1125,8 @@
 static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, int vDest,
                            int vSrc1, int vSrc2)
 {
-    int firstOp = THUMB_BKPT;
-    int secondOp = THUMB_BKPT;
+    OpKind firstOp = OP_BKPT;
+    OpKind secondOp = OP_BKPT;
     bool callOut = false;
     void *callTgt;
     int retReg = r0;
@@ -1188,18 +1136,18 @@
 
     switch (mir->dalvikInsn.opCode) {
         case OP_NOT_LONG:
-            firstOp = THUMB_MVN;
-            secondOp = THUMB_MVN;
+            firstOp = OP_MVN;
+            secondOp = OP_MVN;
             break;
         case OP_ADD_LONG:
         case OP_ADD_LONG_2ADDR:
-            firstOp = THUMB_ADD_RRR;
-            secondOp = THUMB_ADC;
+            firstOp = OP_ADD;
+            secondOp = OP_ADC;
             break;
         case OP_SUB_LONG:
         case OP_SUB_LONG_2ADDR:
-            firstOp = THUMB_SUB_RRR;
-            secondOp = THUMB_SBC;
+            firstOp = OP_SUB;
+            secondOp = OP_SBC;
             break;
         case OP_MUL_LONG:
         case OP_MUL_LONG_2ADDR:
@@ -1224,18 +1172,18 @@
             break;
         case OP_AND_LONG:
         case OP_AND_LONG_2ADDR:
-            firstOp = THUMB_AND_RR;
-            secondOp = THUMB_AND_RR;
+            firstOp = OP_AND;
+            secondOp = OP_AND;
             break;
         case OP_OR_LONG:
         case OP_OR_LONG_2ADDR:
-            firstOp = THUMB_ORR;
-            secondOp = THUMB_ORR;
+            firstOp = OP_OR;
+            secondOp = OP_OR;
             break;
         case OP_XOR_LONG:
         case OP_XOR_LONG_2ADDR:
-            firstOp = THUMB_EOR;
-            secondOp = THUMB_EOR;
+            firstOp = OP_XOR;
+            secondOp = OP_XOR;
             break;
         case OP_NEG_LONG: {
             reg0 = selectFirstRegister(cUnit, vSrc2, true);
@@ -1245,8 +1193,8 @@
 
             loadValuePair(cUnit, vSrc2, reg0, reg1);
             loadConstant(cUnit, reg3, 0);
-            newLIR3(cUnit, THUMB_SUB_RRR, reg2, reg3, reg0);
-            newLIR2(cUnit, THUMB_SBC, reg3, reg1);
+            opRegRegReg(cUnit, OP_SUB, reg2, reg3, reg0);
+            opRegReg(cUnit, OP_SBC, reg3, reg1);
             storeValuePair(cUnit, reg2, reg3, vDest, reg0);
             return false;
         }
@@ -1262,16 +1210,18 @@
 
         loadValuePair(cUnit, vSrc1, reg0, reg1);
         loadValuePair(cUnit, vSrc2, reg2, reg3);
-        genBinaryOpWide(cUnit, vDest, firstOp, secondOp, reg0, reg2);
+        opRegReg(cUnit, firstOp, reg0, reg2);
+        opRegReg(cUnit, secondOp, reg1, reg3);
+        storeValuePair(cUnit, reg0, reg1, vDest, reg2);
     /*
-     * Don't optimize the regsiter usage here as they are governed by the EABI
+     * Don't optimize the register usage here as they are governed by the EABI
      * calling convention.
      */
     } else {
         loadValuePair(cUnit, vSrc2, r2, r3);
         loadConstant(cUnit, r4PC, (int) callTgt);
         loadValuePair(cUnit, vSrc1, r0, r1);
-        newLIR1(cUnit, THUMB_BLX_R, r4PC);
+        opReg(cUnit, OP_BLX, r4PC);
         storeValuePair(cUnit, retReg, retReg+1, vDest, r4PC);
     }
     return false;
@@ -1280,9 +1230,10 @@
 static bool genArithOpInt(CompilationUnit *cUnit, MIR *mir, int vDest,
                           int vSrc1, int vSrc2)
 {
-    int armOp = THUMB_BKPT;
+    OpKind op = OP_BKPT;
     bool callOut = false;
     bool checkZero = false;
+    bool threeOperand = false;
     int retReg = r0;
     void *callTgt;
     int reg0, reg1, regDest;
@@ -1293,22 +1244,24 @@
 
     switch (mir->dalvikInsn.opCode) {
         case OP_NEG_INT:
-            armOp = THUMB_NEG;
+            op = OP_NEG;
             break;
         case OP_NOT_INT:
-            armOp = THUMB_MVN;
+            op = OP_MVN;
             break;
         case OP_ADD_INT:
         case OP_ADD_INT_2ADDR:
-            armOp = THUMB_ADD_RRR;
+            op = OP_ADD;
+            threeOperand = true;
             break;
         case OP_SUB_INT:
         case OP_SUB_INT_2ADDR:
-            armOp = THUMB_SUB_RRR;
+            op = OP_SUB;
+            threeOperand = true;
             break;
         case OP_MUL_INT:
         case OP_MUL_INT_2ADDR:
-            armOp = THUMB_MUL;
+            op = OP_MUL;
             break;
         case OP_DIV_INT:
         case OP_DIV_INT_2ADDR:
@@ -1327,27 +1280,27 @@
             break;
         case OP_AND_INT:
         case OP_AND_INT_2ADDR:
-            armOp = THUMB_AND_RR;
+            op = OP_AND;
             break;
         case OP_OR_INT:
         case OP_OR_INT_2ADDR:
-            armOp = THUMB_ORR;
+            op = OP_OR;
             break;
         case OP_XOR_INT:
         case OP_XOR_INT_2ADDR:
-            armOp = THUMB_EOR;
+            op = OP_XOR;
             break;
         case OP_SHL_INT:
         case OP_SHL_INT_2ADDR:
-            armOp = THUMB_LSLV;
+            op = OP_LSL;
             break;
         case OP_SHR_INT:
         case OP_SHR_INT_2ADDR:
-            armOp = THUMB_ASRV;
+            op = OP_ASR;
             break;
         case OP_USHR_INT:
         case OP_USHR_INT_2ADDR:
-            armOp = THUMB_LSRV;
+            op = OP_LSR;
             break;
         default:
             LOGE("Invalid word arith op: 0x%x(%d)",
@@ -1363,7 +1316,13 @@
 
             loadValue(cUnit, vSrc1, reg0); /* Should be optimized away */
             loadValue(cUnit, vSrc2, reg1);
-            genBinaryOp(cUnit, vDest, armOp, reg0, reg1, regDest);
+            if (threeOperand) {
+                opRegRegReg(cUnit, op, regDest, reg0, reg1);
+                storeValue(cUnit, regDest, vDest, reg1);
+            } else {
+                opRegReg(cUnit, op, reg0, reg1);
+                storeValue(cUnit, reg0, vDest, reg1);
+            }
         } else {
             reg0 = selectFirstRegister(cUnit, vSrc2, false);
             reg1 = NEXT_REG(reg0);
@@ -1371,7 +1330,13 @@
 
             loadValue(cUnit, vSrc1, reg1); /* Load this value first */
             loadValue(cUnit, vSrc2, reg0); /* May be optimized away */
-            genBinaryOp(cUnit, vDest, armOp, reg1, reg0, regDest);
+            if (threeOperand) {
+                opRegRegReg(cUnit, op, regDest, reg1, reg0);
+                storeValue(cUnit, regDest, vDest, reg1);
+            } else {
+                opRegReg(cUnit, op, reg1, reg0);
+                storeValue(cUnit, reg1, vDest, reg0);
+            }
         }
     } else {
         /*
@@ -1396,7 +1361,7 @@
         if (checkZero) {
             genNullCheck(cUnit, vSrc2, r1, mir->offset, NULL);
         }
-        newLIR1(cUnit, THUMB_BLX_R, r2);
+        opReg(cUnit, OP_BLX, r2);
         storeValue(cUnit, retReg, vDest, r2);
     }
     return false;
@@ -1455,7 +1420,7 @@
     } else {
         loadValuePair(cUnit, mir->dalvikInsn.vB, r0, r1);
     }
-    newLIR1(cUnit, THUMB_BLX_R, r2);
+    opReg(cUnit, OP_BLX, r2);
     if (tgtSize == 1) {
         storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
     } else {
@@ -1464,136 +1429,6 @@
     return false;
 }
 
-static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
-{
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int offset = offsetof(InterpState, retval);
-    int regObj = selectFirstRegister(cUnit, dInsn->arg[0], false);
-    int reg1 = NEXT_REG(regObj);
-    loadValue(cUnit, dInsn->arg[0], regObj);
-    genNullCheck(cUnit, dInsn->arg[0], regObj, mir->offset, NULL);
-    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, reg1);
-    newLIR3(cUnit, THUMB_STR_RRI5, reg1, rGLUE, offset >> 2);
-    return false;
-}
-
-/*
- * NOTE: The amount of code for this body suggests it ought to
- * be handled in a template (and could also be coded quite a bit
- * more efficiently in ARM).  However, the code is dependent on the
- * internal structure layout of string objects which are most safely
- * known at run time.
- * TUNING:  One possibility (which could also be used for StringCompareTo
- * and StringEquals) is to generate string access helper subroutines on
- * Jit startup, and then call them from the translated inline-executes.
- */
-static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
-{
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int offset = offsetof(InterpState, retval);
-    int contents = offsetof(ArrayObject, contents);
-    int regObj = selectFirstRegister(cUnit, dInsn->arg[0], false);
-    int regIdx = NEXT_REG(regObj);
-    int regMax = NEXT_REG(regIdx);
-    int regOff = NEXT_REG(regMax);
-    loadValue(cUnit, dInsn->arg[0], regObj);
-    loadValue(cUnit, dInsn->arg[1], regIdx);
-    ArmLIR * pcrLabel = genNullCheck(cUnit, dInsn->arg[0], regObj,
-                                         mir->offset, NULL);
-    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, regMax);
-    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_offset, regOff);
-    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_value, regObj);
-    genBoundsCheck(cUnit, regIdx, regMax, mir->offset, pcrLabel);
-
-    newLIR2(cUnit, THUMB_ADD_RI8, regObj, contents);
-    newLIR3(cUnit, THUMB_ADD_RRR, regIdx, regIdx, regOff);
-    newLIR3(cUnit, THUMB_ADD_RRR, regIdx, regIdx, regIdx);
-    newLIR3(cUnit, THUMB_LDRH_RRR, regMax, regObj, regIdx);
-    newLIR3(cUnit, THUMB_STR_RRI5, regMax, rGLUE, offset >> 2);
-    return false;
-}
-
-static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
-{
-    int offset = offsetof(InterpState, retval);
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
-    int sign = NEXT_REG(reg0);
-    /* abs(x) = y<=x>>31, (x+y)^y.  Shorter in ARM/THUMB2, no skip in THUMB */
-    loadValue(cUnit, dInsn->arg[0], reg0);
-    newLIR3(cUnit, THUMB_ASR, sign, reg0, 31);
-    newLIR3(cUnit, THUMB_ADD_RRR, reg0, reg0, sign);
-    newLIR2(cUnit, THUMB_EOR, reg0, sign);
-    newLIR3(cUnit, THUMB_STR_RRI5, reg0, rGLUE, offset >> 2);
-    return false;
-}
-
-static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
-{
-    int offset = offsetof(InterpState, retval);
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
-    int signMask = NEXT_REG(reg0);
-    loadValue(cUnit, dInsn->arg[0], reg0);
-    loadConstant(cUnit, signMask, 0x7fffffff);
-    newLIR2(cUnit, THUMB_AND_RR, reg0, signMask);
-    newLIR3(cUnit, THUMB_STR_RRI5, reg0, rGLUE, offset >> 2);
-    return false;
-}
-
-static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
-{
-    int offset = offsetof(InterpState, retval);
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
-    int ophi = NEXT_REG(oplo);
-    int signMask = NEXT_REG(ophi);
-    loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
-    loadConstant(cUnit, signMask, 0x7fffffff);
-    newLIR3(cUnit, THUMB_STR_RRI5, oplo, rGLUE, offset >> 2);
-    newLIR2(cUnit, THUMB_AND_RR, ophi, signMask);
-    newLIR3(cUnit, THUMB_STR_RRI5, ophi, rGLUE, (offset >> 2)+1);
-    return false;
-}
-
- /* No select in thumb, so we need to branch.  Thumb2 will do better */
-static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
-{
-    int offset = offsetof(InterpState, retval);
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
-    int reg1 = NEXT_REG(reg0);
-    loadValue(cUnit, dInsn->arg[0], reg0);
-    loadValue(cUnit, dInsn->arg[1], reg1);
-    newLIR2(cUnit, THUMB_CMP_RR, reg0, reg1);
-    ArmLIR *branch1 = newLIR2(cUnit, THUMB_B_COND, 2,
-           isMin ? ARM_COND_LT : ARM_COND_GT);
-    newLIR2(cUnit, THUMB_MOV_RR, reg0, reg1);
-    ArmLIR *target =
-        newLIR3(cUnit, THUMB_STR_RRI5, reg0, rGLUE, offset >> 2);
-    branch1->generic.target = (LIR *)target;
-    return false;
-}
-
-static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
-{
-    int offset = offsetof(InterpState, retval);
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
-    int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
-    int ophi = NEXT_REG(oplo);
-    int sign = NEXT_REG(ophi);
-    /* abs(x) = y<=x>>31, (x+y)^y.  Shorter in ARM/THUMB2, no skip in THUMB */
-    loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
-    newLIR3(cUnit, THUMB_ASR, sign, ophi, 31);
-    newLIR3(cUnit, THUMB_ADD_RRR, oplo, oplo, sign);
-    newLIR2(cUnit, THUMB_ADC, ophi, sign);
-    newLIR2(cUnit, THUMB_EOR, oplo, sign);
-    newLIR2(cUnit, THUMB_EOR, ophi, sign);
-    newLIR3(cUnit, THUMB_STR_RRI5, oplo, rGLUE, offset >> 2);
-    newLIR3(cUnit, THUMB_STR_RRI5, ophi, rGLUE, (offset >> 2)+1);
-    return false;
-}
-
 static void genProcessArgsNoRange(CompilationUnit *cUnit, MIR *mir,
                                   DecodedInstruction *dInsn,
                                   ArmLIR **pcrLabel)
@@ -1608,15 +1443,14 @@
     }
     if (regMask) {
         /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */
-        newLIR2(cUnit, THUMB_MOV_RR, r7, rFP);
-        newLIR2(cUnit, THUMB_SUB_RI8, r7,
-                sizeof(StackSaveArea) + (dInsn->vA << 2));
+        opRegRegImm(cUnit, OP_SUB, r7, rFP,
+                    sizeof(StackSaveArea) + (dInsn->vA << 2), rNone);
         /* generate null check */
         if (pcrLabel) {
             *pcrLabel = genNullCheck(cUnit, dInsn->arg[0], r0, mir->offset,
                                      NULL);
         }
-        newLIR2(cUnit, THUMB_STMIA, r7, regMask);
+        storeMultiple(cUnit, r7, regMask);
     }
 }
 
@@ -1631,25 +1465,13 @@
      * r4PC     : &rFP[vC]
      * r7: &newFP[0]
      */
-    if (srcOffset < 8) {
-        newLIR3(cUnit, THUMB_ADD_RRI3, r4PC, rFP, srcOffset);
-    } else {
-        loadConstant(cUnit, r4PC, srcOffset);
-        newLIR3(cUnit, THUMB_ADD_RRR, r4PC, rFP, r4PC);
-    }
+    opRegRegImm(cUnit, OP_ADD, r4PC, rFP, srcOffset, rNone);
     /* load [r0 .. min(numArgs,4)] */
     regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1;
-    newLIR2(cUnit, THUMB_LDMIA, r4PC, regMask);
+    loadMultiple(cUnit, r4PC, regMask);
 
-    if (sizeof(StackSaveArea) + (numArgs << 2) < 256) {
-        newLIR2(cUnit, THUMB_MOV_RR, r7, rFP);
-        newLIR2(cUnit, THUMB_SUB_RI8, r7,
-                sizeof(StackSaveArea) + (numArgs << 2));
-    } else {
-        loadConstant(cUnit, r7, sizeof(StackSaveArea) + (numArgs << 2));
-        newLIR3(cUnit, THUMB_SUB_RRR, r7, rFP, r7);
-    }
-
+    opRegRegImm(cUnit, OP_SUB, r7, rFP,
+                sizeof(StackSaveArea) + (numArgs << 2), rNone);
     /* generate null check */
     if (pcrLabel) {
         *pcrLabel = genNullCheck(cUnit, dInsn->vC, r0, mir->offset, NULL);
@@ -1663,37 +1485,37 @@
         ArmLIR *loopLabel = NULL;
         /*
          * r0 contains "this" and it will be used later, so push it to the stack
-         * first. Pushing r5 is just for stack alignment purposes.
+         * first. Pushing r5 (rFP) is just for stack alignment purposes.
          */
-        newLIR1(cUnit, THUMB_PUSH, 1 << r0 | 1 << 5);
+        opImm(cUnit, OP_PUSH, (1 << r0 | 1 << rFP));
         /* No need to generate the loop structure if numArgs <= 11 */
         if (numArgs > 11) {
             loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2);
             loopLabel = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
         }
-        newLIR2(cUnit, THUMB_STMIA, r7, regMask);
-        newLIR2(cUnit, THUMB_LDMIA, r4PC, regMask);
+        storeMultiple(cUnit, r7, regMask);
+        loadMultiple(cUnit, r4PC, regMask);
         /* No need to generate the loop structure if numArgs <= 11 */
         if (numArgs > 11) {
-            newLIR2(cUnit, THUMB_SUB_RI8, 5, 4);
+            opRegImm(cUnit, OP_SUB, rFP, 4, rNone);
             genConditionalBranch(cUnit, ARM_COND_NE, loopLabel);
         }
     }
 
     /* Save the last batch of loaded values */
-    newLIR2(cUnit, THUMB_STMIA, r7, regMask);
+    storeMultiple(cUnit, r7, regMask);
 
     /* Generate the loop epilogue - don't use r0 */
     if ((numArgs > 4) && (numArgs % 4)) {
         regMask = ((1 << (numArgs & 0x3)) - 1) << 1;
-        newLIR2(cUnit, THUMB_LDMIA, r4PC, regMask);
+        loadMultiple(cUnit, r4PC, regMask);
     }
     if (numArgs >= 8)
-        newLIR1(cUnit, THUMB_POP, 1 << r0 | 1 << 5);
+        opImm(cUnit, OP_POP, (1 << r0 | 1 << rFP));
 
     /* Save the modulo 4 arguments */
     if ((numArgs > 4) && (numArgs % 4)) {
-        newLIR2(cUnit, THUMB_STMIA, r7, regMask);
+        storeMultiple(cUnit, r7, regMask);
     }
 }
 
@@ -1709,7 +1531,7 @@
     ArmLIR *retChainingCell = &labelList[bb->fallThrough->id];
 
     /* r1 = &retChainingCell */
-    ArmLIR *addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL, r1, 0, 0);
+    ArmLIR *addrRetChain = opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
     /* r4PC = dalvikCallsite */
     loadConstant(cUnit, r4PC,
                  (int) (cUnit->method->insns + mir->offset));
@@ -1769,11 +1591,12 @@
                  (int) (cUnit->method->insns + mir->offset));
 
     /* r1 = &retChainingCell */
-    ArmLIR *addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL, r1, 0, 0);
+    ArmLIR *addrRetChain = opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
     addrRetChain->generic.target = (LIR *) retChainingCell;
 
     /* r2 = &predictedChainingCell */
-    ArmLIR *predictedChainingCell = newLIR3(cUnit, THUMB_ADD_PC_REL, r2, 0, 0);
+    ArmLIR *predictedChainingCell = opRegRegImm(cUnit, OP_ADD, r2, rpc, 0,
+                                                rNone);
     predictedChainingCell->generic.target = (LIR *) predChainingCell;
 
     genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN);
@@ -1808,23 +1631,16 @@
      */
 
     /* r0 <- calleeMethod */
-    if (methodIndex < 32) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, r0, r7, methodIndex);
-    } else {
-        loadConstant(cUnit, r0, methodIndex<<2);
-        newLIR3(cUnit, THUMB_LDR_RRR, r0, r7, r0);
-    }
+    loadWordDisp(cUnit, r7, methodIndex * 4, r0);
 
     /* Check if rechain limit is reached */
-    newLIR2(cUnit, THUMB_CMP_RI8, r1, 0);
+    opRegImm(cUnit, OP_CMP, r1, 0, rNone);
 
     ArmLIR *bypassRechaining =
-        newLIR2(cUnit, THUMB_B_COND, 0, ARM_COND_GT);
+        opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_GT);
 
-    newLIR3(cUnit, THUMB_LDR_RRI5, r7, rGLUE,
-            offsetof(InterpState,
-                     jitToInterpEntries.dvmJitToPatchPredictedChain)
-            >> 2);
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                 jitToInterpEntries.dvmJitToPatchPredictedChain), r7);
 
     /*
      * r0 = calleeMethod
@@ -1835,10 +1651,10 @@
      * when patching the chaining cell and will be clobbered upon
      * returning so it will be reconstructed again.
      */
-    newLIR1(cUnit, THUMB_BLX_R, r7);
+    opReg(cUnit, OP_BLX, r7);
 
     /* r1 = &retChainingCell */
-    addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL, r1, 0, 0);
+    addrRetChain = opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
     addrRetChain->generic.target = (LIR *) retChainingCell;
 
     bypassRechaining->generic.target = (LIR *) addrRetChain;
@@ -1870,16 +1686,14 @@
                                           MIR *mir)
 {
     /* r3 now contains this->clazz */
-    newLIR3(cUnit, THUMB_LDR_RRI5, r3, r0,
-            offsetof(Object, clazz) >> 2);
+    loadWordDisp(cUnit, r0, offsetof(Object, clazz), r3);
 
     /*
      * r2 now contains predicted class. The starting offset of the
      * cached value is 4 bytes into the chaining cell.
      */
     ArmLIR *getPredictedClass =
-        newLIR3(cUnit, THUMB_LDR_PC_REL, r2, 0,
-                offsetof(PredictedChainingCell, clazz));
+         loadWordDisp(cUnit, rpc, offsetof(PredictedChainingCell, clazz), r2);
     getPredictedClass->generic.target = (LIR *) predChainingCell;
 
     /*
@@ -1887,14 +1701,12 @@
      * cached value is 8 bytes into the chaining cell.
      */
     ArmLIR *getPredictedMethod =
-        newLIR3(cUnit, THUMB_LDR_PC_REL, r0, 0,
-                offsetof(PredictedChainingCell, method));
+        loadWordDisp(cUnit, rpc, offsetof(PredictedChainingCell, method), r0);
     getPredictedMethod->generic.target = (LIR *) predChainingCell;
 
     /* Load the stats counter to see if it is time to unchain and refresh */
     ArmLIR *getRechainingRequestCount =
-        newLIR3(cUnit, THUMB_LDR_PC_REL, r7, 0,
-                offsetof(PredictedChainingCell, counter));
+        loadWordDisp(cUnit, rpc, offsetof(PredictedChainingCell, counter), r7);
     getRechainingRequestCount->generic.target =
         (LIR *) predChainingCell;
 
@@ -1903,14 +1715,13 @@
                  (int) (cUnit->method->insns + mir->offset));
 
     /* r1 = &retChainingCell */
-    ArmLIR *addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL,
-                                       r1, 0, 0);
+    ArmLIR *addrRetChain = opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
     addrRetChain->generic.target = (LIR *) retChainingCell;
 
     /* Check if r2 (predicted class) == r3 (actual class) */
-    newLIR2(cUnit, THUMB_CMP_RR, r2, r3);
+    opRegReg(cUnit, OP_CMP, r2, r3);
 
-    return newLIR2(cUnit, THUMB_B_COND, 0, ARM_COND_EQ);
+    return opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_EQ);
 }
 
 /* Geneate a branch to go back to the interpreter */
@@ -1918,9 +1729,10 @@
 {
     /* r0 = dalvik pc */
     loadConstant(cUnit, r0, (int) (cUnit->method->insns + offset));
-    newLIR3(cUnit, THUMB_LDR_RRI5, r1, rGLUE,
-            offsetof(InterpState, jitToInterpEntries.dvmJitToInterpPunt) >> 2);
-    newLIR1(cUnit, THUMB_BLX_R, r1);
+    loadWordDisp(cUnit, r0, offsetof(Object, clazz), r3);
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                 jitToInterpEntries.dvmJitToInterpPunt), r1);
+    opReg(cUnit, OP_BLX, r1);
 }
 
 /*
@@ -1938,16 +1750,117 @@
     }
     int entryAddr = offsetof(InterpState,
                              jitToInterpEntries.dvmJitToInterpSingleStep);
-    newLIR3(cUnit, THUMB_LDR_RRI5, r2, rGLUE, entryAddr >> 2);
+    loadWordDisp(cUnit, rGLUE, entryAddr, r2);
     /* r0 = dalvik pc */
     loadConstant(cUnit, r0, (int) (cUnit->method->insns + mir->offset));
     /* r1 = dalvik pc of following instruction */
     loadConstant(cUnit, r1, (int) (cUnit->method->insns + mir->next->offset));
-    newLIR1(cUnit, THUMB_BLX_R, r2);
+    opReg(cUnit, OP_BLX, r2);
 }
 
+/* Emit an OP_COND_BR on 'cond' aimed at 'target'; returns the branch LIR */
+static ArmLIR *genConditionalBranch(CompilationUnit *cUnit,
+                                    ArmConditionCode cond,
+                                    ArmLIR *target)
+{
+    ArmLIR *branch = opImmImm(cUnit, OP_COND_BR, 0, cond);
+    branch->generic.target = (LIR *) target;
+    return branch;
+}
 
-/*****************************************************************************/
+/* Emit an OP_UNCOND_BR aimed at 'target'; returns the branch LIR */
+static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target)
+{
+    ArmLIR *branch = opNone(cUnit, OP_UNCOND_BR);
+    branch->generic.target = (LIR *) target;
+    return branch;
+}
+
+/* Set rDest = rFP + vSrc*4, the frame address of Dalvik register vSrc */
+static ArmLIR *loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
+{
+    return opRegRegImm(cUnit, OP_ADD, rDest, rFP, vSrc*4, rNone);
+}
+
+/* Load the single word at rFP[vSrc] (Dalvik register vSrc) into rDest */
+static ArmLIR *loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
+{
+    return loadBaseDisp(cUnit, NULL, rFP, vSrc * 4, rDest, WORD, false, -1);
+}
+
+/* Load the word at rBase + displacement into rDest (word-multiple disp only) */
+static ArmLIR *loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
+                            int rDest)
+{
+    return loadBaseDisp(cUnit, NULL, rBase, displacement, rDest, WORD, false,
+                        -1);
+}
+/* Store rSrc as a word at rBase + displacement; rScratch goes to storeBaseDisp */
+static ArmLIR *storeWordDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, int rScratch)
+{
+    return storeBaseDisp(cUnit, rBase, displacement, rSrc, WORD, rScratch);
+}
+
+/* Store rSrc into Dalvik register vDest (rFP[vDest]); refreshes reg tracking */
+static ArmLIR *storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
+                          int rScratch)
+{
+    killNullCheckedRegister(cUnit, vDest);
+    updateLiveRegister(cUnit, vDest, rSrc);
+    return storeBaseDisp(cUnit, rFP, vDest * 4, rSrc, WORD, rScratch);
+}
+/*
+ * Load Dalvik registers vSrc and vSrc+1 (rFP[vSrc..vSrc+1]) into rDestLo and
+ * rDestHi. Returns the LIR of the first word load or the address computation.
+ */
+static ArmLIR *loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
+                             int rDestHi)
+{
+    ArmLIR *res;
+    /* Use reg + imm5*4 for both words when vSrc+1 still fits (vSrc <= 30) */
+    if (vSrc <= 30) {
+        res = loadWordDisp(cUnit, rFP, vSrc*4, rDestLo);
+        loadWordDisp(cUnit, rFP, (vSrc+1)*4, rDestHi);
+    } else {
+        assert(rDestLo < rDestHi);
+        res = loadValueAddress(cUnit, vSrc, rDestLo); /* rDestLo doubles as base */
+        loadMultiple(cUnit, rDestLo, (1<<rDestLo) | (1<<rDestHi));
+    }
+    return res;
+}
+
+/*
+ * Store rSrcLo and rSrcHi into Dalvik registers vDest and vDest+1. rScratch
+ * receives the computed frame address when vDest > 30.
+ */
+static ArmLIR *storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
+                              int vDest, int rScratch)
+{
+    ArmLIR *res;
+    killNullCheckedRegister(cUnit, vDest);
+    killNullCheckedRegister(cUnit, vDest+1);
+    updateLiveRegisterPair(cUnit, vDest, rSrcLo, rSrcHi);
+
+    /* Use reg + imm5*4 for both words when vDest+1 still fits (vDest <= 30) */
+    if (vDest <= 30) {
+        res = storeWordDisp(cUnit, rFP, vDest*4, rSrcLo, rScratch);
+        storeWordDisp(cUnit, rFP, (vDest+1)*4, rSrcHi, rScratch);
+    } else {
+        assert(rSrcLo < rSrcHi);
+        res = loadValueAddress(cUnit, vDest, rScratch); /* rScratch = &rFP[vDest] */
+        storeMultiple(cUnit, rScratch, (1<<rSrcLo) | (1 << rSrcHi));
+    }
+    return res;
+}
+/* Build a copy LIR with dvmCompilerRegCopy, append it to cUnit, and return it */
+static ArmLIR *genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
+{
+    ArmLIR *res = dvmCompilerRegCopy(cUnit, rDest, rSrc);
+    dvmCompilerAppendLIR(cUnit, (LIR*)res);
+    return res;
+}
+
 /*
  * The following are the first-level codegen routines that analyze the format
  * of each bytecode then either dispatch special purpose codegen routines
@@ -2007,7 +1920,7 @@
             reg1 = NEXT_REG(reg0);
             reg2 = NEXT_REG(reg1);
             loadConstant(cUnit, reg0, mir->dalvikInsn.vB);
-            newLIR3(cUnit, THUMB_ASR, reg1, reg0, 31);
+            opRegRegImm(cUnit, OP_ASR, reg1, reg0, 31, rNone);
             storeValuePair(cUnit, reg0, reg1, mir->dalvikInsn.vA, reg2);
             break;
         }
@@ -2064,10 +1977,6 @@
     int regvNoneWide = selectFirstRegister(cUnit, vNone, true);
 
     switch (mir->dalvikInsn.opCode) {
-        /*
-         * TODO: Verify that we can ignore the resolution check here because
-         * it will have already successfully been interpreted once
-         */
         case OP_CONST_STRING_JUMBO:
         case OP_CONST_STRING: {
             void *strPtr = (void*)
@@ -2077,10 +1986,6 @@
             storeValue(cUnit, regvNone, mir->dalvikInsn.vA, NEXT_REG(regvNone));
             break;
         }
-        /*
-         * TODO: Verify that we can ignore the resolution check here because
-         * it will have already successfully been interpreted once
-         */
         case OP_CONST_CLASS: {
             void *classPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
@@ -2101,14 +2006,13 @@
             assert(fieldPtr != NULL);
             loadConstant(cUnit, regvNone,  (int) fieldPtr + valOffset);
 #if !defined(WITH_SELF_VERIFICATION)
-            newLIR3(cUnit, THUMB_LDR_RRI5, regvNone, regvNone, 0);
-            storeValue(cUnit, regvNone, mir->dalvikInsn.vA, NEXT_REG(regvNone));
+            loadWordDisp(cUnit, regvNone, 0, regvNone);
 #else
             int regMap = regvNone << 4 | regvNone;
             selfVerificationMemOpWrapper(cUnit, regMap, &selfVerificationLoad);
 
-            storeValue(cUnit, regvNone, mir->dalvikInsn.vA, NEXT_REG(regvNone));
 #endif
+            storeValue(cUnit, regvNone, mir->dalvikInsn.vA, NEXT_REG(regvNone));
             break;
         }
         case OP_SGET_WIDE: {
@@ -2123,15 +2027,14 @@
             reg2 = NEXT_REG(reg1);
             loadConstant(cUnit, reg2,  (int) fieldPtr + valOffset);
 #if !defined(WITH_SELF_VERIFICATION)
-            newLIR2(cUnit, THUMB_LDMIA, reg2, (1<<reg0 | 1<<reg1));
-            storeValuePair(cUnit, reg0, reg1, mir->dalvikInsn.vA, reg2);
+            loadMultiple(cUnit, reg2, (1<<reg0 | 1<<reg1));
 #else
             int regMap = reg1 << 8 | reg0 << 4 | reg2;
             selfVerificationMemOpWrapper(cUnit, regMap,
                 &selfVerificationLoadDoubleword);
 
-            storeValuePair(cUnit, reg0, reg1, mir->dalvikInsn.vA, reg2);
 #endif
+            storeValuePair(cUnit, reg0, reg1, mir->dalvikInsn.vA, reg2);
             break;
         }
         case OP_SPUT_OBJECT:
@@ -2149,7 +2052,7 @@
             updateLiveRegister(cUnit, mir->dalvikInsn.vA, regvA);
             loadConstant(cUnit, NEXT_REG(regvA),  (int) fieldPtr + valOffset);
 #if !defined(WITH_SELF_VERIFICATION)
-            newLIR3(cUnit, THUMB_STR_RRI5, regvA, NEXT_REG(regvA), 0);
+            storeWordDisp(cUnit, NEXT_REG(regvA), 0 , regvA, -1);
 #else
             int regMap = regvA << 4 | NEXT_REG(regvA);
             selfVerificationMemOpWrapper(cUnit, regMap, &selfVerificationStore);
@@ -2170,7 +2073,7 @@
             updateLiveRegisterPair(cUnit, mir->dalvikInsn.vA, reg0, reg1);
             loadConstant(cUnit, reg2,  (int) fieldPtr + valOffset);
 #if !defined(WITH_SELF_VERIFICATION)
-            newLIR2(cUnit, THUMB_STMIA, reg2, (1<<reg0 | 1<<reg1));
+            storeMultiple(cUnit, reg2, (1<<reg0 | 1<<reg1));
 #else
             int regMap = reg1 << 8 | reg0 << 4 | reg2;
             selfVerificationMemOpWrapper(cUnit, regMap,
@@ -2196,11 +2099,7 @@
             loadConstant(cUnit, r0, (int) classPtr);
             genExportPC(cUnit, mir, r2, r3 );
             loadConstant(cUnit, r1, ALLOC_DONT_TRACK);
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
-            /*
-             * TODO: As coded, we'll bail and reinterpret on alloc failure.
-             * Need a general mechanism to bail to thrown exception code.
-             */
+            opReg(cUnit, OP_BLX, r4PC);
             genZeroCheck(cUnit, r0, mir->offset, NULL);
             storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
             break;
@@ -2214,25 +2113,18 @@
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
             loadConstant(cUnit, r1, (int) classPtr );
             loadValue(cUnit, mir->dalvikInsn.vA, r0);  /* Ref */
-            /*
-             * TODO - in theory classPtr should be resoved by the time this
-             * instruction made into a trace, but we are seeing NULL at runtime
-             * so this check is temporarily used as a workaround.
-             */
-            ArmLIR * pcrLabel = genZeroCheck(cUnit, r1, mir->offset, NULL);
-            newLIR2(cUnit, THUMB_CMP_RI8, r0, 0);    /* Null? */
+            opRegImm(cUnit, OP_CMP, r0, 0, rNone);   /* Null? */
             ArmLIR *branch1 =
-                newLIR2(cUnit, THUMB_B_COND, 4, ARM_COND_EQ);
+                opImmImm(cUnit, OP_COND_BR, 4, ARM_COND_EQ);
             /* r0 now contains object->clazz */
-            newLIR3(cUnit, THUMB_LDR_RRI5, r0, r0,
-                    offsetof(Object, clazz) >> 2);
+            loadWordDisp(cUnit, r0, offsetof(Object, clazz), r0);
             loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial);
-            newLIR2(cUnit, THUMB_CMP_RR, r0, r1);
+            opRegReg(cUnit, OP_CMP, r0, r1);
             ArmLIR *branch2 =
-                newLIR2(cUnit, THUMB_B_COND, 2, ARM_COND_EQ);
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
+                opImmImm(cUnit, OP_COND_BR, 2, ARM_COND_EQ);
+            opReg(cUnit, OP_BLX, r4PC);
             /* check cast failed - punt to the interpreter */
-            genZeroCheck(cUnit, r0, mir->offset, pcrLabel);
+            genZeroCheck(cUnit, r0, mir->offset, NULL);
             /* check cast passed - branch target here */
             ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
             branch1->generic.target = (LIR *)target;
@@ -2252,65 +2144,61 @@
         case OP_MOVE_EXCEPTION: {
             int offset = offsetof(InterpState, self);
             int exOffset = offsetof(Thread, exception);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r1, rGLUE, offset >> 2);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r0, r1, exOffset >> 2);
+            loadWordDisp(cUnit, rGLUE, offset, r1);
+            loadWordDisp(cUnit, r1, exOffset, r0);
             storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
            break;
         }
         case OP_MOVE_RESULT:
         case OP_MOVE_RESULT_OBJECT: {
             int offset = offsetof(InterpState, retval);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE, offset >> 2);
+            loadWordDisp(cUnit, rGLUE, offset, r0);
             storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
             break;
         }
         case OP_MOVE_RESULT_WIDE: {
             int offset = offsetof(InterpState, retval);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE, offset >> 2);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r1, rGLUE, (offset >> 2)+1);
+            loadWordDisp(cUnit, rGLUE, offset, r0);
+            loadWordDisp(cUnit, rGLUE, offset+4, r1);
             storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
             break;
         }
         case OP_RETURN_WIDE: {
-            loadValuePair(cUnit, mir->dalvikInsn.vA, r0, r1);
+            int vSrc = mir->dalvikInsn.vA;
+            int reg0 = selectFirstRegister(cUnit, vSrc, true);
+            int reg1 = NEXT_REG(reg0);
+            int rScratch = NEXT_REG(reg1);
             int offset = offsetof(InterpState, retval);
-            newLIR3(cUnit, THUMB_STR_RRI5, r0, rGLUE, offset >> 2);
-            newLIR3(cUnit, THUMB_STR_RRI5, r1, rGLUE, (offset >> 2)+1);
+            loadValuePair(cUnit, vSrc, reg0, reg1);
+            storeWordDisp(cUnit, rGLUE, offset, reg0, rScratch);
+            storeWordDisp(cUnit, rGLUE, offset + 4, reg1, rScratch);
             genReturnCommon(cUnit,mir);
             break;
         }
         case OP_RETURN:
         case OP_RETURN_OBJECT: {
-            loadValue(cUnit, mir->dalvikInsn.vA, r0);
-            int offset = offsetof(InterpState, retval);
-            newLIR3(cUnit, THUMB_STR_RRI5, r0, rGLUE, offset >> 2);
+            int vSrc = mir->dalvikInsn.vA;
+            int reg0 = selectFirstRegister(cUnit, vSrc, false);
+            int rScratch = NEXT_REG(reg0);
+            loadValue(cUnit, vSrc, reg0);
+            storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval),
+                          reg0, rScratch);
             genReturnCommon(cUnit,mir);
             break;
         }
-        /*
-         * TODO-VERIFY: May be playing a bit fast and loose here.  As coded,
-         * a failure on lock/unlock will cause us to revert to the interpeter
-         * to try again. This means we essentially ignore the first failure on
-         * the assumption that the interpreter will correctly handle the 2nd.
-         */
         case OP_MONITOR_ENTER:
         case OP_MONITOR_EXIT: {
             int offset = offsetof(InterpState, self);
             loadValue(cUnit, mir->dalvikInsn.vA, r1);
-            newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE, offset >> 2);
+            loadWordDisp(cUnit, rGLUE, offset, r0);
             if (dalvikOpCode == OP_MONITOR_ENTER) {
                 loadConstant(cUnit, r2, (int)dvmLockObject);
             } else {
                 loadConstant(cUnit, r2, (int)dvmUnlockObject);
             }
-          /*
-           * TODO-VERIFY: Note that we're not doing an EXPORT_PC, as
-           * Lock/unlock won't throw, and this code does not support
-           * DEADLOCK_PREDICTION or MONITOR_TRACKING.  Should it?
-           */
             genNullCheck(cUnit, mir->dalvikInsn.vA, r1, mir->offset, NULL);
             /* Do the call */
-            newLIR1(cUnit, THUMB_BLX_R, r2);
+            opReg(cUnit, OP_BLX, r2);
             break;
         }
         case OP_THROW: {
@@ -2335,6 +2223,8 @@
     int    __aeabi_d2iz( double op1 );
     float  __aeabi_l2f(  long op1 );
     double __aeabi_l2d(  long op1 );
+    s8 dvmJitf2l( float op1 );
+    s8 dvmJitd2l( double op1 );
 
     switch (opCode) {
         case OP_INT_TO_FLOAT:
@@ -2370,8 +2260,6 @@
     int vSrc2 = mir->dalvikInsn.vB;
     int reg0, reg1, reg2;
 
-    /* TODO - find the proper include file to declare these */
-
     if ( (opCode >= OP_ADD_INT_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) {
         return genArithOp( cUnit, mir );
     }
@@ -2421,7 +2309,7 @@
             reg2 = NEXT_REG(reg1);
 
             loadValue(cUnit, vSrc2, reg0);
-            newLIR3(cUnit, THUMB_ASR, reg1, reg0, 31);
+            opRegRegImm(cUnit, OP_ASR, reg1, reg0, 31, rNone);
             storeValuePair(cUnit, reg0, reg1, vSrc1Dest, reg2);
             break;
         }
@@ -2433,27 +2321,24 @@
             break;
         case OP_INT_TO_BYTE:
             loadValue(cUnit, vSrc2, reg0);
-            newLIR3(cUnit, THUMB_LSL, reg0, reg0, 24);
-            newLIR3(cUnit, THUMB_ASR, reg0, reg0, 24);
-            storeValue(cUnit, reg0, vSrc1Dest, reg1);
+            opRegReg(cUnit, OP_2BYTE, reg1, reg0);
+            storeValue(cUnit, reg1, vSrc1Dest, reg2);
             break;
         case OP_INT_TO_SHORT:
             loadValue(cUnit, vSrc2, reg0);
-            newLIR3(cUnit, THUMB_LSL, reg0, reg0, 16);
-            newLIR3(cUnit, THUMB_ASR, reg0, reg0, 16);
-            storeValue(cUnit, reg0, vSrc1Dest, reg1);
+            opRegReg(cUnit, OP_2SHORT, reg1, reg0);
+            storeValue(cUnit, reg1, vSrc1Dest, reg2);
             break;
         case OP_INT_TO_CHAR:
             loadValue(cUnit, vSrc2, reg0);
-            newLIR3(cUnit, THUMB_LSL, reg0, reg0, 16);
-            newLIR3(cUnit, THUMB_LSR, reg0, reg0, 16);
-            storeValue(cUnit, reg0, vSrc1Dest, reg1);
+            opRegReg(cUnit, OP_2CHAR, reg1, reg0);
+            storeValue(cUnit, reg1, vSrc1Dest, reg2);
             break;
         case OP_ARRAY_LENGTH: {
             int lenOffset = offsetof(ArrayObject, length);
-            loadValue(cUnit, vSrc2, reg0);
-            genNullCheck(cUnit, vSrc2, reg0, mir->offset, NULL);
-            newLIR3(cUnit, THUMB_LDR_RRI5, reg0, reg0, lenOffset >> 2);
+            loadValue(cUnit, vSrc2, reg1);
+            genNullCheck(cUnit, vSrc2, reg1, mir->offset, NULL);
+            loadWordDisp(cUnit, reg1, lenOffset, reg0);
             storeValue(cUnit, reg0, vSrc1Dest, reg1);
             break;
         }
@@ -2478,7 +2363,7 @@
         reg2 = NEXT_REG(reg1);
 
         loadConstant(cUnit, reg0, BBBB);
-        newLIR3(cUnit, THUMB_ASR, reg1, reg0, 31);
+        opRegRegImm(cUnit, OP_ASR, reg1, reg0, 31, rNone);
 
         /* Save the long values to the specified Dalvik register pair */
         storeValuePair(cUnit, reg0, reg1, vDest, reg2);
@@ -2506,8 +2391,9 @@
     int reg0 = selectFirstRegister(cUnit, mir->dalvikInsn.vA, false);
 
     loadValue(cUnit, mir->dalvikInsn.vA, reg0);
-    newLIR2(cUnit, THUMB_CMP_RI8, reg0, 0);
+    opRegImm(cUnit, OP_CMP, reg0, 0, rNone);
 
+//TUNING: break this out to allow use of Thumb2 CB[N]Z
     switch (dalvikOpCode) {
         case OP_IF_EQZ:
             cond = ARM_COND_EQ;
@@ -2544,14 +2430,13 @@
     int vSrc = mir->dalvikInsn.vB;
     int vDest = mir->dalvikInsn.vA;
     int lit = mir->dalvikInsn.vC;
-    int armOp;
+    OpKind op;
     int reg0, reg1, regDest;
 
     reg0 = selectFirstRegister(cUnit, vSrc, false);
     reg1 = NEXT_REG(reg0);
     regDest = NEXT_REG(reg1);
 
-    /* TODO: find the proper .h file to declare these */
     int __aeabi_idivmod(int op1, int op2);
     int __aeabi_idiv(int op1, int op2);
 
@@ -2559,31 +2444,16 @@
         case OP_ADD_INT_LIT8:
         case OP_ADD_INT_LIT16:
             loadValue(cUnit, vSrc, reg0);
-            if (lit <= 7 && lit >= 0) {
-                newLIR3(cUnit, THUMB_ADD_RRI3, regDest, reg0, lit);
-                storeValue(cUnit, regDest, vDest, reg1);
-            } else if (lit <= 255 && lit >= 0) {
-                newLIR2(cUnit, THUMB_ADD_RI8, reg0, lit);
-                storeValue(cUnit, reg0, vDest, reg1);
-            } else if (lit >= -7 && lit <= 0) {
-                /* Convert to a small constant subtraction */
-                newLIR3(cUnit, THUMB_SUB_RRI3, regDest, reg0, -lit);
-                storeValue(cUnit, regDest, vDest, reg1);
-            } else if (lit >= -255 && lit <= 0) {
-                /* Convert to a small constant subtraction */
-                newLIR2(cUnit, THUMB_SUB_RI8, reg0, -lit);
-                storeValue(cUnit, reg0, vDest, reg1);
-            } else {
-                loadConstant(cUnit, reg1, lit);
-                genBinaryOp(cUnit, vDest, THUMB_ADD_RRR, reg0, reg1, regDest);
-            }
+            opRegImm(cUnit, OP_ADD, reg0, lit, reg1);
+            storeValue(cUnit, reg0, vDest, reg1);
             break;
 
         case OP_RSUB_INT_LIT8:
         case OP_RSUB_INT:
             loadValue(cUnit, vSrc, reg1);
             loadConstant(cUnit, reg0, lit);
-            genBinaryOp(cUnit, vDest, THUMB_SUB_RRR, reg0, reg1, regDest);
+            opRegRegReg(cUnit, OP_SUB, regDest, reg0, reg1);
+            storeValue(cUnit, regDest, vDest, reg1);
             break;
 
         case OP_MUL_INT_LIT8:
@@ -2595,28 +2465,28 @@
         case OP_XOR_INT_LIT8:
         case OP_XOR_INT_LIT16:
             loadValue(cUnit, vSrc, reg0);
-            loadConstant(cUnit, reg1, lit);
             switch (dalvikOpCode) {
                 case OP_MUL_INT_LIT8:
                 case OP_MUL_INT_LIT16:
-                    armOp = THUMB_MUL;
+                    op = OP_MUL;
                     break;
                 case OP_AND_INT_LIT8:
                 case OP_AND_INT_LIT16:
-                    armOp = THUMB_AND_RR;
+                    op = OP_AND;
                     break;
                 case OP_OR_INT_LIT8:
                 case OP_OR_INT_LIT16:
-                    armOp = THUMB_ORR;
+                    op = OP_OR;
                     break;
                 case OP_XOR_INT_LIT8:
                 case OP_XOR_INT_LIT16:
-                    armOp = THUMB_EOR;
+                    op = OP_XOR;
                     break;
                 default:
                     dvmAbort();
             }
-            genBinaryOp(cUnit, vDest, armOp, reg0, reg1, regDest);
+            opRegRegImm(cUnit, op, regDest, reg0, lit, reg1);
+            storeValue(cUnit, regDest, vDest, reg1);
             break;
 
         case OP_SHL_INT_LIT8:
@@ -2625,18 +2495,18 @@
             loadValue(cUnit, vSrc, reg0);
             switch (dalvikOpCode) {
                 case OP_SHL_INT_LIT8:
-                    armOp = THUMB_LSL;
+                    op = OP_LSL;
                     break;
                 case OP_SHR_INT_LIT8:
-                    armOp = THUMB_ASR;
+                    op = OP_ASR;
                     break;
                 case OP_USHR_INT_LIT8:
-                    armOp = THUMB_LSR;
+                    op = OP_LSR;
                     break;
                 default: dvmAbort();
             }
-            newLIR3(cUnit, armOp, reg0, reg0, lit);
-            storeValue(cUnit, reg0, vDest, reg1);
+            opRegRegImm(cUnit, op, regDest, reg0, lit, reg1);
+            storeValue(cUnit, regDest, vDest, reg1);
             break;
 
         case OP_DIV_INT_LIT8:
@@ -2650,7 +2520,7 @@
             loadConstant(cUnit, r2, (int)__aeabi_idiv);
             loadConstant(cUnit, r1, lit);
             loadValue(cUnit, vSrc, r0);
-            newLIR1(cUnit, THUMB_BLX_R, r2);
+            opReg(cUnit, OP_BLX, r2);
             storeValue(cUnit, r0, vDest, r2);
             break;
 
@@ -2665,7 +2535,7 @@
             loadConstant(cUnit, r2, (int)__aeabi_idivmod);
             loadConstant(cUnit, r1, lit);
             loadValue(cUnit, vSrc, r0);
-            newLIR1(cUnit, THUMB_BLX_R, r2);
+            opReg(cUnit, OP_BLX, r2);
             storeValue(cUnit, r1, vDest, r2);
             break;
         default:
@@ -2691,10 +2561,6 @@
         fieldOffset = 0;
     }
     switch (dalvikOpCode) {
-        /*
-         * TODO: I may be assuming too much here.
-         * Verify what is known at JIT time.
-         */
         case OP_NEW_ARRAY: {
             void *classPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
@@ -2705,41 +2571,32 @@
             ArmLIR *pcrLabel =
                 genRegImmCheck(cUnit, ARM_COND_MI, r1, 0, mir->offset, NULL);
             genExportPC(cUnit, mir, r2, r3 );
-            newLIR2(cUnit, THUMB_MOV_IMM,r2,ALLOC_DONT_TRACK);
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
-            /*
-             * TODO: As coded, we'll bail and reinterpret on alloc failure.
-             * Need a general mechanism to bail to thrown exception code.
-             */
+            loadConstant(cUnit, r2, ALLOC_DONT_TRACK);
+            opReg(cUnit, OP_BLX, r4PC);
+            /* Note: on failure, we'll bail and reinterpret */
             genZeroCheck(cUnit, r0, mir->offset, pcrLabel);
             storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
             break;
         }
-        /*
-         * TODO: I may be assuming too much here.
-         * Verify what is known at JIT time.
-         */
         case OP_INSTANCE_OF: {
             ClassObject *classPtr =
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
             assert(classPtr != NULL);
             loadValue(cUnit, mir->dalvikInsn.vB, r0);  /* Ref */
             loadConstant(cUnit, r2, (int) classPtr );
-            newLIR2(cUnit, THUMB_CMP_RI8, r0, 0);    /* Null? */
+//TUNING: compare to 0 primitive to allow use of CB[N]Z
+            opRegImm(cUnit, OP_CMP, r0, 0, rNone); /* NULL? */
             /* When taken r0 has NULL which can be used for store directly */
-            ArmLIR *branch1 = newLIR2(cUnit, THUMB_B_COND, 4,
-                                          ARM_COND_EQ);
+            ArmLIR *branch1 = opImmImm(cUnit, OP_COND_BR, 4, ARM_COND_EQ);
             /* r1 now contains object->clazz */
-            newLIR3(cUnit, THUMB_LDR_RRI5, r1, r0,
-                    offsetof(Object, clazz) >> 2);
+            loadWordDisp(cUnit, r0, offsetof(Object, clazz), r1);
             loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial);
             loadConstant(cUnit, r0, 1);                /* Assume true */
-            newLIR2(cUnit, THUMB_CMP_RR, r1, r2);
-            ArmLIR *branch2 = newLIR2(cUnit, THUMB_B_COND, 2,
-                                          ARM_COND_EQ);
-            newLIR2(cUnit, THUMB_MOV_RR, r0, r1);
-            newLIR2(cUnit, THUMB_MOV_RR, r1, r2);
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
+            opRegReg(cUnit, OP_CMP, r1, r2);
+            ArmLIR *branch2 = opImmImm(cUnit, OP_COND_BR, 2, ARM_COND_EQ);
+            opRegReg(cUnit, OP_MOV, r0, r1);
+            opRegReg(cUnit, OP_MOV, r1, r2);
+            opReg(cUnit, OP_BLX, r4PC);
             /* branch target here */
             ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
             storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
@@ -2752,34 +2609,34 @@
             break;
         case OP_IGET:
         case OP_IGET_OBJECT:
-            genIGet(cUnit, mir, THUMB_LDR_RRR, fieldOffset);
+            genIGet(cUnit, mir, WORD, fieldOffset);
             break;
         case OP_IGET_BOOLEAN:
-            genIGet(cUnit, mir, THUMB_LDRB_RRR, fieldOffset);
+            genIGet(cUnit, mir, UNSIGNED_BYTE, fieldOffset);
             break;
         case OP_IGET_BYTE:
-            genIGet(cUnit, mir, THUMB_LDRSB_RRR, fieldOffset);
+            genIGet(cUnit, mir, SIGNED_BYTE, fieldOffset);
             break;
         case OP_IGET_CHAR:
-            genIGet(cUnit, mir, THUMB_LDRH_RRR, fieldOffset);
+            genIGet(cUnit, mir, UNSIGNED_HALF, fieldOffset);
             break;
         case OP_IGET_SHORT:
-            genIGet(cUnit, mir, THUMB_LDRSH_RRR, fieldOffset);
+            genIGet(cUnit, mir, SIGNED_HALF, fieldOffset);
             break;
         case OP_IPUT_WIDE:
             genIPutWide(cUnit, mir, fieldOffset);
             break;
         case OP_IPUT:
         case OP_IPUT_OBJECT:
-            genIPut(cUnit, mir, THUMB_STR_RRR, fieldOffset);
+            genIPut(cUnit, mir, WORD, fieldOffset);
             break;
         case OP_IPUT_SHORT:
         case OP_IPUT_CHAR:
-            genIPut(cUnit, mir, THUMB_STRH_RRR, fieldOffset);
+            genIPut(cUnit, mir, UNSIGNED_HALF, fieldOffset);
             break;
         case OP_IPUT_BYTE:
         case OP_IPUT_BOOLEAN:
-            genIPut(cUnit, mir, THUMB_STRB_RRR, fieldOffset);
+            genIPut(cUnit, mir, UNSIGNED_BYTE, fieldOffset);
             break;
         default:
             return true;
@@ -2794,11 +2651,11 @@
     switch (dalvikOpCode) {
         case OP_IGET_QUICK:
         case OP_IGET_OBJECT_QUICK:
-            genIGet(cUnit, mir, THUMB_LDR_RRR, fieldOffset);
+            genIGet(cUnit, mir, WORD, fieldOffset);
             break;
         case OP_IPUT_QUICK:
         case OP_IPUT_OBJECT_QUICK:
-            genIPut(cUnit, mir, THUMB_STR_RRR, fieldOffset);
+            genIPut(cUnit, mir, WORD, fieldOffset);
             break;
         case OP_IGET_WIDE_QUICK:
             genIGetWide(cUnit, mir, fieldOffset);
@@ -2834,7 +2691,7 @@
         loadValue(cUnit, mir->dalvikInsn.vA, reg0);
         loadValue(cUnit, mir->dalvikInsn.vB, reg1);
     }
-    newLIR2(cUnit, THUMB_CMP_RR, reg0, reg1);
+    opRegReg(cUnit, OP_CMP, reg0, reg1);
 
     switch (dalvikOpCode) {
         case OP_IF_EQ:
@@ -2924,38 +2781,38 @@
             storeValue(cUnit, r0, vA, r1);
             break;
         case OP_AGET_WIDE:
-            genArrayGet(cUnit, mir, THUMB_LDR_RRR, vB, vC, vA, 3);
+            genArrayGet(cUnit, mir, LONG, vB, vC, vA, 3);
             break;
         case OP_AGET:
         case OP_AGET_OBJECT:
-            genArrayGet(cUnit, mir, THUMB_LDR_RRR, vB, vC, vA, 2);
+            genArrayGet(cUnit, mir, WORD, vB, vC, vA, 2);
             break;
         case OP_AGET_BOOLEAN:
-            genArrayGet(cUnit, mir, THUMB_LDRB_RRR, vB, vC, vA, 0);
+            genArrayGet(cUnit, mir, UNSIGNED_BYTE, vB, vC, vA, 0);
             break;
         case OP_AGET_BYTE:
-            genArrayGet(cUnit, mir, THUMB_LDRSB_RRR, vB, vC, vA, 0);
+            genArrayGet(cUnit, mir, SIGNED_BYTE, vB, vC, vA, 0);
             break;
         case OP_AGET_CHAR:
-            genArrayGet(cUnit, mir, THUMB_LDRH_RRR, vB, vC, vA, 1);
+            genArrayGet(cUnit, mir, UNSIGNED_HALF, vB, vC, vA, 1);
             break;
         case OP_AGET_SHORT:
-            genArrayGet(cUnit, mir, THUMB_LDRSH_RRR, vB, vC, vA, 1);
+            genArrayGet(cUnit, mir, SIGNED_HALF, vB, vC, vA, 1);
             break;
         case OP_APUT_WIDE:
-            genArrayPut(cUnit, mir, THUMB_STR_RRR, vB, vC, vA, 3);
+            genArrayPut(cUnit, mir, LONG, vB, vC, vA, 3);
             break;
         case OP_APUT:
         case OP_APUT_OBJECT:
-            genArrayPut(cUnit, mir, THUMB_STR_RRR, vB, vC, vA, 2);
+            genArrayPut(cUnit, mir, WORD, vB, vC, vA, 2);
             break;
         case OP_APUT_SHORT:
         case OP_APUT_CHAR:
-            genArrayPut(cUnit, mir, THUMB_STRH_RRR, vB, vC, vA, 1);
+            genArrayPut(cUnit, mir, UNSIGNED_HALF, vB, vC, vA, 1);
             break;
         case OP_APUT_BYTE:
         case OP_APUT_BOOLEAN:
-            genArrayPut(cUnit, mir, THUMB_STRB_RRR, vB, vC, vA, 0);
+            genArrayPut(cUnit, mir, UNSIGNED_BYTE, vB, vC, vA, 0);
             break;
         default:
             return true;
@@ -2973,7 +2830,7 @@
             loadConstant(cUnit, r1, (mir->dalvikInsn.vB << 1) +
                  (int) (cUnit->method->insns + mir->offset));
             genExportPC(cUnit, mir, r2, r3 );
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
+            opReg(cUnit, OP_BLX, r4PC);
             genZeroCheck(cUnit, r0, mir->offset, NULL);
             break;
         }
@@ -2993,14 +2850,13 @@
             loadValue(cUnit, mir->dalvikInsn.vA, r1);
             loadConstant(cUnit, r0, (mir->dalvikInsn.vB << 1) +
                  (int) (cUnit->method->insns + mir->offset));
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
+            opReg(cUnit, OP_BLX, r4PC);
             loadConstant(cUnit, r1, (int)(cUnit->method->insns + mir->offset));
-            newLIR3(cUnit, THUMB_LDR_RRI5, r2, rGLUE,
-                offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNoChain)
-                    >> 2);
-            newLIR3(cUnit, THUMB_ADD_RRR, r0, r0, r0);
-            newLIR3(cUnit, THUMB_ADD_RRR, r4PC, r0, r1);
-            newLIR1(cUnit, THUMB_BLX_R, r2);
+            loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                         jitToInterpEntries.dvmJitToInterpNoChain), r2);
+            opRegReg(cUnit, OP_ADD, r0, r0);
+            opRegRegReg(cUnit, OP_ADD, r4PC, r0, r1);
+            opReg(cUnit, OP_BLX, r2);
             break;
         }
         default:
@@ -3105,6 +2961,11 @@
                                      calleeMethod);
             break;
         }
+/*
+ * TODO:  When we move to using upper registers in Thumb2, make sure
+ *        the register allocator is told that r9, r10, & r12 are killed
+ *        here.
+ */
         /*
          * calleeMethod = dvmFindInterfaceMethodInCache(this->clazz,
          *                    BBBB, method, method->clazz->pDvmDex)
@@ -3186,12 +3047,13 @@
                          (int) (cUnit->method->insns + mir->offset));
 
             /* r1 = &retChainingCell */
-            ArmLIR *addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL, r1, 0, 0);
+            ArmLIR *addrRetChain =
+                opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
             addrRetChain->generic.target = (LIR *) retChainingCell;
 
             /* r2 = &predictedChainingCell */
             ArmLIR *predictedChainingCell =
-                newLIR3(cUnit, THUMB_ADD_PC_REL, r2, 0, 0);
+                opRegRegImm(cUnit, OP_ADD, r2, rpc, 0, rNone);
             predictedChainingCell->generic.target = (LIR *) predChainingCell;
 
             genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN);
@@ -3226,12 +3088,12 @@
              */
 
             /* Save count, &predictedChainCell, and class to high regs first */
-            newLIR2(cUnit, THUMB_MOV_RR_L2H, r9 & THUMB_REG_MASK, r1);
-            newLIR2(cUnit, THUMB_MOV_RR_L2H, r10 & THUMB_REG_MASK, r2);
-            newLIR2(cUnit, THUMB_MOV_RR_L2H, r12 & THUMB_REG_MASK, r3);
+            opRegReg(cUnit, OP_MOV, r9, r1);
+            opRegReg(cUnit, OP_MOV, r10, r2);
+            opRegReg(cUnit, OP_MOV, r12, r3);
 
             /* r0 now contains this->clazz */
-            newLIR2(cUnit, THUMB_MOV_RR, r0, r3);
+            opRegReg(cUnit, OP_MOV, r0, r3);
 
             /* r1 = BBBB */
             loadConstant(cUnit, r1, dInsn->vB);
@@ -3244,25 +3106,23 @@
 
             loadConstant(cUnit, r7,
                          (intptr_t) dvmFindInterfaceMethodInCache);
-            newLIR1(cUnit, THUMB_BLX_R, r7);
+            opReg(cUnit, OP_BLX, r7);
 
             /* r0 = calleeMethod (returned from dvmFindInterfaceMethodInCache */
 
-            newLIR2(cUnit, THUMB_MOV_RR_H2L, r1, r9 & THUMB_REG_MASK);
+            opRegReg(cUnit, OP_MOV, r1, r9);
 
             /* Check if rechain limit is reached */
-            newLIR2(cUnit, THUMB_CMP_RI8, r1, 0);
+            opRegImm(cUnit, OP_CMP, r1, 0, rNone);
 
             ArmLIR *bypassRechaining =
-                newLIR2(cUnit, THUMB_B_COND, 0, ARM_COND_GT);
+                opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_GT);
 
-            newLIR3(cUnit, THUMB_LDR_RRI5, r7, rGLUE,
-                    offsetof(InterpState,
-                             jitToInterpEntries.dvmJitToPatchPredictedChain)
-                    >> 2);
+            loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                         jitToInterpEntries.dvmJitToPatchPredictedChain), r7);
 
-            newLIR2(cUnit, THUMB_MOV_RR_H2L, r2, r10 & THUMB_REG_MASK);
-            newLIR2(cUnit, THUMB_MOV_RR_H2L, r3, r12 & THUMB_REG_MASK);
+            opRegReg(cUnit, OP_MOV, r2, r10);
+            opRegReg(cUnit, OP_MOV, r3, r12);
 
             /*
              * r0 = calleeMethod
@@ -3273,11 +3133,10 @@
              * when patching the chaining cell and will be clobbered upon
              * returning so it will be reconstructed again.
              */
-            newLIR1(cUnit, THUMB_BLX_R, r7);
+            opReg(cUnit, OP_BLX, r7);
 
             /* r1 = &retChainingCell */
-            addrRetChain = newLIR3(cUnit, THUMB_ADD_PC_REL,
-                                               r1, 0, 0);
+            addrRetChain = opRegRegImm(cUnit, OP_ADD, r1, rpc, 0, rNone);
             addrRetChain->generic.target = (LIR *) retChainingCell;
 
             bypassRechaining->generic.target = (LIR *) addrRetChain;
@@ -3419,10 +3278,11 @@
             }
 
             /* Materialize pointer to retval & push */
-            newLIR2(cUnit, THUMB_MOV_RR, r4PC, rGLUE);
-            newLIR2(cUnit, THUMB_ADD_RI8, r4PC, offset);
+            opRegReg(cUnit, OP_MOV, r4PC, rGLUE);
+            opRegImm(cUnit, OP_ADD, r4PC, offset, rNone);
+
             /* Push r4 and (just to take up space) r5) */
-            newLIR1(cUnit, THUMB_PUSH, (1<<r4PC | 1<<rFP));
+            opImm(cUnit, OP_PUSH, (1 << r4PC | 1 << rFP));
 
             /* Get code pointer to inline routine */
             loadConstant(cUnit, r4PC, (int)inLineTable[operation].func);
@@ -3435,10 +3295,10 @@
                 loadValue(cUnit, dInsn->arg[i], i);
             }
             /* Call inline routine */
-            newLIR1(cUnit, THUMB_BLX_R, r4PC);
+            opReg(cUnit, OP_BLX, r4PC);
 
             /* Strip frame */
-            newLIR1(cUnit, THUMB_ADD_SPI7, 2);
+            opRegImm(cUnit, OP_ADD, r13, 8, rNone);
 
             /* Did we throw? If so, redo under interpreter*/
             genZeroCheck(cUnit, r0, mir->offset, NULL);
@@ -3460,7 +3320,6 @@
     return false;
 }
 
-/*****************************************************************************/
 /*
  * The following are special processing routines that handle transfer of
  * controls between compiled code and the interpreter. Certain VM states like
@@ -3471,9 +3330,9 @@
 static void handleNormalChainingCell(CompilationUnit *cUnit,
                                      unsigned int offset)
 {
-    newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE,
-        offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNormal) >> 2);
-    newLIR1(cUnit, THUMB_BLX_R, r0);
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                 jitToInterpEntries.dvmJitToInterpNormal), r0);
+    opReg(cUnit, OP_BLX, r0);
     addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
 }
 
@@ -3484,9 +3343,9 @@
 static void handleHotChainingCell(CompilationUnit *cUnit,
                                   unsigned int offset)
 {
-    newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE,
-        offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2);
-    newLIR1(cUnit, THUMB_BLX_R, r0);
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                 jitToInterpEntries.dvmJitToTraceSelect), r0);
+    opReg(cUnit, OP_BLX, r0);
     addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
 }
 
@@ -3506,9 +3365,9 @@
 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                               const Method *callee)
 {
-    newLIR3(cUnit, THUMB_LDR_RRI5, r0, rGLUE,
-        offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2);
-    newLIR1(cUnit, THUMB_BLX_R, r0);
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                 jitToInterpEntries.dvmJitToTraceSelect), r0);
+    opReg(cUnit, OP_BLX, r0);
     addWordData(cUnit, (int) (callee->insns), true);
 }
 
@@ -3584,6 +3443,7 @@
         cUnit->chainCellOffsetLIR =
             (LIR *) newLIR1(cUnit, ARM_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
         cUnit->headerSize = 6;
+        /* Thumb instruction used directly here to ensure correct size */
         newLIR2(cUnit, THUMB_MOV_RR_H2L, r0, rpc & THUMB_REG_MASK);
         newLIR2(cUnit, THUMB_SUB_RI8, r0, 10);
         newLIR3(cUnit, THUMB_LDR_RRI5, r1, r0, 0);
@@ -3659,11 +3519,10 @@
                 case EXCEPTION_HANDLING:
                     labelList[i].opCode = ARM_PSEUDO_EH_BLOCK_LABEL;
                     if (cUnit->pcReconstructionList.numUsed) {
-                        newLIR3(cUnit, THUMB_LDR_RRI5, r1, rGLUE,
-                            offsetof(InterpState,
-                                     jitToInterpEntries.dvmJitToInterpPunt)
-                            >> 2);
-                        newLIR1(cUnit, THUMB_BLX_R, r1);
+                        loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                                     jitToInterpEntries.dvmJitToInterpPunt),
+                                     r1);
+                        opReg(cUnit, OP_BLX, r1);
                     }
                     break;
 #if defined(WITH_SELF_VERIFICATION)
diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h
index 4016075..6da22eb 100644
--- a/vm/compiler/codegen/arm/Codegen.h
+++ b/vm/compiler/codegen/arm/Codegen.h
@@ -29,8 +29,8 @@
                            int dest, int src1);
 static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpCode opCode,
                            int dest, int src1, int src2);
-static ArmLIR *newLIR23(CompilationUnit *cUnit, ArmOpCode opCode,
-                            int srcdest, int src2);
+static ArmLIR *newLIR4(CompilationUnit *cUnit, ArmOpCode opCode,
+                            int dest, int src1, int src2, int info);
 static ArmLIR *scanLiteralPool(CompilationUnit *cUnit, int value,
                                    unsigned int delta);
 static ArmLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace);
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index 11aaedd..5f24b4c 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -39,6 +39,28 @@
              (lir->opCode == THUMB2_VSTRD)));
 }
 
+/* Double regs overlap float regs.  Return true if collision  */
+static bool regClobber(int reg1, int reg2)
+{
+    int reg1a, reg1b;
+    int reg2a, reg2b;
+    if (!FPREG(reg1) || !FPREG(reg2))
+        return (reg1 == reg2);
+    if (DOUBLEREG(reg1)) {
+        reg1a = reg1 & FP_REG_MASK;
+        reg1b = reg1a + 1;
+    } else {
+        reg1a = reg1b = reg1 & FP_REG_MASK;
+    }
+    if (DOUBLEREG(reg2)) {
+        reg2a = reg2 & FP_REG_MASK;
+        reg2b = reg2a + 1;
+    } else {
+        reg2a = reg2b = reg2 & FP_REG_MASK;
+    }
+    return (reg1a == reg2a) || (reg1a == reg2b) ||
+           (reg1b == reg2a) || (reg1b == reg2b);
+}
 /*
  * Perform a pass of top-down walk to
  * 1) Eliminate redundant loads and stores
@@ -110,16 +132,20 @@
                                 checkLIR->opCode == THUMB_LDMIA ||
                                 checkLIR->opCode == THUMB_STR_RRR ||
                                 checkLIR->opCode == THUMB_LDR_RRR ||
+                                checkLIR->opCode == THUMB2_STR_RRR ||
+                                checkLIR->opCode == THUMB2_LDR_RRR ||
+                                checkLIR->opCode == THUMB2_STMIA ||
+                                checkLIR->opCode == THUMB2_LDMIA ||
                                 checkLIR->opCode == THUMB2_VLDRD ||
                                 checkLIR->opCode == THUMB2_VSTRD;
-;
 
                     if (!isPseudoOpCode(checkLIR->opCode)) {
 
                         /* Store data is clobbered */
                         stopHere |= (EncodingMap[checkLIR->opCode].flags &
                                      CLOBBER_DEST) != 0 &&
-                                    checkLIR->operands[0] == nativeRegId;
+                                     regClobber(checkLIR->operands[0],
+                                               nativeRegId);
 
                         stopHere |= (EncodingMap[checkLIR->opCode].flags &
                                      IS_BRANCH) != 0;
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index 3a9f1de..f05a867 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -16,46 +16,83 @@
 
 /*
  * This file contains codegen for the Thumb ISA and is intended to be
- * includes by:and support common to all supported
+ * included by:
  *
  *        Codegen-$(TARGET_ARCH_VARIANT).c
  *
  */
 
 #include "Codegen.h"
+/* Forward decls */
+static ArmLIR *genNullCheck(CompilationUnit *cUnit, int vReg, int mReg,
+                            int dOffset, ArmLIR *pcrLabel);
+static ArmLIR *loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest);
+static ArmLIR *loadValue(CompilationUnit *cUnit, int vSrc, int rDest);
+static ArmLIR *loadWordDisp(CompilationUnit *cUnit, int rBase,
+                            int displacement, int rDest);
+static ArmLIR *storeWordDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, int rScratch);
+static ArmLIR *storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
+                          int rScratch);
+static ArmLIR *genConditionalBranch(CompilationUnit *cUnit,
+                                    ArmConditionCode cond,
+                                    ArmLIR *target);
+static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target);
+static ArmLIR *loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
+                             int rDestHi);
+static ArmLIR *storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
+                              int vDest, int rScratch);
+static ArmLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
+                              int rBound, int dOffset, ArmLIR *pcrLabel);
+static ArmLIR *genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+
 
 /* Routines which must be supplied here */
-static void loadConstant(CompilationUnit *cUnit, int rDest, int value);
-static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr);
-static void genConditionalBranch(CompilationUnit *cUnit,
-                                 ArmConditionCode cond,
-                                 ArmLIR *target);
-static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target);
-static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
-                          int rDestHi);
-static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
-                           int vDest, int rScratch);
-static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int vDest);
-static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest);
-static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
-                         int rDest);
-static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch);
+static ArmLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value);
+static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC,
+                           int rAddr);
+static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
+                            int displacement, int rDest, OpSize size,
+                            bool nullCheck, int vReg);
+static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size,
+                             int rScratch);
 static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit,
-                                         ArmConditionCode cond, int reg,
-                                         int checkValue, int dOffset,
-                                         ArmLIR *pcrLabel);
+                                     ArmConditionCode cond, int reg,
+                                     int checkValue, int dOffset,
+                                     ArmLIR *pcrLabel);
 ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask);
+static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask);
 
-/*****************************************************************************/
+static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op);
+static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value);
+static ArmLIR *opImmImm(CompilationUnit *cUnit, OpKind op, int value1,
+                        int value2);
+static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc);
+static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2);
+static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int value, int rScratch);
+static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int value, int rScratch);
+static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int rSrc2);
+static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
+                               int rIndex, int rDest, int scale, OpSize size);
+
+static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin);
+static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir);
 
 /*
  * Support for register allocation
  */
 
-/* non-existent register */
-#define vNone   (-1)
-
 /* get the next register in r0..r3 in a round-robin fashion */
 #define NEXT_REG(reg) ((reg + 1) & 3)
 /*
@@ -131,9 +168,7 @@
 
 }
 
-/*****************************************************************************/
-
-ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
+static ArmLIR *fpRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
 {
     ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true);
     res->operands[0] = rDest;
@@ -141,22 +176,41 @@
     if (rDest == rSrc) {
         res->isNop = true;
     } else {
-        if (LOWREG(rDest) && LOWREG(rSrc)) {
-            res->opCode = THUMB_MOV_RR;
-        } else if (FPREG(rDest) && FPREG(rSrc)) {
-            if (DOUBLEREG(rDest)) {
-                assert(DOUBLEREG(rSrc));
-                res->opCode = THUMB2_VMOVD;
-            } else {
-                assert(SINGLEREG(rSrc));
-                res->opCode = THUMB2_VMOVS;
-            }
+        // TODO: support copy between FP and gen regs
+        if (DOUBLEREG(rDest)) {
+            assert(DOUBLEREG(rSrc));
+            res->opCode = THUMB2_VMOVD;
         } else {
-            // TODO: support copy between FP and gen regs.
-            assert(!FPREG(rDest));
-            assert(!FPREG(rSrc));
-            res->opCode = THUMB2_MOV_RR;
+            assert(SINGLEREG(rSrc));
+            res->opCode = THUMB2_VMOVS;
         }
+        res->operands[0] = rDest;
+        res->operands[1] = rSrc;
+    }
+    return res;
+}
+
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
+{
+    ArmLIR* res;
+    ArmOpCode opCode;
+    if (FPREG(rDest) || FPREG(rSrc))
+        return fpRegCopy(cUnit, rDest, rSrc);
+    res = dvmCompilerNew(sizeof(ArmLIR), true);
+    if (LOWREG(rDest) && LOWREG(rSrc))
+        opCode = THUMB_MOV_RR;
+    else if (!LOWREG(rDest) && !LOWREG(rSrc))
+         opCode = THUMB_MOV_RR_H2H;
+    else if (LOWREG(rDest))
+         opCode = THUMB_MOV_RR_H2L;
+    else
+         opCode = THUMB_MOV_RR_L2H;
+
+    res->operands[0] = rDest & THUMB_REG_MASK;
+    res->operands[1] = rSrc & THUMB_REG_MASK;
+    res->opCode = opCode;
+    if (rDest == rSrc) {
+        res->isNop = true;
     }
     return res;
 }
@@ -181,7 +235,7 @@
 }
 
 /*
- * Determine whether value can be encoded as a Thumb modified
+ * Determine whether value can be encoded as a Thumb2 modified
  * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
  */
 static int modifiedImmediate(u4 value)
@@ -218,28 +272,33 @@
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool
  */
-static void loadConstant(CompilationUnit *cUnit, int rDest, int value)
+static ArmLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value)
 {
+    ArmLIR *res;
     int modImm;
     /* See if the value can be constructed cheaply */
-    if ((value & 0xff) == value) {
-        newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
-        return;
+    if ((value >= 0) && (value <= 255)) {
+        return newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
     } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
-        newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value);
+        res = newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value);
         newLIR2(cUnit, THUMB_MVN, rDest, rDest);
-        return;
+        return res;
     }
     /* Check Modified immediate special cases */
     modImm = modifiedImmediate(value);
     if (modImm >= 0) {
-        newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, rDest, modImm);
-        return;
+        res = newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, rDest, modImm);
+        return res;
+    }
+    modImm = modifiedImmediate(~value);
+    if (modImm >= 0) {
+        res = newLIR2(cUnit, THUMB2_MVN_IMM_SHIFT, rDest, modImm);
+        return res;
     }
     /* 16-bit immediate? */
     if ((value & 0xffff) == value) {
-        newLIR2(cUnit, THUMB2_MOV_IMM16, rDest, value);
-        return;
+        res = newLIR2(cUnit, THUMB2_MOV_IMM16, rDest, value);
+        return res;
     }
     /* No shortcut - go ahead and use literal pool */
     ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255);
@@ -250,6 +309,7 @@
     loadPcRel->opCode = THUMB_LDR_PC_REL;
     loadPcRel->generic.target = (LIR *) dataTarget;
     loadPcRel->operands[0] = rDest;
+    res = loadPcRel;
     dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
 
     /*
@@ -259,210 +319,273 @@
     if (dataTarget->operands[0] != value) {
         newLIR2(cUnit, THUMB_ADD_RI8, rDest, value - dataTarget->operands[0]);
     }
+    return res;
 }
 
 /* Export the Dalvik PC assicated with an instruction to the StackSave area */
-static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr)
+static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC,
+                           int rAddr)  /* rAddr is not referenced below */
 {
+    ArmLIR *res;  /* LIR returned by loadConstant for the Dalvik PC load */
     int offset = offsetof(StackSaveArea, xtra.currentPc);
-    loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
+    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
     newLIR3(cUnit, THUMB2_STR_RRI8_PREDEC, rDPC, rFP,
             sizeof(StackSaveArea) - offset);
+    return res;  /* the load, not the store that follows it */
 }
 
-/* Generate conditional branch instructions */
-static void genConditionalBranch(CompilationUnit *cUnit,
-                                 ArmConditionCode cond,
-                                 ArmLIR *target)
+/* Load rDest (width per size) from rBase + scaled rIndex; index reg killed */
+static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
+                               int rIndex, int rDest, int scale, OpSize size)
 {
-    ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
-    branch->generic.target = (LIR *) target;
+    /* Thumb opcodes are chosen only for three low regs and a scale of 0 */
+    bool narrow = (scale == 0) && LOWREG(rBase) && LOWREG(rIndex) &&
+                  LOWREG(rDest);
+    ArmOpCode insn = THUMB_BKPT;
+    switch (size) {
+        case WORD:
+            insn = narrow ? THUMB_LDR_RRR : THUMB2_LDR_RRR;
+            break;
+        case UNSIGNED_HALF:
+            insn = narrow ? THUMB_LDRH_RRR : THUMB2_LDRH_RRR;
+            break;
+        case SIGNED_HALF:
+            insn = narrow ? THUMB_LDRSH_RRR : THUMB2_LDRSH_RRR;
+            break;
+        case UNSIGNED_BYTE:
+            insn = narrow ? THUMB_LDRB_RRR : THUMB2_LDRB_RRR;
+            break;
+        case SIGNED_BYTE:
+            insn = narrow ? THUMB_LDRSB_RRR : THUMB2_LDRSB_RRR;
+            break;
+        default:
+            assert(0);
+    }
+    if (!narrow)
+        return newLIR4(cUnit, insn, rDest, rBase, rIndex, scale);
+    return newLIR3(cUnit, insn, rDest, rBase, rIndex);
 }
 
-/* Generate unconditional branch instructions */
-static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target)
+/* Store rSrc (width per size) to rBase + scaled rIndex; index reg killed */
+static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase,
+                                int rIndex, int rSrc, int scale, OpSize size)
 {
-    ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND);
-    branch->generic.target = (LIR *) target;
-    return branch;
+    /* Thumb opcodes are chosen only for three low regs and a scale of 0 */
+    bool narrow = (scale == 0) && LOWREG(rBase) && LOWREG(rIndex) &&
+                  LOWREG(rSrc);
+    ArmOpCode insn = THUMB_BKPT;
+    switch (size) {
+        case WORD:
+            insn = narrow ? THUMB_STR_RRR : THUMB2_STR_RRR;
+            break;
+        case UNSIGNED_HALF:
+        case SIGNED_HALF:
+            insn = narrow ? THUMB_STRH_RRR : THUMB2_STRH_RRR;
+            break;
+        case UNSIGNED_BYTE:
+        case SIGNED_BYTE:
+            insn = narrow ? THUMB_STRB_RRR : THUMB2_STRB_RRR;
+            break;
+        default:
+            assert(0);
+    }
+    if (!narrow)
+        return newLIR4(cUnit, insn, rSrc, rBase, rIndex, scale);
+    return newLIR3(cUnit, insn, rSrc, rBase, rIndex);
 }
 
 /*
- * Load a pair of values of rFP[src..src+1] and store them into rDestLo and
- * rDestHi
+ * Load or store (per opCode) an FP value in Dalvik register vSrcDest.
+ * Note: using fixed r7 here, as address and as opRegRegImm scratch, when
+ * operation is out of range.  Revisit when register allocation changes.
  */
-static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
-                          int rDestHi)
+static ArmLIR *fpVarAccess(CompilationUnit *cUnit, int vSrcDest,
+                           int rSrcDest, ArmOpCode opCode)
 {
-    bool allLowRegs = (LOWREG(rDestLo) && LOWREG(rDestHi));
-
-    /* Use reg + imm5*4 to load the values if possible */
-    if (allLowRegs && vSrc <= 30) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc);
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1);
+    ArmLIR *res;
+    if (vSrcDest > 255) {
+        res = opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, r7);
+        newLIR3(cUnit, opCode, rSrcDest, r7, 0);
     } else {
-        assert(rDestLo < rDestHi);
-        loadValueAddress(cUnit, vSrc, rDestLo);
-        if (allLowRegs) {
-            newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi)));
-        } else {
-            assert(0); // Unimp - need Thumb2 ldmia
-        }
+        res = newLIR3(cUnit, opCode, rSrcDest, rFP, vSrcDest);
     }
+    return res;
 }
-
-/*
- * Store a pair of values of rSrc and rSrc+1 and store them into vDest and
- * vDest+1
- */
-static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
-                           int vDest, int rScratch)
+static ArmLIR *loadFloat(CompilationUnit *cUnit, int vSrc, int rDest)
 {
-    bool allLowRegs = (LOWREG(rSrcLo) && LOWREG(rSrcHi));
-    killNullCheckedRegister(cUnit, vDest);
-    killNullCheckedRegister(cUnit, vDest+1);
-    updateLiveRegisterPair(cUnit, vDest, rSrcLo, rSrcHi);
-
-    /* Use reg + imm5*4 to store the values if possible */
-    if (allLowRegs && vDest <= 30) {
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrcLo, rFP, vDest);
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrcHi, rFP, vDest+1);
-    } else {
-        assert(rSrcLo < rSrcHi);
-        loadValueAddress(cUnit, vDest, rScratch);
-        if (allLowRegs) {
-            newLIR2(cUnit, THUMB_STMIA, rScratch,
-                    (1<<rSrcLo) | (1 << (rSrcHi)));
-        } else {
-            assert(0); // Unimp - need Thumb2 stmia
-        }
-    }
-}
-
-static void addRegisterRegister(CompilationUnit *cUnit, int rDest,
-                                int rSrc1, int rSrc2)
-{
-    if (!LOWREG(rDest) || !LOWREG(rSrc1) || !LOWREG(rSrc2)) {
-        assert(0); // Unimp
-        //newLIR3(cUnit, THUMB2_ADD_RRR, rDest, rFP, rDest);
-    } else {
-        newLIR3(cUnit, THUMB_ADD_RRR, rDest, rFP, rDest);
-    }
-}
-
-/* Add in immediate to a register. */
-static void addRegisterImmediate(CompilationUnit *cUnit, int rDest, int rSrc,
-                                 int value)
-{
-// TODO: check for modified immediate form
-    if (LOWREG(rDest) && LOWREG(rSrc) && (value <= 7)) {
-        newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rSrc, value);
-    } else if (LOWREG(rDest) && (rDest == rSrc) && ((value & 0xff) == 0xff)) {
-        newLIR2(cUnit, THUMB_ADD_RI8, rDest, value);
-    } else if (value <= 4095) {
-        newLIR3(cUnit, THUMB2_ADD_RRI12, rDest, rSrc, value);
-    } else {
-        loadConstant(cUnit, rDest, value);
-        addRegisterRegister(cUnit, rDest, rDest, rFP);
-    }
-}
-
-/* Load the address of a Dalvik register on the frame */
-static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    addRegisterImmediate(cUnit, rDest, rFP, vSrc*4);
-}
-
-/*
- * FIXME: We need a general register temp for all of these coprocessor
- * operations in case we can't reach in 1 shot.  Might just want to
- * designate a hot temp that all codegen routines could use in their
- * scope.  Alternately, callers will need to allocate a temp and
- * pass it in to each of these.
- */
-
-/* Load a float from a Dalvik register */
-static void loadFloat(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    assert(vSrc <= 255); // FIXME - temp limit to 1st 256
     assert(SINGLEREG(rDest));
-    newLIR3(cUnit, THUMB2_VLDRS, rDest, rFP, vSrc);
+    return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS); /* may clobber r7 */
 }
 
 /* Store a float to a Dalvik register */
-static void storeFloat(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch)
+static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest,
+                          int rScratch)  /* rScratch is not used below */
 {
-    assert(vSrc <= 255); // FIXME - temp limit to 1st 256
     assert(SINGLEREG(rSrc));
-    newLIR3(cUnit, THUMB2_VSTRS, rSrc, rFP, vDest);
+    return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS); /* may clobber r7 */
 }
 
 /* Load a double from a Dalvik register */
-static void loadDouble(CompilationUnit *cUnit, int vSrc, int rDest)
+static ArmLIR *loadDouble(CompilationUnit *cUnit, int vSrc, int rDest)
 {
-    assert(vSrc <= 255); // FIXME - temp limit to 1st 256
     assert(DOUBLEREG(rDest));
-    newLIR3(cUnit, THUMB2_VLDRD, rDest, rFP, vSrc);
+    return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD); /* may clobber r7 */
 }
 
 /* Store a double to a Dalvik register */
-static void storeDouble(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch)
+static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest,
+                           int rScratch)  /* rScratch is not used below */
 {
-    assert(vSrc <= 255); // FIXME - temp limit to 1st 256
     assert(DOUBLEREG(rSrc));
-    newLIR3(cUnit, THUMB2_VSTRD, rSrc, rFP, vDest);
+    return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD); /* may clobber r7 */
 }
 
 
-/* Load a single value from rFP[src] and store them into rDest */
-static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
+/*
+ * Load a value of the given size from base + displacement into rDest.
+ * Optionally perform null check on base (which must have an associated
+ * vReg and MIR).  If not performing null check, incoming MIR can be null.
+ * Note: base and dest must not be the same if there is any chance that
+ * the long form must be used, since it loads the displacement into rDest.
+ * TODO: revisit, perhaps use hot temp reg in (base == dest) case.
+ */
+static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
+                            int displacement, int rDest, OpSize size,
+                            bool nullCheck, int vReg)
 {
-    loadWordDisp(cUnit, rFP, vSrc * 4, rDest);
-}
-
-/* Load a word at base + displacement.  Displacement must be word multiple */
-static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
-                         int rDest)
-{
+    ArmLIR *first = NULL;   /* null-check LIR, when one is generated */
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;
+    bool shortForm = false; /* set once an immediate-offset opcode is chosen */
+    bool thumb2Form = (displacement < 4092 && displacement >= 0);
+    int shortMax = 128;     /* NOTE(review): never read in this function */
     bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
-    assert((displacement & 0x3) == 0);
-    /* Can it fit in a RRI5? */
-    if (allLowRegs && displacement < 128) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rBase, displacement >> 2);
-    } else if (displacement < 4092) {
-        newLIR3(cUnit, THUMB2_LDR_RRI12, rDest, rFP, displacement);
-    } else {
-        loadConstant(cUnit, rDest, displacement);
-        if (allLowRegs) {
-            newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest);
-        } else {
-            assert(0); // Unimp - need Thumb2 ldr_rrr
-        }
+    switch (size) {
+        case WORD:
+            if (LOWREG(rDest) && (rBase == rpc) &&
+                (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                displacement >>= 2;     /* operand is displacement / 4 */
+                opCode = THUMB_LDR_PC_REL;
+            } else if (LOWREG(rDest) && (rBase == r13) &&
+                      (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                displacement >>= 2;     /* operand is displacement / 4 */
+                opCode = THUMB_LDR_SP_REL;
+            } else if (allLowRegs && displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                displacement >>= 2;     /* operand is displacement / 4 */
+                opCode = THUMB_LDR_RRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_LDR_RRI12;
+            }
+            break;
+        case UNSIGNED_HALF:
+            if (allLowRegs && displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                displacement >>= 1;     /* operand is displacement / 2 */
+                opCode = THUMB_LDRH_RRI5;
+            } else if (displacement < 4092 && displacement >= 0) {
+                shortForm = true;       /* same range test as thumb2Form */
+                opCode = THUMB2_LDRH_RRI12;
+            }
+            break;
+        case SIGNED_HALF:
+            if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_LDRSH_RRI12;
+            }
+            break;
+        case UNSIGNED_BYTE:
+            if (allLowRegs && displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opCode = THUMB_LDRB_RRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_LDRB_RRI12;
+            }
+            break;
+        case SIGNED_BYTE:
+            if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_LDRSB_RRI12;
+            }
+            break;
+        default:
+            assert(0);
     }
+    if (nullCheck)
+        first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL);
+    if (shortForm) {
+        res = newLIR3(cUnit, opCode, rDest, rBase, displacement);
+    } else {
+        assert(rBase != rDest); /* rDest is about to hold the displacement */
+        res = loadConstant(cUnit, rDest, displacement);
+        loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size);
+    }
+    return (first) ? first : res;   /* null check, if any, comes first */
 }
 
-/* Store a value from rSrc to vDest */
-static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch)
+/* Store rSrc to rBase + displacement; rScratch (or -1) serves the long form */
+static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size,
+                             int rScratch)
 {
-    killNullCheckedRegister(cUnit, vDest);
-    updateLiveRegister(cUnit, vDest, rSrc);
-
-    /* Use reg + imm5*4 to store the value if possible */
-    if (LOWREG(rSrc) && vDest <= 31) {
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest);
-    } else if (vDest <= 1023) {
-        newLIR3(cUnit, THUMB2_STR_RRI12, rSrc, rFP, vDest*4);
-    } else {
-        loadConstant(cUnit, rScratch, vDest*4);
-        if (LOWREG(rSrc)) {
-            newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch);
-        } else {
-            assert(0); // Unimp: Need generic str_rrr routine
-        }
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;
+    bool shortForm = false;
+    bool thumb2Form = (displacement < 4092 && displacement >= 0);
+    bool allLowRegs = LOWREG(rBase) && LOWREG(rSrc) &&
+                      ((rScratch == -1) || LOWREG(rScratch));
+    /* Guard all 16-bit imm5 forms with allLowRegs, as loadBaseDisp does */
+    switch (size) {
+        case WORD:
+            if (allLowRegs && displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                displacement >>= 2;
+                opCode = THUMB_STR_RRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_STR_RRI12;
+            }
+            break;
+        case UNSIGNED_HALF:
+        case SIGNED_HALF:
+            if (allLowRegs && displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                displacement >>= 1;
+                opCode = THUMB_STRH_RRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_STRH_RRI12;
+            }
+            break;
+        case UNSIGNED_BYTE:
+        case SIGNED_BYTE:
+            if (allLowRegs && displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opCode = THUMB_STRB_RRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opCode = THUMB2_STRB_RRI12;  /* was the halfword opcode */
+            }
+            break;
+        default:
+            assert(0);
     }
+    if (shortForm) {
+        res = newLIR3(cUnit, opCode, rSrc, rBase, displacement);
+    } else {
+        assert(rScratch != -1);
+        res = loadConstant(cUnit, rScratch, displacement);
+        storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+    }
+    return res;
 }
 
 /*
@@ -475,14 +598,574 @@
                                          ArmLIR *pcrLabel)
 {
     ArmLIR *branch;
+    int modImm;
     if ((LOWREG(reg)) && (checkValue == 0) &&
        ((cond == ARM_COND_EQ) || (cond == ARM_COND_NE))) {
         branch = newLIR2(cUnit,
                          (cond == ARM_COND_EQ) ? THUMB2_CBZ : THUMB2_CBNZ,
                          reg, 0);
     } else {
-        newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue);
+        modImm = modifiedImmediate(checkValue);
+        if ((checkValue & 0xff) == checkValue) {
+            newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue);
+        } else if (modImm >= 0) {
+            newLIR2(cUnit, THUMB2_CMP_RI8, reg, modImm);
+        } else {
+            /* Note: direct use of hot temp r7 here. Revisit. */
+            loadConstant(cUnit, r7, checkValue);
+            newLIR2(cUnit, THUMB_CMP_RR, reg, r7);
+        }
         branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
     }
     return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
 }
+
+/* Emit LDMIA on rBase with register mask rMask */
+static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
+{
+    /* THUMB_LDMIA is used only with a low base and a mask within bits 0-7 */
+    bool narrow = LOWREG(rBase) && ((rMask & ~0xff) == 0);
+    ArmOpCode insn = THUMB2_LDMIA;
+    if (narrow)
+        insn = THUMB_LDMIA;
+    return newLIR2(cUnit, insn, rBase, rMask);
+}
+
+/* Emit STMIA on rBase with register mask rMask */
+static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask)
+{
+    /* THUMB_STMIA is used only with a low base and a mask within bits 0-7 */
+    bool narrow = LOWREG(rBase) && ((rMask & ~0xff) == 0);
+    ArmOpCode insn = THUMB2_STMIA;
+    if (narrow)
+        insn = THUMB_STMIA;
+    return newLIR2(cUnit, insn, rBase, rMask);
+}
+
+/* Emit an operand-less instruction; only OP_UNCOND_BR is handled */
+static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op)
+{
+    ArmOpCode insn = THUMB_BKPT;    /* emitted if asserts are compiled out */
+
+    if (op == OP_UNCOND_BR) {
+        insn = THUMB_B_UNCOND;
+    } else {
+        assert(0);                  /* unsupported OpKind */
+    }
+    return newLIR0(cUnit, insn);
+}
+
+/* Emit a two-immediate instruction; only OP_COND_BR is handled */
+static ArmLIR *opImmImm(CompilationUnit *cUnit, OpKind op, int value1,
+                        int value2)
+{
+    ArmOpCode insn = THUMB_BKPT;    /* emitted if asserts are compiled out */
+
+    if (op == OP_COND_BR) {
+        insn = THUMB_B_COND;
+    } else {
+        assert(0);                  /* unsupported OpKind */
+    }
+    return newLIR2(cUnit, insn, value1, value2);
+}
+
+/* Emit PUSH/POP with operand value; THUMB2 opcode if any of bits 8-15 set */
+static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value)
+{
+    bool wide = (value & 0xff00) != 0;
+    ArmOpCode insn = THUMB_BKPT;    /* emitted if asserts are compiled out */
+
+    if (op == OP_PUSH) {
+        insn = wide ? THUMB2_PUSH : THUMB_PUSH;
+    } else if (op == OP_POP) {
+        insn = wide ? THUMB2_POP : THUMB_POP;
+    } else {
+        assert(0);                  /* unsupported OpKind */
+    }
+    return newLIR1(cUnit, insn, value);
+}
+
+/* Emit a single-register instruction; only OP_BLX is handled */
+static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc)
+{
+    ArmOpCode insn = THUMB_BKPT;    /* emitted if asserts are compiled out */
+
+    if (op == OP_BLX) {
+        insn = THUMB_BLX_R;
+    } else {
+        assert(0);                  /* unsupported OpKind */
+    }
+    return newLIR1(cUnit, insn, rDestSrc);
+}
+
+/*
+ * Register-register operation that also takes a shift operand.  Most kinds
+ * use the Thumb opcode when shift is 0 and both registers are low, and the
+ * Thumb2 opcode otherwise; operand layout follows the EncodingMap flags.
+ */
+static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2, int shift)
+{
+    bool narrow = ((shift == 0) && LOWREG(rDestSrc1) && LOWREG(rSrc2));
+    ArmOpCode insn = THUMB_BKPT;
+    switch (op) {
+        case OP_ADC:
+            insn = narrow ? THUMB_ADC : THUMB2_ADC_RRR;
+            break;
+        case OP_AND:
+            insn = narrow ? THUMB_AND_RR : THUMB2_AND_RRR;
+            break;
+        case OP_BIC:
+            insn = narrow ? THUMB_BIC : THUMB2_BIC_RRR;
+            break;
+        case OP_CMN:
+            assert(shift == 0);
+            insn = narrow ? THUMB_CMN : THUMB2_CMN_RR;
+            break;
+        case OP_CMP:
+            if (shift != 0) {
+                insn = THUMB2_CMP_RR;
+                break;
+            }
+            if (narrow)
+                insn = THUMB_CMP_RR;
+            else if (!LOWREG(rDestSrc1) && !LOWREG(rSrc2))
+                insn = THUMB_CMP_HH;
+            else if (LOWREG(rDestSrc1))
+                insn = THUMB_CMP_LH;
+            else
+                insn = THUMB_CMP_HL;
+            rDestSrc1 &= THUMB_REG_MASK;
+            rSrc2 &= THUMB_REG_MASK;
+            break;
+        case OP_XOR:
+            insn = narrow ? THUMB_EOR : THUMB2_EOR_RRR;
+            break;
+        case OP_MOV:
+            assert(shift == 0);
+            if (LOWREG(rDestSrc1))
+                insn = LOWREG(rSrc2) ? THUMB_MOV_RR : THUMB_MOV_RR_H2L;
+            else
+                insn = LOWREG(rSrc2) ? THUMB_MOV_RR_L2H : THUMB_MOV_RR_H2H;
+            rDestSrc1 &= THUMB_REG_MASK;
+            rSrc2 &= THUMB_REG_MASK;
+            break;
+        case OP_MUL:
+            assert(shift == 0);
+            insn = narrow ? THUMB_MUL : THUMB2_MUL_RRR;
+            break;
+        case OP_MVN:
+            insn = narrow ? THUMB_MVN : THUMB2_MVN_RR;
+            break;
+        case OP_NEG:
+            assert(shift == 0);
+            insn = narrow ? THUMB_NEG : THUMB2_NEG_RR;
+            break;
+        case OP_OR:
+            insn = narrow ? THUMB_ORR : THUMB2_ORR_RRR;
+            break;
+        case OP_SBC:
+            insn = narrow ? THUMB_SBC : THUMB2_SBC_RRR;
+            break;
+        case OP_TST:
+            insn = narrow ? THUMB_TST : THUMB2_TST_RR;
+            break;
+        case OP_LSL:
+            assert(shift == 0);
+            insn = narrow ? THUMB_LSLV : THUMB2_LSLV_RRR;
+            break;
+        case OP_LSR:
+            assert(shift == 0);
+            insn = narrow ? THUMB_LSRV : THUMB2_LSRV_RRR;
+            break;
+        case OP_ASR:
+            assert(shift == 0);
+            insn = narrow ? THUMB_ASRV : THUMB2_ASRV_RRR;
+            break;
+        case OP_ROR:
+            assert(shift == 0);
+            insn = narrow ? THUMB_RORV : THUMB2_RORV_RRR;
+            break;
+        case OP_ADD:
+            insn = narrow ? THUMB_ADD_RRR : THUMB2_ADD_RRR;
+            break;
+        case OP_SUB:
+            insn = narrow ? THUMB_SUB_RRR : THUMB2_SUB_RRR;
+            break;
+        case OP_2BYTE:
+            assert(shift == 0);
+            return newLIR4(cUnit, THUMB2_SBFX, rDestSrc1, rSrc2, 0, 8);
+        case OP_2SHORT:
+            assert(shift == 0);
+            return newLIR4(cUnit, THUMB2_SBFX, rDestSrc1, rSrc2, 0, 16);
+        case OP_2CHAR:
+            assert(shift == 0);
+            return newLIR4(cUnit, THUMB2_UBFX, rDestSrc1, rSrc2, 0, 16);
+        default:
+            assert(0);
+    }
+    assert(insn >= 0);
+    /* The encoding table records how many operands the opcode takes */
+    if (EncodingMap[insn].flags & IS_BINARY_OP)
+        return newLIR2(cUnit, insn, rDestSrc1, rSrc2);
+    if (EncodingMap[insn].flags & IS_TERTIARY_OP) {
+        /* Third operand is either the shift or the second source */
+        if (EncodingMap[insn].fieldLoc[2].kind == SHIFT)
+            return newLIR3(cUnit, insn, rDestSrc1, rSrc2, shift);
+        return newLIR3(cUnit, insn, rDestSrc1, rDestSrc1, rSrc2);
+    }
+    if (EncodingMap[insn].flags & IS_QUAD_OP)
+        return newLIR4(cUnit, insn, rDestSrc1, rDestSrc1, rSrc2, shift);
+    assert(0);
+    return NULL;
+}
+
+static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2)
+{
+    return opRegRegShift(cUnit, op, rDestSrc1, rSrc2, 0);  /* shift of 0 */
+}
+
+/*
+ * Handle Thumb-only variants here - otherwise punt to opRegRegImm.
+ * OP_CMP that cannot use CMP_RI8 loads value into rScratch and compares
+ * registers, since opRegRegImm has no OP_CMP case.
+ */
+static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int value, int rScratch)
+{
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    bool shortForm = (((absValue & 0xff) == absValue) && LOWREG(rDestSrc1));
+    ArmOpCode opCode = THUMB_BKPT;
+    switch (op) {
+        case OP_ADD:
+            if ( !neg && (rDestSrc1 == 13) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, THUMB_ADD_SPI7, value >> 2);
+            } else if (shortForm) {
+                opCode = (neg) ? THUMB_SUB_RI8 : THUMB_ADD_RI8;
+            }
+            break;
+        case OP_SUB:
+            if (!neg && (rDestSrc1 == 13) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, THUMB_SUB_SPI7, value >> 2);
+            } else if (shortForm) {
+                opCode = (neg) ? THUMB_ADD_RI8 : THUMB_SUB_RI8;
+            }
+            break;
+        case OP_CMP:
+            /* shortForm tests |value|; a negative compare must not use it */
+            if (neg || !shortForm) {
+                loadConstant(cUnit, rScratch, value);
+                return opRegReg(cUnit, OP_CMP, rDestSrc1, rScratch);
+            }
+            opCode = THUMB_CMP_RI8;
+            break;
+        default:
+            /* Punt to opRegRegImm - if bad case catch it there */
+            shortForm = false;
+            break;
+    }
+    if (shortForm)
+        return newLIR2(cUnit, opCode, rDestSrc1, absValue);
+    return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value, rScratch);
+}
+
+/* Three-register op plus shift operand: rDest = rSrc1 <op> rSrc2 */
+static ArmLIR *opRegRegRegShift(CompilationUnit *cUnit, OpKind op,
+                                int rDest, int rSrc1, int rSrc2, int shift)
+{
+    ArmOpCode insn = THUMB_BKPT;
+    /* Only ADD and SUB have a Thumb alternative in this routine */
+    bool narrow = (shift == 0) && LOWREG(rDest) && LOWREG(rSrc1) &&
+                  LOWREG(rSrc2);
+    switch (op) {
+        case OP_ADD:
+            insn = narrow ? THUMB_ADD_RRR : THUMB2_ADD_RRR;
+            break;
+        case OP_SUB:
+            insn = narrow ? THUMB_SUB_RRR : THUMB2_SUB_RRR;
+            break;
+        case OP_ADC:
+            insn = THUMB2_ADC_RRR;
+            break;
+        case OP_AND:
+            insn = THUMB2_AND_RRR;
+            break;
+        case OP_BIC:
+            insn = THUMB2_BIC_RRR;
+            break;
+        case OP_XOR:
+            insn = THUMB2_EOR_RRR;
+            break;
+        case OP_MUL:
+            assert(shift == 0);
+            insn = THUMB2_MUL_RRR;
+            break;
+        case OP_OR:
+            insn = THUMB2_ORR_RRR;
+            break;
+        case OP_SBC:
+            insn = THUMB2_SBC_RRR;
+            break;
+        case OP_LSL:
+            assert(shift == 0);
+            insn = THUMB2_LSLV_RRR;
+            break;
+        case OP_LSR:
+            assert(shift == 0);
+            insn = THUMB2_LSRV_RRR;
+            break;
+        case OP_ASR:
+            assert(shift == 0);
+            insn = THUMB2_ASRV_RRR;
+            break;
+        case OP_ROR:
+            assert(shift == 0);
+            insn = THUMB2_RORV_RRR;
+            break;
+        default:
+            assert(0);
+    }
+    assert(insn >= 0);
+    if (!(EncodingMap[insn].flags & IS_QUAD_OP)) {
+        assert(EncodingMap[insn].flags & IS_TERTIARY_OP);
+        return newLIR3(cUnit, insn, rDest, rSrc1, rSrc2);
+    }
+    return newLIR4(cUnit, insn, rDest, rSrc1, rSrc2, shift);
+}
+
+static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int rSrc2)
+{
+    return opRegRegRegShift(cUnit, op, rDest, rSrc1, rSrc2, 0); /* no shift */
+}
+
+/*
+ * rDest = rSrc1 <op> value.  Uses an immediate encoding when one fits,
+ * else loads value into rScratch and emits the register (alt) opcode.
+ */
+static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int value, int rScratch)
+{
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    ArmOpCode opCode = THUMB_BKPT;
+    ArmOpCode altOpCode = THUMB_BKPT;
+    bool allLowRegs = (LOWREG(rDest) && LOWREG(rSrc1));
+    int modImm = modifiedImmediate(value);
+    int modImmNeg = modifiedImmediate(-value);
+
+    switch(op) {
+        case OP_LSL:
+            return newLIR3(cUnit, (allLowRegs) ? THUMB_LSL : THUMB2_LSL_RRI5,
+                           rDest, rSrc1, value);
+        case OP_LSR:
+            return newLIR3(cUnit, (allLowRegs) ? THUMB_LSR : THUMB2_LSR_RRI5,
+                           rDest, rSrc1, value);
+        case OP_ASR:
+            return newLIR3(cUnit, (allLowRegs) ? THUMB_ASR : THUMB2_ASR_RRI5,
+                           rDest, rSrc1, value);
+        case OP_ROR:
+            return newLIR3(cUnit, THUMB2_ROR_RRI5, rDest, rSrc1, value);
+        case OP_ADD:
+            if (!neg && LOWREG(rDest) && (rSrc1 == 13) && (value <= 1020)) {
+                assert((value & 0x3) == 0);
+                return newLIR3(cUnit, THUMB_ADD_SP_REL, rDest, rSrc1,
+                               value >> 2);
+            } else if (!neg && LOWREG(rDest) && (rSrc1 == rpc) &&
+                       (value <= 1020)) {
+                assert((value & 0x3) == 0);
+                return newLIR3(cUnit, THUMB_ADD_PC_REL, rDest, rSrc1,
+                               value >> 2);
+            }
+            opCode = THUMB2_ADD_RRI8;
+            altOpCode = THUMB2_ADD_RRR;
+            // Note: intentional fallthrough
+        case OP_SUB:
+            if (allLowRegs && ((absValue & 0x7) == absValue)) {
+                if (op == OP_ADD)
+                    opCode = (neg) ? THUMB_SUB_RRI3 : THUMB_ADD_RRI3;
+                else
+                    opCode = (neg) ? THUMB_ADD_RRI3 : THUMB_SUB_RRI3;
+                return newLIR3(cUnit, opCode, rDest, rSrc1, absValue);
+            } else if ((absValue & 0xff) == absValue) {
+                if (op == OP_ADD)
+                    opCode = (neg) ? THUMB2_SUB_RRI12 : THUMB2_ADD_RRI12;
+                else
+                    opCode = (neg) ? THUMB2_ADD_RRI12 : THUMB2_SUB_RRI12;
+                return newLIR3(cUnit, opCode, rDest, rSrc1, absValue);
+            }
+            if (modImmNeg >= 0) {
+                op = (op == OP_ADD) ? OP_SUB : OP_ADD;
+                modImm = modImmNeg;
+            }
+            /* Pick opcodes for the (possibly flipped) op, either way round */
+            if (op == OP_SUB) {
+                opCode = THUMB2_SUB_RRI8;
+                altOpCode = THUMB2_SUB_RRR;
+            } else {
+                opCode = THUMB2_ADD_RRI8;
+                altOpCode = THUMB2_ADD_RRR;
+            }
+            break;
+        case OP_ADC:
+            opCode = THUMB2_ADC_RRI8;
+            altOpCode = THUMB2_ADC_RRR;
+            break;
+        case OP_SBC:
+            opCode = THUMB2_SBC_RRI8;
+            altOpCode = THUMB2_SBC_RRR;
+            break;
+        case OP_OR:
+            opCode = THUMB2_ORR_RRI8;
+            altOpCode = THUMB2_ORR_RRR;
+            break;
+        case OP_AND:
+            opCode = THUMB2_AND_RRI8;
+            altOpCode = THUMB2_AND_RRR;
+            break;
+        case OP_XOR:
+            opCode = THUMB2_EOR_RRI8;
+            altOpCode = THUMB2_EOR_RRR;
+            break;
+        case OP_MUL:
+            //TUNING: power of 2, shift & add
+            modImm = -1;
+            altOpCode = THUMB2_MUL_RRR;
+            break;
+        default:
+            assert(0);
+    }
+
+    if (modImm >= 0)
+        return newLIR3(cUnit, opCode, rDest, rSrc1, modImm);
+    loadConstant(cUnit, rScratch, value);
+    /* Operand count must come from the register form actually emitted */
+    if (EncodingMap[altOpCode].flags & IS_QUAD_OP)
+        return newLIR4(cUnit, altOpCode, rDest, rSrc1, rScratch, 0);
+    return newLIR3(cUnit, altOpCode, rDest, rSrc1, rScratch);
+}
+
+//TODO: specialize the inlined routines for Thumb2
+/* Inlined string length: null-check arg 0, copy its count field to retval */
+static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    int rStr = selectFirstRegister(cUnit, insn->arg[0], false);
+    int rCount = NEXT_REG(rStr);
+    loadValue(cUnit, insn->arg[0], rStr);
+    genNullCheck(cUnit, insn->arg[0], rStr, mir->offset, NULL);
+    loadWordDisp(cUnit, rStr, gDvm.offJavaLangString_count, rCount);
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval), rCount, rStr);
+    return false;
+}
+
+static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
+{
+    /* retval = halfword at value.contents + 2 * (offset + index) */
+    int vStr = mir->dalvikInsn.arg[0];
+    int retvalOff = offsetof(InterpState, retval);
+    int dataOff = offsetof(ArrayObject, contents);
+    int rStr = selectFirstRegister(cUnit, vStr, false);
+    int rIdx = NEXT_REG(rStr);
+    int rCount = NEXT_REG(rIdx);
+    int rOffset = NEXT_REG(rCount);
+    loadValue(cUnit, vStr, rStr);
+    loadValue(cUnit, mir->dalvikInsn.arg[1], rIdx);
+    ArmLIR *pcrLabel = genNullCheck(cUnit, vStr, rStr, mir->offset, NULL);
+    loadWordDisp(cUnit, rStr, gDvm.offJavaLangString_count, rCount);
+    loadWordDisp(cUnit, rStr, gDvm.offJavaLangString_offset, rOffset);
+    loadWordDisp(cUnit, rStr, gDvm.offJavaLangString_value, rStr);
+    genBoundsCheck(cUnit, rIdx, rCount, mir->offset, pcrLabel);
+
+    newLIR2(cUnit, THUMB_ADD_RI8, rStr, dataOff);
+    newLIR3(cUnit, THUMB_ADD_RRR, rIdx, rIdx, rOffset);
+    newLIR3(cUnit, THUMB_ADD_RRR, rIdx, rIdx, rIdx);  /* index *= 2 */
+    newLIR3(cUnit, THUMB_LDRH_RRR, rCount, rStr, rIdx);
+    storeWordDisp(cUnit, rGLUE, retvalOff, rCount, rStr);
+    return false;
+}
+
+static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
+{
+    /* Branchless abs: m = x >> 31 (arithmetic), result = (x + m) ^ m */
+    int vSrc = mir->dalvikInsn.arg[0];
+    int rVal = selectFirstRegister(cUnit, vSrc, false);
+    int rMask = NEXT_REG(rVal);
+    loadValue(cUnit, vSrc, rVal);
+    newLIR3(cUnit, THUMB_ASR, rMask, rVal, 31);
+    newLIR3(cUnit, THUMB_ADD_RRR, rVal, rVal, rMask);
+    newLIR2(cUnit, THUMB_EOR, rVal, rMask);
+    /* rMask is no longer needed, so it is passed as the store's scratch */
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval), rVal, rMask);
+    return false;
+}
+
+static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
+{
+    /* Float abs: AND the raw word with 0x7fffffff to clear bit 31 */
+    int vSrc = mir->dalvikInsn.arg[0];
+    int rVal = selectFirstRegister(cUnit, vSrc, false);
+    int rMask = NEXT_REG(rVal);
+    loadValue(cUnit, vSrc, rVal);
+    loadConstant(cUnit, rMask, 0x7fffffff);
+    newLIR2(cUnit, THUMB_AND_RR, rVal, rMask);
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval), rVal, rMask);
+    return false;
+}
+
+static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
+{
+    /* Double abs: low word stored as loaded, high word ANDed with 0x7fffffff */
+    int vSrc = mir->dalvikInsn.arg[0];
+    int rLo = selectFirstRegister(cUnit, vSrc, true);
+    int rHi = NEXT_REG(rLo);
+    int rMask = NEXT_REG(rHi);
+    loadValuePair(cUnit, vSrc, rLo, rHi);
+    loadConstant(cUnit, rMask, 0x7fffffff);
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval), rLo, rHi);
+    newLIR2(cUnit, THUMB_AND_RR, rHi, rMask);
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, retval) + 4, rHi, rLo);
+    return false;
+}
+
+/* Thumb has no select, so min/max branches around a register move */
+static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
+{
+    int retvalOff = offsetof(InterpState, retval);
+    int vLhs = mir->dalvikInsn.arg[0];
+    int rLhs = selectFirstRegister(cUnit, vLhs, false);
+    int rRhs = NEXT_REG(rLhs);
+    ArmConditionCode keepLhs = isMin ? ARM_COND_LT : ARM_COND_GT;
+    loadValue(cUnit, vLhs, rLhs);
+    loadValue(cUnit, mir->dalvikInsn.arg[1], rRhs);
+    newLIR2(cUnit, THUMB_CMP_RR, rLhs, rRhs);
+    ArmLIR *skipMove = newLIR2(cUnit, THUMB_B_COND, 2, keepLhs);
+    newLIR2(cUnit, THUMB_MOV_RR, rLhs, rRhs);
+    /* The branch lands on the store of rLhs into retval */
+    skipMove->generic.target =
+        (LIR *)newLIR3(cUnit, THUMB_STR_RRI5, rLhs, rGLUE, retvalOff >> 2);
+    return false;
+}
+
+static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
+{
+    /* 64-bit branchless abs: m = hi >> 31, then (x + m) ^ m on both words */
+    int vSrc = mir->dalvikInsn.arg[0];
+    int retvalOff = offsetof(InterpState, retval);
+    int rLo = selectFirstRegister(cUnit, vSrc, true);
+    int rHi = NEXT_REG(rLo);
+    int rMask = NEXT_REG(rHi);
+    loadValuePair(cUnit, vSrc, rLo, rHi);
+    newLIR3(cUnit, THUMB_ASR, rMask, rHi, 31);
+    newLIR3(cUnit, THUMB_ADD_RRR, rLo, rLo, rMask);
+    newLIR2(cUnit, THUMB_ADC, rHi, rMask);    /* high word: add with carry */
+    newLIR2(cUnit, THUMB_EOR, rLo, rMask);
+    newLIR2(cUnit, THUMB_EOR, rHi, rMask);
+    storeWordDisp(cUnit, rGLUE, retvalOff, rLo, rMask);
+    storeWordDisp(cUnit, rGLUE, retvalOff + 4, rHi, rMask);
+    return false;
+}
diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c
index 8be50ad..cde1f71 100644
--- a/vm/compiler/codegen/arm/ThumbUtil.c
+++ b/vm/compiler/codegen/arm/ThumbUtil.c
@@ -16,46 +16,83 @@
 
 /*
  * This file contains codegen for the Thumb ISA and is intended to be
- * includes by:and support common to all supported
+ * included by:
  *
  *        Codegen-$(TARGET_ARCH_VARIANT).c
  *
  */
 
 #include "Codegen.h"
+/* Forward decls */
+static ArmLIR *genNullCheck(CompilationUnit *cUnit, int vReg, int mReg,
+                            int dOffset, ArmLIR *pcrLabel);
+static ArmLIR *loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest);
+static ArmLIR *loadValue(CompilationUnit *cUnit, int vSrc, int rDest);
+static ArmLIR *loadWordDisp(CompilationUnit *cUnit, int rBase,
+                            int displacement, int rDest);
+static ArmLIR *storeWordDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, int rScratch);
+static ArmLIR *storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
+                          int rScratch);
+static ArmLIR *genConditionalBranch(CompilationUnit *cUnit,
+                                    ArmConditionCode cond,
+                                    ArmLIR *target);
+static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target);
+static ArmLIR *loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
+                             int rDestHi);
+static ArmLIR *storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
+                              int vDest, int rScratch);
+static ArmLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
+                              int rBound, int dOffset, ArmLIR *pcrLabel);
+static ArmLIR *genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+
 
 /* Routines which must be supplied here */
-static void loadConstant(CompilationUnit *cUnit, int rDest, int value);
-static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr);
-static void genConditionalBranch(CompilationUnit *cUnit,
-                                 ArmConditionCode cond,
-                                 ArmLIR *target);
-static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target);
-static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
-                          int rDestHi);
-static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
-                           int vDest, int rScratch);
-static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int vDest);
-static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest);
-static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
-                         int rDest);
-static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch);
+static ArmLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value);
+static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC,
+                           int rAddr);
+static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
+                            int displacement, int rDest, OpSize size,
+                            bool nullCheck, int vReg);
+static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size,
+                             int rScratch);
 static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit,
-                                         ArmConditionCode cond, int reg,
-                                         int checkValue, int dOffset,
-                                         ArmLIR *pcrLabel);
+                                     ArmConditionCode cond, int reg,
+                                     int checkValue, int dOffset,
+                                     ArmLIR *pcrLabel);
 ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask);
+static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask);
 
-/*****************************************************************************/
+static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op);
+static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value);
+static ArmLIR *opImmImm(CompilationUnit *cUnit, OpKind op, int value1,
+                        int value2);
+static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc);
+static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2);
+static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int value, int rScratch);
+static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int value, int rScratch);
+static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int rSrc2);
+static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
+                               int rIndex, int rDest, int scale, OpSize size);
+
+static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir);
+static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin);
+static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir);
 
 /*
  * Support for register allocation
  */
 
-/* non-existent register */
-#define vNone   (-1)
-
 /* get the next register in r0..r3 in a round-robin fashion */
 #define NEXT_REG(reg) ((reg + 1) & 3)
 /*
@@ -131,15 +168,25 @@
 
 }
 
-/*****************************************************************************/
-
 ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
 {
-    ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true);
-    assert(LOWREG(rDest) && LOWREG(rSrc));
-    res->operands[0] = rDest;
-    res->operands[1] = rSrc;
-    res->opCode = THUMB_MOV_RR;
+    ArmLIR* res;
+    ArmOpCode opCode;       /* mov variant picked from the low/high mix */
+    res = dvmCompilerNew(sizeof(ArmLIR), true);
+    if (LOWREG(rDest) && LOWREG(rSrc))
+        opCode = THUMB_MOV_RR;
+    else if (!LOWREG(rDest) && !LOWREG(rSrc))
+        opCode = THUMB_MOV_RR_H2H;
+    else if (LOWREG(rDest))
+        opCode = THUMB_MOV_RR_H2L;
+    else
+        opCode = THUMB_MOV_RR_L2H;
+    /* Mask only the stored operands: the nop test below must compare the
+     * full register numbers, or a high/low pair could pass as a self-copy */
+
+    res->operands[0] = rDest & THUMB_REG_MASK;
+    res->operands[1] = rSrc & THUMB_REG_MASK;
+    res->opCode = opCode;
     if (rDest == rSrc) {
         res->isNop = true;
     }
@@ -150,16 +197,16 @@
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool
  */
-static void loadConstant(CompilationUnit *cUnit, int rDest, int value)
+static ArmLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value)
 {
+    ArmLIR *res;
     /* See if the value can be constructed cheaply */
     if ((value >= 0) && (value <= 255)) {
-        newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
-        return;
+        return newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
     } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
-        newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value);
+        res = newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value);
         newLIR2(cUnit, THUMB_MVN, rDest, rDest);
-        return;
+        return res;
     }
     /* No shortcut - go ahead and use literal pool */
     ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255);
@@ -170,6 +217,7 @@
     loadPcRel->opCode = THUMB_LDR_PC_REL;
     loadPcRel->generic.target = (LIR *) dataTarget;
     loadPcRel->operands[0] = rDest;
+    res = loadPcRel;
     dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
 
     /*
@@ -179,147 +227,208 @@
     if (dataTarget->operands[0] != value) {
         newLIR2(cUnit, THUMB_ADD_RI8, rDest, value - dataTarget->operands[0]);
     }
+    return res;
 }
 
 /* Export the Dalvik PC assicated with an instruction to the StackSave area */
-static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr)
+static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC,
+                           int rAddr)
 {
+    ArmLIR *res;    /* LIR returned by loadConstant; handed back to caller */
     int offset = offsetof(StackSaveArea, xtra.currentPc);
-    loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
+    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
     newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP);
     newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset);
-    newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0);
+    storeWordDisp( cUnit, rAddr, 0, rDPC, -1);  /* -1: no scratch supplied */
+    return res;
 }
 
-/* Generate conditional branch instructions */
-static void genConditionalBranch(CompilationUnit *cUnit,
-                                 ArmConditionCode cond,
-                                 ArmLIR *target)
+/* Load rDest from rBase + (rIndex << scale); rIndex is clobbered if scaled */
+static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
+                               int rIndex, int rDest, int scale, OpSize size)
 {
-    ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
-    branch->generic.target = (LIR *) target;
+    ArmLIR *first = NULL;   /* the scaling shift, when one is emitted */
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;  /* kept if size matches no case below */
+    if (scale)
+        first = opRegRegImm(cUnit, OP_LSL, rIndex, rIndex, scale, rNone);
+    switch (size) {
+        case WORD:
+            opCode = THUMB_LDR_RRR;
+            break;
+        case UNSIGNED_HALF:
+            opCode = THUMB_LDRH_RRR;
+            break;
+        case SIGNED_HALF:
+            opCode = THUMB_LDRSH_RRR;
+            break;
+        case UNSIGNED_BYTE:
+            opCode = THUMB_LDRB_RRR;
+            break;
+        case SIGNED_BYTE:
+            opCode = THUMB_LDRSB_RRR;
+            break;
+        default:
+            assert(0);
+    }
+    res = newLIR3(cUnit, opCode, rDest, rBase, rIndex);
+    return (first) ? first : res;   /* the shift if emitted, else the load */
 }
 
-/* Generate unconditional branch instructions */
-static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target)
+/* Store rSrc to rBase + (rIndex << scale); rIndex is clobbered if scaled */
+static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase,
+                                int rIndex, int rSrc, int scale, OpSize size)
 {
-    ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND);
-    branch->generic.target = (LIR *) target;
-    return branch;
+    ArmLIR *first = NULL;   /* the scaling shift, when one is emitted */
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;  /* kept if size matches no case below */
+    if (scale)
+        first = opRegRegImm(cUnit, OP_LSL, rIndex, rIndex, scale, rNone);
+    switch (size) {
+        case WORD:
+            opCode = THUMB_STR_RRR;
+            break;
+        case UNSIGNED_HALF:     /* signedness is irrelevant for stores */
+        case SIGNED_HALF:
+            opCode = THUMB_STRH_RRR;
+            break;
+        case UNSIGNED_BYTE:
+        case SIGNED_BYTE:
+            opCode = THUMB_STRB_RRR;
+            break;
+        default:
+            assert(0);
+    }
+    res = newLIR3(cUnit, opCode, rSrc, rBase, rIndex);
+    return (first) ? first : res;   /* the shift if emitted, else the store */
 }
 
 /*
- * Load a pair of values of rFP[src..src+1] and store them into rDestLo and
- * rDestHi
+ * Load value from base + displacement.  Optionally perform null check
+ * on base (which must have an associated vReg and MIR).  If not
+ * performing null check, incoming MIR can be null. Note: base and
+ * dest must not be the same if there is any chance that the long
+ * form must be used.
  */
-static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
-                          int rDestHi)
+static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
+                            int displacement, int rDest, OpSize size,
+                            bool nullCheck, int vReg)
 {
-    /* Use reg + imm5*4 to load the values if possible */
-    if (vSrc <= 30) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc);
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1);
-    } else {
-        if (vSrc <= 64) {
-            /* Sneak 4 into the base address first */
-            newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4);
-            newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4);
-        } else {
-            /* Offset too far from rFP */
-            loadConstant(cUnit, rDestLo, vSrc*4);
-            newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo);
-        }
-        assert(rDestLo < rDestHi);
-        newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi)));
+    ArmLIR *first = NULL;   /* the null check, when one is requested */
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;  /* kept if size matches no case below */
+    bool shortForm = false;
+    /* shortForm: displacement is encoded (scaled) in the instruction itself */
+    switch (size) {
+        case WORD:
+            if (LOWREG(rDest) && (rBase == rpc) &&
+                (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                displacement >>= 2;
+                opCode = THUMB_LDR_PC_REL;
+            } else if (LOWREG(rDest) && (rBase == r13) &&
+                      (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                displacement >>= 2;
+                opCode = THUMB_LDR_SP_REL;
+            } else if (displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                displacement >>= 2;
+                opCode = THUMB_LDR_RRI5;
+            } else {
+                opCode = THUMB_LDR_RRR;
+            }
+            break;
+        case UNSIGNED_HALF:
+            if (displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                displacement >>= 1;
+                opCode = THUMB_LDRH_RRI5;
+            } else {
+                opCode = THUMB_LDRH_RRR;
+            }
+            break;
+        case SIGNED_HALF:       /* always takes the base + register form */
+            opCode = THUMB_LDRSH_RRR;
+            break;
+        case UNSIGNED_BYTE:
+            if (displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opCode = THUMB_LDRB_RRI5;
+            } else {
+                opCode = THUMB_LDRB_RRR;
+            }
+            break;
+        case SIGNED_BYTE:       /* always takes the base + register form */
+            opCode = THUMB_LDRSB_RRR;
+            break;
+        default:
+            assert(0);
     }
+    if (nullCheck)
+        first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL);
+    if (shortForm) {
+        res = newLIR3(cUnit, opCode, rDest, rBase, displacement);
+    } else {
+        assert(rBase != rDest);     /* rDest is overwritten with the offset */
+        res = loadConstant(cUnit, rDest, displacement);
+        newLIR3(cUnit, opCode, rDest, rBase, rDest);
+    }
+    return (first) ? first : res;   /* null check if emitted, else the load */
 }
 
-/*
- * Store a pair of values of rSrc and rSrc+1 and store them into vDest and
- * vDest+1
- */
-static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
-                           int vDest, int rScratch)
+static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size,
+                             int rScratch)
 {
-    killNullCheckedRegister(cUnit, vDest);
-    killNullCheckedRegister(cUnit, vDest+1);
-    updateLiveRegisterPair(cUnit, vDest, rSrcLo, rSrcHi);
-
-    /* Use reg + imm5*4 to store the values if possible */
-    if (vDest <= 30) {
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrcLo, rFP, vDest);
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrcHi, rFP, vDest+1);
-    } else {
-        if (vDest <= 64) {
-            /* Sneak 4 into the base address first */
-            newLIR3(cUnit, THUMB_ADD_RRI3, rScratch, rFP, 4);
-            newLIR2(cUnit, THUMB_ADD_RI8, rScratch, (vDest-1)*4);
-        } else {
-            /* Offset too far from rFP */
-            loadConstant(cUnit, rScratch, vDest*4);
-            newLIR3(cUnit, THUMB_ADD_RRR, rScratch, rFP, rScratch);
-        }
-        assert(rSrcLo < rSrcHi);
-        newLIR2(cUnit, THUMB_STMIA, rScratch, (1<<rSrcLo) | (1 << (rSrcHi)));
+    ArmLIR *res;
+    ArmOpCode opCode = THUMB_BKPT;  /* kept if size matches no case below */
+    bool shortForm = false;
+    /* shortForm: displacement is encoded (scaled) in the instruction itself */
+    switch (size) {
+        case WORD:
+            if (displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                displacement >>= 2;
+                opCode = THUMB_STR_RRI5;
+            } else {
+                opCode = THUMB_STR_RRR;
+            }
+            break;
+        case UNSIGNED_HALF:     /* signedness is irrelevant for stores */
+        case SIGNED_HALF:
+            if (displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                displacement >>= 1;
+                opCode = THUMB_STRH_RRI5;
+            } else {
+                opCode = THUMB_STRH_RRR;
+            }
+            break;
+        case UNSIGNED_BYTE:
+        case SIGNED_BYTE:
+            if (displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opCode = THUMB_STRB_RRI5;
+            } else {
+                opCode = THUMB_STRB_RRR;
+            }
+            break;
+        default:
+            assert(0);
     }
-}
-
-/* Load the address of a Dalvik register on the frame */
-static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    /* RRI3 can add up to 7 */
-    if (vSrc <= 1) {
-        newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, vSrc*4);
-    } else if (vSrc <= 64) {
-        /* Sneak 4 into the base address first */
-        newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, 4);
-        newLIR2(cUnit, THUMB_ADD_RI8, rDest, (vSrc-1)*4);
+    if (shortForm) {
+        res = newLIR3(cUnit, opCode, rSrc, rBase, displacement);
     } else {
-        loadConstant(cUnit, rDest, vSrc*4);
-        newLIR3(cUnit, THUMB_ADD_RRR, rDest, rFP, rDest);
+        assert(rScratch != -1);     /* long form needs a register for offset */
+        res = loadConstant(cUnit, rScratch, displacement);
+        newLIR3(cUnit, opCode, rSrc, rBase, rScratch);
     }
-}
-
-/* Load a single value from rFP[src] and store them into rDest */
-static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    /* Use reg + imm5*4 to load the value if possible */
-    if (vSrc <= 31) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rFP, vSrc);
-    } else {
-        loadConstant(cUnit, rDest, vSrc*4);
-        newLIR3(cUnit, THUMB_LDR_RRR, rDest, rFP, rDest);
-    }
-}
-
-/* Load a word at base + displacement.  Displacement must be word multiple */
-static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
-                         int rDest)
-{
-    assert((displacement & 0x3) == 0);
-    /* Can it fit in a RRI5? */
-    if (displacement < 128) {
-        newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rBase, displacement >> 2);
-    } else {
-        loadConstant(cUnit, rDest, displacement);
-        newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest);
-    }
-}
-
-/* Store a value from rSrc to vDest */
-static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch)
-{
-    killNullCheckedRegister(cUnit, vDest);
-    updateLiveRegister(cUnit, vDest, rSrc);
-
-    /* Use reg + imm5*4 to store the value if possible */
-    if (vDest <= 31) {
-        newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest);
-    } else {
-        loadConstant(cUnit, rScratch, vDest*4);
-        newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch);
-    }
+    return res;
 }
 
 /*
@@ -331,7 +440,439 @@
                                          int checkValue, int dOffset,
                                          ArmLIR *pcrLabel)
 {
+    assert((checkValue & 0xff) == checkValue);
     newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue);
     ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
     return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
 }
+
+static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
+{   /* Emit THUMB_LDMIA on rBase; rMask is passed through as its 2nd operand */
+    return newLIR2(cUnit, THUMB_LDMIA, rBase, rMask);
+}
+
+static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask)
+{   /* Emit THUMB_STMIA on rBase; rMask is passed through as its 2nd operand */
+    return newLIR2(cUnit, THUMB_STMIA, rBase, rMask);
+}
+
+static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op)
+{
+    /* Operand-less ops: only the unconditional branch is mapped */
+    ArmOpCode thumbOp;
+    if (op == OP_UNCOND_BR) {
+        thumbOp = THUMB_B_UNCOND;
+    } else {
+        assert(0);
+        thumbOp = THUMB_BKPT;   /* reached only when asserts are off */
+    }
+    return newLIR0(cUnit, thumbOp);
+}
+
+static ArmLIR *opImmImm(CompilationUnit *cUnit, OpKind op, int value1,
+                        int value2)
+{
+    /* Two-immediate ops: only the conditional branch is mapped */
+    ArmOpCode thumbOp;
+    if (op == OP_COND_BR) {
+        thumbOp = THUMB_B_COND;
+    } else {
+        assert(0);
+        thumbOp = THUMB_BKPT;   /* reached only when asserts are off */
+    }
+    return newLIR2(cUnit, thumbOp, value1, value2);
+}
+
+static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value)
+{
+    /* Single-immediate ops: push and pop are the only kinds mapped; */
+    /* value is handed to the instruction unchanged. */
+    ArmOpCode thumbOp;
+    if (op == OP_PUSH) {
+        thumbOp = THUMB_PUSH;
+    } else if (op == OP_POP) {
+        thumbOp = THUMB_POP;
+    } else {
+        assert(0);
+        thumbOp = THUMB_BKPT;   /* reached only when asserts are off */
+    }
+    return newLIR1(cUnit, thumbOp, value);
+}
+
+static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc)
+{
+    /* Single-register ops: only branch-and-link via register is mapped */
+    ArmOpCode thumbOp;
+    if (op == OP_BLX) {
+        thumbOp = THUMB_BLX_R;
+    } else {
+        assert(0);
+        thumbOp = THUMB_BKPT;   /* reached only when asserts are off */
+    }
+    return newLIR1(cUnit, thumbOp, rDestSrc);
+}
+
+static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2)
+{
+    ArmLIR *res;    /* first LIR of the two-instruction conversions */
+    ArmOpCode opCode = THUMB_BKPT;  /* kept if op matches no case below */
+    switch (op) {
+        case OP_ADC:
+            opCode = THUMB_ADC;
+            break;
+        case OP_AND:
+            opCode = THUMB_AND_RR;
+            break;
+        case OP_BIC:
+            opCode = THUMB_BIC;
+            break;
+        case OP_CMN:
+            opCode = THUMB_CMN;
+            break;
+        case OP_CMP:
+            opCode = THUMB_CMP_RR;
+            break;
+        case OP_XOR:
+            opCode = THUMB_EOR;
+            break;
+        case OP_MOV:    /* variant depends on which side is a high register */
+            if (LOWREG(rDestSrc1) && LOWREG(rSrc2))
+                opCode = THUMB_MOV_RR;
+            else if (!LOWREG(rDestSrc1) && !LOWREG(rSrc2))
+                opCode = THUMB_MOV_RR_H2H;
+            else if (LOWREG(rDestSrc1))
+                opCode = THUMB_MOV_RR_H2L;
+            else
+                opCode = THUMB_MOV_RR_L2H;
+            rDestSrc1 &= THUMB_REG_MASK;
+            rSrc2 &= THUMB_REG_MASK;
+            break;
+        case OP_MUL:
+            opCode = THUMB_MUL;
+            break;
+        case OP_MVN:
+            opCode = THUMB_MVN;
+            break;
+        case OP_NEG:
+            opCode = THUMB_NEG;
+            break;
+        case OP_OR:
+            opCode = THUMB_ORR;
+            break;
+        case OP_SBC:
+            opCode = THUMB_SBC;
+            break;
+        case OP_TST:
+            opCode = THUMB_TST;
+            break;
+        case OP_LSL:
+            opCode = THUMB_LSLV;
+            break;
+        case OP_LSR:
+            opCode = THUMB_LSRV;
+            break;
+        case OP_ASR:
+            opCode = THUMB_ASRV;
+            break;
+        case OP_ROR:
+            opCode = THUMB_RORV;
+            break;  /* must not fall into the add/sub path below */
+        case OP_ADD:    /* add/sub go through the three-register form */
+        case OP_SUB:
+            return opRegRegReg(cUnit, op, rDestSrc1, rDestSrc1, rSrc2);
+        case OP_2BYTE:  /* sign-extend low byte: lsl 24 then asr 24 */
+            res = opRegRegImm(cUnit, OP_LSL, rDestSrc1, rSrc2, 24, rNone);
+            opRegRegImm(cUnit, OP_ASR, rDestSrc1, rDestSrc1, 24, rNone);
+            return res;
+        case OP_2SHORT: /* sign-extend low halfword: lsl 16 then asr 16 */
+            res = opRegRegImm(cUnit, OP_LSL, rDestSrc1, rSrc2, 16, rNone);
+            opRegRegImm(cUnit, OP_ASR, rDestSrc1, rDestSrc1, 16, rNone);
+            return res;
+        case OP_2CHAR:  /* zero-extend low halfword: lsl 16 then lsr 16 */
+            res = opRegRegImm(cUnit, OP_LSL, rDestSrc1, rSrc2, 16, rNone);
+            opRegRegImm(cUnit, OP_LSR, rDestSrc1, rDestSrc1, 16, rNone);
+            return res;
+        default:
+            assert(0);
+    }
+    return newLIR2(cUnit, opCode, rDestSrc1, rSrc2);
+}
+
+static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1,
+                        int value, int rScratch)
+{
+    ArmLIR *res;
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    bool shortForm = (absValue & 0xff) == absValue;
+    ArmOpCode opCode = THUMB_BKPT;
+    /* add/sub absorb a negative value by flipping to the opposite opcode */
+    switch (op) {
+        case OP_ADD:
+            if ( !neg && (rDestSrc1 == 13) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, THUMB_ADD_SPI7, value >> 2);
+            } else if (shortForm) {
+                opCode = (neg) ? THUMB_SUB_RI8 : THUMB_ADD_RI8;
+            } else
+                opCode = THUMB_ADD_RRR;
+            break;
+        case OP_SUB:
+            if (!neg && (rDestSrc1 == 13) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, THUMB_SUB_SPI7, value >> 2);
+            } else if (shortForm) {
+                opCode = (neg) ? THUMB_ADD_RI8 : THUMB_SUB_RI8;
+            } else
+                opCode = THUMB_SUB_RRR;
+            break;
+        case OP_CMP:
+            /* cmp #imm8 only takes an unsigned value and a low register */
+            shortForm = shortForm && !neg && LOWREG(rDestSrc1);
+            if (shortForm)
+                opCode = THUMB_CMP_RI8;
+            else
+                opCode = LOWREG(rDestSrc1) ? THUMB_CMP_RR : THUMB_CMP_HL;
+            break;
+        default:
+            assert(0);
+            break;
+    }
+    if (shortForm)
+        return newLIR2(cUnit, opCode, rDestSrc1, absValue);
+    assert(rScratch != rNone);
+    res = loadConstant(cUnit, rScratch, value);
+    if (op == OP_CMP)       /* compares are two-operand: no destination */
+        newLIR2(cUnit, opCode, rDestSrc1, rScratch);
+    else
+        newLIR3(cUnit, opCode, rDestSrc1, rDestSrc1, rScratch);
+    return res;
+}
+
+static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int rSrc2)
+{
+    /* Three-register ops: only add and sub are mapped here; */
+    /* the operands are passed to the instruction in the order given. */
+    ArmOpCode thumbOp;
+    if (op == OP_ADD) {
+        thumbOp = THUMB_ADD_RRR;
+    } else if (op == OP_SUB) {
+        thumbOp = THUMB_SUB_RRR;
+    } else {
+        assert(0);
+        thumbOp = THUMB_BKPT;   /* reached only when asserts are off */
+    }
+    return newLIR3(cUnit, thumbOp, rDest, rSrc1, rSrc2);
+}
+
+/*
+ * Emit Thumb code for "rDest = rSrc1 <op> value" with an immediate operand.
+ *
+ * Selection strategy, as implemented below:
+ *   - OP_ADD from sp (r13) or pc (r15) with a non-negative, word-aligned
+ *     offset of at most 1020 uses THUMB_ADD_SP_REL / THUMB_ADD_PC_REL and
+ *     passes the offset divided by 4 as the immediate operand.
+ *   - OP_ADD / OP_SUB with |value| <= 7 use the RRI3 forms; a negative
+ *     value flips add <-> sub.
+ *   - OP_ADD / OP_SUB with 7 < |value| <= 255 + 7 are split in two: an RRI3
+ *     instruction handling 7, then an RI8 instruction on rDest for the rest.
+ *   - OP_LSL / OP_LSR / OP_ASR by 0..31 use the immediate shift opcodes.
+ *   - OP_MUL / OP_AND / OP_OR / OP_XOR always materialize the constant and
+ *     hand off to opRegReg().
+ *   - Anything else loads the constant into a register (rDest when it is
+ *     distinct from rSrc1, otherwise rScratch) and emits the selected
+ *     opcode with that register as the third operand.
+ *
+ * rScratch may be rNone only when the chosen path does not need it.
+ *
+ * Returns the LIR produced by the first emitter call of the sequence.
+ */
+static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest,
+                           int rSrc1, int value, int rScratch)
+{
+    ArmLIR *res;
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    /* Immediate actually handed to the short-form instruction */
+    int immOperand = absValue;
+    ArmOpCode opCode = THUMB_BKPT;
+    bool shortForm = (absValue & 0x7) == absValue;
+    switch(op) {
+        case OP_ADD:
+            /*
+             * The sp/pc-relative forms only apply to non-negative offsets,
+             * and the operand passed is the offset scaled down by 4.
+             * Negative offsets take the generic paths below.
+             */
+            if (!neg && (rSrc1 == 13) && (value <= 1020)) { /* sp */
+                assert((value & 0x3) == 0);
+                shortForm = true;
+                opCode = THUMB_ADD_SP_REL;
+                immOperand = value >> 2;
+            } else if (!neg && (rSrc1 == 15) && (value <= 1020)) { /* pc */
+                assert((value & 0x3) == 0);
+                shortForm = true;
+                opCode = THUMB_ADD_PC_REL;
+                immOperand = value >> 2;
+            } else if (shortForm) {
+                opCode = (neg) ? THUMB_SUB_RRI3 : THUMB_ADD_RRI3;
+            } else if ((absValue > 0) && (absValue <= (255 + 7))) {
+                /* Two shots - 1st handle the 7 */
+                opCode = (neg) ? THUMB_SUB_RRI3 : THUMB_ADD_RRI3;
+                res = newLIR3(cUnit, opCode, rDest, rSrc1, 7);
+                opCode = (neg) ? THUMB_SUB_RI8 : THUMB_ADD_RI8;
+                newLIR2(cUnit, opCode, rDest, absValue - 7);
+                return res;
+            } else {
+                opCode = THUMB_ADD_RRR;
+            }
+            break;
+
+        case OP_SUB:
+            if (shortForm) {
+                opCode = (neg) ? THUMB_ADD_RRI3 : THUMB_SUB_RRI3;
+            } else if ((absValue > 0) && (absValue <= (255 + 7))) {
+                /* Two shots - 1st handle the 7 */
+                opCode = (neg) ? THUMB_ADD_RRI3 : THUMB_SUB_RRI3;
+                res = newLIR3(cUnit, opCode, rDest, rSrc1, 7);
+                opCode = (neg) ? THUMB_ADD_RI8 : THUMB_SUB_RI8;
+                newLIR2(cUnit, opCode, rDest, absValue - 7);
+                return res;
+            } else {
+                opCode = THUMB_SUB_RRR;
+            }
+            break;
+        /*
+         * NOTE(review): for the three shift cases, a negative amount or one
+         * above 31 clears shortForm and falls into the load-constant path
+         * below, which passes a register number as the third operand of an
+         * opcode that is used elsewhere in this file with an immediate
+         * there.  Callers presumably never request such shifts - confirm.
+         */
+        case OP_LSL:
+                shortForm = (!neg && value <= 31);
+                opCode = THUMB_LSL;
+                break;
+        case OP_LSR:
+                shortForm = (!neg && value <= 31);
+                opCode = THUMB_LSR;
+                break;
+        case OP_ASR:
+                shortForm = (!neg && value <= 31);
+                opCode = THUMB_ASR;
+                break;
+        case OP_MUL:
+        case OP_AND:
+        case OP_OR:
+        case OP_XOR:
+                if (rDest == rSrc1) {
+                    /* rDest holds the live source, so a scratch is required */
+                    assert(rScratch != rNone);
+                    res = loadConstant(cUnit, rScratch, value);
+                    opRegReg(cUnit, op, rDest, rScratch);
+                } else {
+                    /* Build the constant in rDest, then fold rSrc1 into it */
+                    res = loadConstant(cUnit, rDest, value);
+                    opRegReg(cUnit, op, rDest, rSrc1);
+                }
+                return res;
+        default:
+            assert(0);
+            break;
+    }
+    if (shortForm) {
+        res = newLIR3(cUnit, opCode, rDest, rSrc1, immOperand);
+    } else {
+        if (rDest != rSrc1) {
+            /* rDest is free to hold the constant until the final op */
+            res = loadConstant(cUnit, rDest, value);
+            newLIR3(cUnit, opCode, rDest, rSrc1, rDest);
+        } else {
+            assert(rScratch != rNone);
+            res = loadConstant(cUnit, rScratch, value);
+            newLIR3(cUnit, opCode, rDest, rSrc1, rScratch);
+        }
+    }
+    return res;
+}
+
+/*
+ * Inline code generation for the string-length intrinsic: load the string
+ * reference from arg[0], emit a null check on it, read the word at
+ * gDvm.offJavaLangString_count and store it into InterpState.retval
+ * (addressed off rGLUE).
+ *
+ * Always returns false.
+ */
+static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    int strReg = selectFirstRegister(cUnit, insn->arg[0], false);
+    int countReg = NEXT_REG(strReg);
+
+    /* Receiver object, checked for null before it is dereferenced */
+    loadValue(cUnit, insn->arg[0], strReg);
+    genNullCheck(cUnit, insn->arg[0], strReg, mir->offset, NULL);
+
+    /* retval = string count field; strReg is no longer needed afterwards */
+    loadWordDisp(cUnit, strReg, gDvm.offJavaLangString_count, countReg);
+    storeWordDisp(cUnit, rGLUE, retvalOffset, countReg, strReg);
+    return false;
+}
+
+/*
+ * Inline code generation for the string char-at intrinsic.
+ *
+ * Loads the string reference (arg[0]) and index (arg[1]), emits a null
+ * check on the string, reads its count, offset and value fields, emits a
+ * bounds check of the index against the count, then loads a halfword from
+ * the value array at byte position
+ *     offsetof(ArrayObject, contents) + 2 * (index + offset)
+ * and stores it into InterpState.retval (addressed off rGLUE).
+ *
+ * Always returns false.
+ */
+static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    const int dataOffset = offsetof(ArrayObject, contents);
+
+    /* Four consecutive registers: string/array, index, count/result, offset */
+    int strReg = selectFirstRegister(cUnit, insn->arg[0], false);
+    int indexReg = NEXT_REG(strReg);
+    int countReg = NEXT_REG(indexReg);
+    int offsetReg = NEXT_REG(countReg);
+    ArmLIR *nullCheckLabel;
+
+    loadValue(cUnit, insn->arg[0], strReg);
+    loadValue(cUnit, insn->arg[1], indexReg);
+    nullCheckLabel = genNullCheck(cUnit, insn->arg[0], strReg,
+                                  mir->offset, NULL);
+
+    /* The value field is loaded last since it overwrites the string ref */
+    loadWordDisp(cUnit, strReg, gDvm.offJavaLangString_count, countReg);
+    loadWordDisp(cUnit, strReg, gDvm.offJavaLangString_offset, offsetReg);
+    loadWordDisp(cUnit, strReg, gDvm.offJavaLangString_value, strReg);
+    genBoundsCheck(cUnit, indexReg, countReg, mir->offset, nullCheckLabel);
+
+    /* strReg += contents offset; indexReg = (index + offset) * 2 */
+    newLIR2(cUnit, THUMB_ADD_RI8, strReg, dataOffset);
+    newLIR3(cUnit, THUMB_ADD_RRR, indexReg, indexReg, offsetReg);
+    newLIR3(cUnit, THUMB_ADD_RRR, indexReg, indexReg, indexReg);
+
+    /* Fetch the halfword into countReg (count is dead) and publish it */
+    newLIR3(cUnit, THUMB_LDRH_RRR, countReg, strReg, indexReg);
+    storeWordDisp(cUnit, rGLUE, retvalOffset, countReg, strReg);
+    return false;
+}
+
+/*
+ * Inline code generation for integer abs().
+ *
+ * Uses the branch-free identity
+ *     mask = x >> 31 (arithmetic);  abs(x) = (x + mask) ^ mask
+ * and stores the result into InterpState.retval (addressed off rGLUE).
+ *
+ * Always returns false.
+ */
+static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    int valReg = selectFirstRegister(cUnit, insn->arg[0], false);
+    int maskReg = NEXT_REG(valReg);
+
+    loadValue(cUnit, insn->arg[0], valReg);
+
+    /* maskReg = valReg >> 31 */
+    newLIR3(cUnit, THUMB_ASR, maskReg, valReg, 31);
+    /* valReg = (valReg + maskReg) ^ maskReg */
+    newLIR3(cUnit, THUMB_ADD_RRR, valReg, valReg, maskReg);
+    newLIR2(cUnit, THUMB_EOR, valReg, maskReg);
+
+    storeWordDisp(cUnit, rGLUE, retvalOffset, valReg, maskReg);
+    return false;
+}
+
+/*
+ * Inline code generation for float abs(): load the 32-bit argument, AND it
+ * with 0x7fffffff (clearing the top bit) and store the result into
+ * InterpState.retval (addressed off rGLUE).
+ *
+ * Always returns false.
+ */
+static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    int valReg = selectFirstRegister(cUnit, insn->arg[0], false);
+    int maskReg = NEXT_REG(valReg);
+
+    loadValue(cUnit, insn->arg[0], valReg);
+
+    /* valReg &= 0x7fffffff */
+    loadConstant(cUnit, maskReg, 0x7fffffff);
+    newLIR2(cUnit, THUMB_AND_RR, valReg, maskReg);
+
+    storeWordDisp(cUnit, rGLUE, retvalOffset, valReg, maskReg);
+    return false;
+}
+
+/*
+ * Inline code generation for double abs().
+ *
+ * Loads the 64-bit argument as a register pair (oplo, ophi), stores the
+ * low word unchanged to InterpState.retval, ANDs the high word with
+ * 0x7fffffff (clearing the top bit) and stores it to retval + 4.
+ *
+ * Always returns false.
+ */
+static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
+    int ophi = NEXT_REG(oplo);
+    int signMask = NEXT_REG(ophi);
+    loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
+    loadConstant(cUnit, signMask, 0x7fffffff);
+    /*
+     * The low word is stored before the high word is masked.
+     * NOTE(review): ophi is passed as the last argument here while it is
+     * still live (it is masked and stored below).  If that argument is a
+     * scratch register that storeWordDisp may overwrite, this is only safe
+     * while the displacement never needs it - confirm.
+     */
+    storeWordDisp(cUnit, rGLUE, offset, oplo, ophi);
+    /* ophi &= 0x7fffffff */
+    newLIR2(cUnit, THUMB_AND_RR, ophi, signMask);
+    /* oplo has already been stored, so it is passed as the last argument */
+    storeWordDisp(cUnit, rGLUE, offset + 4, ophi, oplo);
+    return false;
+}
+
+/*
+ * Inline code generation for integer min()/max(); 'isMin' selects which.
+ *
+ * Thumb has no select instruction, so a conditional branch is used (Thumb2
+ * will do better): compare the two arguments, branch over a register move
+ * when the first argument is already the answer (LT for min, GT for max),
+ * otherwise copy the second argument over the first.  The result is then
+ * stored to InterpState.retval (addressed off rGLUE) with a single
+ * THUMB_STR_RRI5, which also serves as the branch target.
+ *
+ * Always returns false.
+ */
+static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    int firstReg = selectFirstRegister(cUnit, insn->arg[0], false);
+    int secondReg = NEXT_REG(firstReg);
+    int keepFirstCond;
+    ArmLIR *skipMove;
+    ArmLIR *storeResult;
+
+    loadValue(cUnit, insn->arg[0], firstReg);
+    loadValue(cUnit, insn->arg[1], secondReg);
+    newLIR2(cUnit, THUMB_CMP_RR, firstReg, secondReg);
+
+    /* Condition under which firstReg already holds the result */
+    if (isMin) {
+        keepFirstCond = ARM_COND_LT;
+    } else {
+        keepFirstCond = ARM_COND_GT;
+    }
+    skipMove = newLIR2(cUnit, THUMB_B_COND, 2, keepFirstCond);
+    newLIR2(cUnit, THUMB_MOV_RR, firstReg, secondReg);
+
+    /* The store takes the word offset, hence the shift by 2 */
+    storeResult =
+        newLIR3(cUnit, THUMB_STR_RRI5, firstReg, rGLUE, retvalOffset >> 2);
+    skipMove->generic.target = (LIR *)storeResult;
+    return false;
+}
+
+/*
+ * Inline code generation for long abs().
+ *
+ * 64-bit version of the branch-free identity
+ *     mask = x >> 63 (arithmetic);  abs(x) = (x + mask) ^ mask
+ * The mask word is obtained by shifting the high word right by 31 and is
+ * used for both halves; the addition is done as an add on the low word
+ * followed by an add-with-carry on the high word.  The result pair is
+ * stored to InterpState.retval and retval + 4 (addressed off rGLUE).
+ *
+ * Always returns false.
+ */
+static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *insn = &mir->dalvikInsn;
+    const int retvalOffset = offsetof(InterpState, retval);
+    int loReg = selectFirstRegister(cUnit, insn->arg[0], true);
+    int hiReg = NEXT_REG(loReg);
+    int maskReg = NEXT_REG(hiReg);
+
+    loadValuePair(cUnit, insn->arg[0], loReg, hiReg);
+
+    /* maskReg = hiReg >> 31 */
+    newLIR3(cUnit, THUMB_ASR, maskReg, hiReg, 31);
+
+    /* (hi:lo) += (mask:mask) - the ADC must directly follow the add */
+    newLIR3(cUnit, THUMB_ADD_RRR, loReg, loReg, maskReg);
+    newLIR2(cUnit, THUMB_ADC, hiReg, maskReg);
+
+    /* (hi:lo) ^= (mask:mask) */
+    newLIR2(cUnit, THUMB_EOR, loReg, maskReg);
+    newLIR2(cUnit, THUMB_EOR, hiReg, maskReg);
+
+    storeWordDisp(cUnit, rGLUE, retvalOffset, loReg, maskReg);
+    storeWordDisp(cUnit, rGLUE, retvalOffset + 4, hiReg, maskReg);
+    return false;
+}
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 92097af..f9f2c10 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -15,8 +15,6 @@
  */
 
 
-static void loadFloat(CompilationUnit *cUnit, int vSrc, int rDest);
-
 /*
  * This file is included by Codegen-armv5te-vfp.c, and implements architecture
  * variant-specific code.
@@ -121,7 +119,7 @@
     int vSrc = mir->dalvikInsn.vA;
     loadDouble(cUnit, vSrc, dr1);
     newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1);
-    assert(offset & 0x3 == 0);  /* Must be word aligned */
+    assert((offset & 0x3) == 0);  /* Must be word aligned */
     assert(offset < 1024);
     newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
     return true;