Register promotion fix

Restructured the type inference mechanism, added lots of DCHECKs,
bumped the default memory allocation size to reflect AOT
compilation, and tweaked the bit vector manipulation routines
to be better at handling large sparse vectors (something the old
trace JIT didn't encounter often enough to care about).

With this CL, optimization is back on by default.  We should also see
a significant boost in compilation speed (~2x better for boot.oat).

Change-Id: Ifd134ef337be173a1be756bb9198b24c5b4936b3
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index dac63cf..5be48ab 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -32,23 +32,30 @@
     kLocSpill,
 } RegLocationType;
 
+typedef struct PromotionMap {       // Dalvik vReg -> physical register mapping entry
+   RegLocationType coreLocation:3;  // Location kind of the core (non-FP) mapping
+   u1 coreReg;                      // Core physical register for this entry
+   RegLocationType fpLocation:3;    // Location kind of the floating-point mapping
+   u1 fpReg;                        // FP physical register for this entry
+   bool firstInPair;                // NOTE(review): presumably marks the first half of a wide pair -- confirm
+} PromotionMap;
+
 typedef struct RegLocation {
-    RegLocationType location:2;
+    RegLocationType location:3;  // Location kind (a RegLocationType value)
     unsigned wide:1;
-    unsigned fp:1;      // Hint for float/double
-    u1 lowReg:6;        // First physical register
-    u1 highReg:6;       // 2nd physical register (if wide)
-    s2 sRegLow;         // SSA name for low Dalvik word
-    unsigned home:1;    // Does this represent the home location?
-    RegLocationType fpLocation:2; // Used only for non-SSA loc records
-    u1 fpLowReg:6;                // Used only for non-SSA loc records
-    u1 fpHighReg:6;               // Used only for non-SSA loc records
-    int spOffset:17;
+    unsigned defined:1;   // Do we know the type? (set together with fp or core)
+    unsigned fp:1;        // Floating point?
+    unsigned core:1;      // Non-floating point?
+    unsigned highWord:1;  // High word of pair? (setFp/setCore leave such entries untyped)
+    unsigned home:1;      // Does this represent the home location?
+    u1 lowReg;            // First physical register
+    u1 highReg;           // 2nd physical register (if wide)
+    s2 sRegLow;           // SSA name for low Dalvik word
 } RegLocation;
 
 #define INVALID_SREG (-1)
 #define INVALID_VREG (0xFFFFU)
-#define INVALID_REG (0x3F)
+#define INVALID_REG (0xFF)
 #define INVALID_OFFSET (-1)
 
 typedef enum BBType {
@@ -233,6 +240,9 @@
     RegLocation* regLocation;
     int sequenceNumber;
 
+    /* Keep track of Dalvik vReg to physical register mappings */
+    PromotionMap* promotionMap;
+
     /*
      * Set to the Dalvik PC of the switch instruction if it has more than
      * MAX_CHAINED_SWITCH_CASES cases.
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index c0dcaf7..0a7d079 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -19,8 +19,8 @@
 
 #include "Dalvik.h"
 
-/* Each arena page has some overhead, so take a few bytes off 8k */
-#define ARENA_DEFAULT_SIZE 8100
+/* Each arena page has some overhead, so take 256 bytes off 256KB */
+#define ARENA_DEFAULT_SIZE ((256 * 1024) - 256)
 
 /* Allocate the initial memory block for arena-based allocation */
 bool oatHeapInit(void);
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 09a362e..e73ccc9 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -50,13 +50,13 @@
     DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE,
 
     // 07 OP_MOVE_OBJECT vA, vB
-    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE,
+    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B,
 
     // 08 OP_MOVE_OBJECT_FROM16 vAA, vBBBB
-    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE,
+    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B,
 
     // 09 OP_MOVE_OBJECT_16 vAAAA, vBBBB
-    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE,
+    DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B,
 
     // 0A OP_MOVE_RESULT vAA
     DF_DA,
@@ -65,10 +65,10 @@
     DF_DA_WIDE,
 
     // 0C OP_MOVE_RESULT_OBJECT vAA
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // 0D OP_MOVE_EXCEPTION vAA
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // 0E OP_RETURN_VOID
     DF_NOP,
@@ -80,7 +80,7 @@
     DF_UA_WIDE,
 
     // 11 OP_RETURN_OBJECT vAA
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 12 OP_CONST_4 vA, #+B
     DF_DA | DF_SETS_CONST,
@@ -107,34 +107,34 @@
     DF_DA_WIDE | DF_SETS_CONST,
 
     // 1A OP_CONST_STRING vAA, string@BBBB
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // 1B OP_CONST_STRING_JUMBO vAA, string@BBBBBBBB
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // 1C OP_CONST_CLASS vAA, type@BBBB
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // 1D OP_MONITOR_ENTER vAA
-    DF_UA | DF_NULL_CHK_0,
+    DF_UA | DF_NULL_CHK_0 | DF_CORE_A,
 
     // 1E OP_MONITOR_EXIT vAA
-    DF_UA | DF_NULL_CHK_0,
+    DF_UA | DF_NULL_CHK_0 | DF_CORE_A,
 
     // 1F OP_CHK_CAST vAA, type@BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 20 OP_INSTANCE_OF vA, vB, type@CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 21 OP_ARRAY_LENGTH vA, vB
-    DF_DA | DF_UB | DF_NULL_CHK_0,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B,
 
     // 22 OP_NEW_INSTANCE vAA, type@BBBB
-    DF_DA | DF_NON_NULL_DST,
+    DF_DA | DF_NON_NULL_DST | DF_CORE_A,
 
     // 23 OP_NEW_ARRAY vA, vB, type@CCCC
-    DF_DA | DF_UB | DF_NON_NULL_DST,
+    DF_DA | DF_UB | DF_NON_NULL_DST | DF_CORE_A | DF_CORE_B,
 
     // 24 OP_FILLED_NEW_ARRAY {vD, vE, vF, vG, vA}
     DF_FORMAT_35C | DF_NON_NULL_RET,
@@ -143,10 +143,10 @@
     DF_FORMAT_3RC | DF_NON_NULL_RET,
 
     // 26 OP_FILL_ARRAY_DATA vAA, +BBBBBBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 27 OP_THROW vAA
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 28 OP_GOTO
     DF_NOP,
@@ -164,56 +164,56 @@
     DF_UA,
 
     // 2D OP_CMPL_FLOAT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C,
+    DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A,
 
     // 2E OP_CMPG_FLOAT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C,
+    DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A,
 
     // 2F OP_CMPL_DOUBLE vAA, vBB, vCC
-    DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C,
+    DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A,
 
     // 30 OP_CMPG_DOUBLE vAA, vBB, vCC
-    DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C,
+    DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A,
 
     // 31 OP_CMP_LONG vAA, vBB, vCC
-    DF_DA | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 32 OP_IF_EQ vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 33 OP_IF_NE vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 34 OP_IF_LT vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 35 OP_IF_GE vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 36 OP_IF_GT vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 37 OP_IF_LE vA, vB, +CCCC
-    DF_UA | DF_UB,
+    DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
 
     // 38 OP_IF_EQZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 39 OP_IF_NEZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 3A OP_IF_LTZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 3B OP_IF_GEZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 3C OP_IF_GTZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 3D OP_IF_LEZ vAA, +BBBB
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // 3E OP_UNUSED_3E
     DF_NOP,
@@ -234,88 +234,88 @@
     DF_NOP,
 
     // 44 OP_AGET vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 45 OP_AGET_WIDE vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 46 OP_AGET_OBJECT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 47 OP_AGET_BOOLEAN vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 48 OP_AGET_BYTE vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 49 OP_AGET_CHAR vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 4A OP_AGET_SHORT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C,
 
     // 4B OP_APUT vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 4C OP_APUT_WIDE vAA, vBB, vCC
-    DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 4D OP_APUT_OBJECT vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 4E OP_APUT_BOOLEAN vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 4F OP_APUT_BYTE vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 50 OP_APUT_CHAR vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 51 OP_APUT_SHORT vAA, vBB, vCC
-    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
 
     // 52 OP_IGET vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 53 OP_IGET_WIDE vA, vB, field@CCCC
-    DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 54 OP_IGET_OBJECT vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 55 OP_IGET_BOOLEAN vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 56 OP_IGET_BYTE vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 57 OP_IGET_CHAR vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 58 OP_IGET_SHORT vA, vB, field@CCCC
-    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B,
 
     // 59 OP_IPUT vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5A OP_IPUT_WIDE vA, vB, field@CCCC
-    DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5B OP_IPUT_OBJECT vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5C OP_IPUT_BOOLEAN vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5D OP_IPUT_BYTE vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5E OP_IPUT_CHAR vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 5F OP_IPUT_SHORT vA, vB, field@CCCC
-    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
 
     // 60 OP_SGET vAA, field@BBBB
     DF_DA | DF_IS_GETTER,
@@ -324,7 +324,7 @@
     DF_DA_WIDE | DF_IS_GETTER,
 
     // 62 OP_SGET_OBJECT vAA, field@BBBB
-    DF_DA | DF_IS_GETTER,
+    DF_DA | DF_IS_GETTER | DF_CORE_A,
 
     // 63 OP_SGET_BOOLEAN vAA, field@BBBB
     DF_DA | DF_IS_GETTER,
@@ -345,7 +345,7 @@
     DF_UA_WIDE | DF_IS_SETTER,
 
     // 69 OP_SPUT_OBJECT vAA, field@BBBB
-    DF_UA | DF_IS_SETTER,
+    DF_UA | DF_IS_SETTER | DF_CORE_A,
 
     // 6A OP_SPUT_BOOLEAN vAA, field@BBBB
     DF_UA | DF_IS_SETTER,
@@ -399,16 +399,16 @@
     DF_NOP,
 
     // 7B OP_NEG_INT vA, vB
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 7C OP_NOT_INT vA, vB
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 7D OP_NEG_LONG vA, vB
-    DF_DA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // 7E OP_NOT_LONG vA, vB
-    DF_DA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // 7F OP_NEG_FLOAT vA, vB
     DF_DA | DF_UB | DF_FP_A | DF_FP_B,
@@ -417,115 +417,115 @@
     DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B,
 
     // 81 OP_INT_TO_LONG vA, vB
-    DF_DA_WIDE | DF_UB,
+    DF_DA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 82 OP_INT_TO_FLOAT vA, vB
-    DF_DA | DF_UB | DF_FP_A,
+    DF_DA | DF_UB | DF_FP_A | DF_CORE_B,
 
     // 83 OP_INT_TO_DOUBLE vA, vB
-    DF_DA_WIDE | DF_UB | DF_FP_A,
+    DF_DA_WIDE | DF_UB | DF_FP_A | DF_CORE_B,
 
     // 84 OP_LONG_TO_INT vA, vB
-    DF_DA | DF_UB_WIDE,
+    DF_DA | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // 85 OP_LONG_TO_FLOAT vA, vB
-    DF_DA | DF_UB_WIDE | DF_FP_A,
+    DF_DA | DF_UB_WIDE | DF_FP_A | DF_CORE_B,
 
     // 86 OP_LONG_TO_DOUBLE vA, vB
-    DF_DA_WIDE | DF_UB_WIDE | DF_FP_A,
+    DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_CORE_B,
 
     // 87 OP_FLOAT_TO_INT vA, vB
-    DF_DA | DF_UB | DF_FP_B,
+    DF_DA | DF_UB | DF_FP_B | DF_CORE_A,
 
     // 88 OP_FLOAT_TO_LONG vA, vB
-    DF_DA_WIDE | DF_UB | DF_FP_B,
+    DF_DA_WIDE | DF_UB | DF_FP_B | DF_CORE_A,
 
     // 89 OP_FLOAT_TO_DOUBLE vA, vB
     DF_DA_WIDE | DF_UB | DF_FP_A | DF_FP_B,
 
     // 8A OP_DOUBLE_TO_INT vA, vB
-    DF_DA | DF_UB_WIDE | DF_FP_B,
+    DF_DA | DF_UB_WIDE | DF_FP_B | DF_CORE_A,
 
     // 8B OP_DOUBLE_TO_LONG vA, vB
-    DF_DA_WIDE | DF_UB_WIDE | DF_FP_B,
+    DF_DA_WIDE | DF_UB_WIDE | DF_FP_B | DF_CORE_A,
 
     // 8C OP_DOUBLE_TO_FLOAT vA, vB
     DF_DA | DF_UB_WIDE | DF_FP_A | DF_FP_B,
 
     // 8D OP_INT_TO_BYTE vA, vB
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 8E OP_INT_TO_CHAR vA, vB
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 8F OP_INT_TO_SHORT vA, vB
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // 90 OP_ADD_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR,
+    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 91 OP_SUB_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR,
+    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 92 OP_MUL_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 93 OP_DIV_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 94 OP_REM_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 95 OP_AND_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 96 OP_OR_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 97 OP_XOR_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 98 OP_SHL_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 99 OP_SHR_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9A OP_USHR_INT vAA, vBB, vCC
-    DF_DA | DF_UB | DF_UC,
+    DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9B OP_ADD_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9C OP_SUB_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9D OP_MUL_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9E OP_DIV_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // 9F OP_REM_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A0 OP_AND_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A1 OP_OR_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A2 OP_XOR_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A3 OP_SHL_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A4 OP_SHR_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A5 OP_USHR_LONG vAA, vBB, vCC
-    DF_DA_WIDE | DF_UB_WIDE | DF_UC,
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C,
 
     // A6 OP_ADD_FLOAT vAA, vBB, vCC
     DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C,
@@ -558,70 +558,70 @@
     DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_FP_A | DF_FP_B | DF_FP_C,
 
     // B0 OP_ADD_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B1 OP_SUB_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B2 OP_MUL_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B3 OP_DIV_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B4 OP_REM_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B5 OP_AND_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B6 OP_OR_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B7 OP_XOR_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B8 OP_SHL_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // B9 OP_SHR_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // BA OP_USHR_INT_2ADDR vA, vB
-    DF_DA | DF_UA | DF_UB,
+    DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // BB OP_ADD_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // BC OP_SUB_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // BD OP_MUL_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // BE OP_DIV_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // BF OP_REM_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // C0 OP_AND_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // C1 OP_OR_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // C2 OP_XOR_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
 
     // C3 OP_SHL_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // C4 OP_SHR_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // C5 OP_USHR_LONG_2ADDR vA, vB
-    DF_DA_WIDE | DF_UA_WIDE | DF_UB,
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // C6 OP_ADD_FLOAT_2ADDR vA, vB
     DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B,
@@ -654,67 +654,67 @@
     DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B,
 
     // D0 OP_ADD_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D1 OP_RSUB_INT vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D2 OP_MUL_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D3 OP_DIV_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D4 OP_REM_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D5 OP_AND_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D6 OP_OR_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D7 OP_XOR_INT_LIT16 vA, vB, #+CCCC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // D8 OP_ADD_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB | DF_IS_LINEAR,
+    DF_DA | DF_UB | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B,
 
     // D9 OP_RSUB_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DA OP_MUL_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DB OP_DIV_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DC OP_REM_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DD OP_AND_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DE OP_OR_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // DF OP_XOR_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // E0 OP_SHL_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // E1 OP_SHR_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // E2 OP_USHR_INT_LIT8 vAA, vBB, #+CC
-    DF_DA | DF_UB,
+    DF_DA | DF_UB | DF_CORE_A | DF_CORE_B,
 
     // E3 OP_IGET_VOLATILE
-    DF_DA | DF_UB | DF_NULL_CHK_0,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_B,
 
     // E4 OP_IPUT_VOLATILE
-    DF_UA | DF_UB | DF_NULL_CHK_1,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_B,
 
     // E5 OP_SGET_VOLATILE
     DF_DA,
@@ -723,13 +723,13 @@
     DF_UA,
 
     // E7 OP_IGET_OBJECT_VOLATILE
-    DF_DA | DF_UB | DF_NULL_CHK_0,
+    DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B,
 
     // E8 OP_IGET_WIDE_VOLATILE
-    DF_DA_WIDE | DF_UB | DF_NULL_CHK_0,
+    DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_CORE_B,
 
     // E9 OP_IPUT_WIDE_VOLATILE
-    DF_UA_WIDE | DF_UB | DF_NULL_CHK_1,
+    DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_CORE_B,
 
     // EA OP_SGET_WIDE_VOLATILE
     DF_DA_WIDE,
@@ -786,13 +786,13 @@
     DF_FORMAT_3RC | DF_NULL_CHK_OUT0,
 
     // FC OP_IPUT_OBJECT_VOLATILE
-    DF_UA | DF_UB | DF_NULL_CHK_1,
+    DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_A | DF_CORE_B,
 
     // FD OP_SGET_OBJECT_VOLATILE
-    DF_DA,
+    DF_DA | DF_CORE_A,
 
     // FE OP_SPUT_OBJECT_VOLATILE
-    DF_UA,
+    DF_UA | DF_CORE_A,
 
     // FF OP_DISPATCH_FF
     DF_NOP,
diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h
index e4a3726..1696e44 100644
--- a/src/compiler/Dataflow.h
+++ b/src/compiler/Dataflow.h
@@ -47,6 +47,9 @@
     kFPA,
     kFPB,
     kFPC,
+    kCoreA,
+    kCoreB,
+    kCoreC,
     kGetter,
     kSetter,
 } DataFlowAttributes;
@@ -78,6 +81,9 @@
 #define DF_FP_A                 (1 << kFPA)
 #define DF_FP_B                 (1 << kFPB)
 #define DF_FP_C                 (1 << kFPC)
+#define DF_CORE_A               (1 << kCoreA)
+#define DF_CORE_B               (1 << kCoreB)
+#define DF_CORE_C               (1 << kCoreC)
 #define DF_IS_GETTER            (1 << kGetter)
 #define DF_IS_SETTER            (1 << kSetter)
 
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 33dbba4..015aee9 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -23,12 +23,12 @@
 
 /* Default optimizer/debug setting for the compiler. */
 uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations
-     (1 << kLoadStoreElimination) |
-     (1 << kLoadHoisting) |
-     (1 << kSuppressLoads) |
-     (1 << kNullCheckElimination) |
-     (1 << kPromoteRegs) |
-     (1 << kTrackLiveTemps) |
+     //(1 << kLoadStoreElimination) |
+     //(1 << kLoadHoisting) |
+     //(1 << kSuppressLoads) |
+     //(1 << kNullCheckElimination) |
+     //(1 << kPromoteRegs) |
+     //(1 << kTrackLiveTemps) |
      0;
 
 uint32_t compilerDebugFlags = 0 |     // Enable debug/testing modes
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index aaf9b97..b4cc0b5 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -21,8 +21,25 @@
 
 STATIC bool setFp(CompilationUnit* cUnit, int index, bool isFP) {
     bool change = false;
+    if (cUnit->regLocation[index].highWord) {  // high half of a wide pair: never typed here
+        return change;  // still false at this point
+    }
     if (isFP && !cUnit->regLocation[index].fp) {
         cUnit->regLocation[index].fp = true;
+        cUnit->regLocation[index].defined = true;  // the type is now known
+        change = true;  // report the update to the caller
+    }
+    return change;
+}
+
+STATIC bool setCore(CompilationUnit* cUnit, int index, bool isCore) {  // returns true iff regLocation[index] was updated
+    bool change = false;
+    if (cUnit->regLocation[index].highWord) {  // high half of a wide pair: never typed here
+        return change;  // still false at this point
+    }
+    if (isCore && !cUnit->regLocation[index].defined) {  // tests !defined (setFp tests !fp): an already-typed entry is left as is
+        cUnit->regLocation[index].core = true;
+        cUnit->regLocation[index].defined = true;  // the type is now known
         change = true;
     }
     return change;
 }
@@ -66,21 +83,60 @@
         SSARepresentation *ssaRep = mir->ssaRep;
         if (ssaRep) {
             int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
+
+            // Handle defs
+            if (attrs & (DF_DA | DF_DA_WIDE)) {
+                if (attrs & DF_CORE_A) {
+                    changed |= setCore(cUnit, ssaRep->defs[0], true);
+                }
+                if (attrs & DF_DA_WIDE) {
+                    cUnit->regLocation[ssaRep->defs[0]].wide = true;
+                    cUnit->regLocation[ssaRep->defs[1]].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, ssaRep->defs[0])+1,
+                              oatS2VReg(cUnit, ssaRep->defs[1]));
+                }
+            }
+
+            // Handle uses
             int next = 0;
-            if (attrs & DF_DA_WIDE) {
-                cUnit->regLocation[ssaRep->defs[0]].wide = true;
+            if (attrs & (DF_UA | DF_UA_WIDE)) {
+                if (attrs & DF_CORE_A) {
+                    changed |= setCore(cUnit, ssaRep->uses[next], true);
+                }
+                if (attrs & DF_UA_WIDE) {
+                    cUnit->regLocation[ssaRep->uses[next]].wide = true;
+                    cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+                              oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+                    next += 2;
+                } else {
+                    next++;
+                }
             }
-            if (attrs & DF_UA_WIDE) {
-                cUnit->regLocation[ssaRep->uses[next]].wide = true;
-                next += 2;
+            if (attrs & (DF_UB | DF_UB_WIDE)) {
+                if (attrs & DF_CORE_B) {
+                    changed |= setCore(cUnit, ssaRep->uses[next], true);
+                }
+                if (attrs & DF_UB_WIDE) {
+                    cUnit->regLocation[ssaRep->uses[next]].wide = true;
+                    cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+                              oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+                    next += 2;
+                } else {
+                    next++;
+                }
             }
-            if (attrs & DF_UB_WIDE) {
-                cUnit->regLocation[ssaRep->uses[next]].wide = true;
-                next += 2;
-            }
-            if (attrs & DF_UC_WIDE) {
-                cUnit->regLocation[ssaRep->uses[next]].wide = true;
-                next += 2;
+            if (attrs & (DF_UC | DF_UC_WIDE)) {
+                if (attrs & DF_CORE_C) {
+                    changed |= setCore(cUnit, ssaRep->uses[next], true);
+                }
+                if (attrs & DF_UC_WIDE) {
+                    cUnit->regLocation[ssaRep->uses[next]].wide = true;
+                    cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+                              oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+                }
             }
 
            // Special-case handling for format 35c/3rc invokes
@@ -97,6 +153,8 @@
                 // If this is a non-static invoke, skip implicit "this"
                 if (((mir->dalvikInsn.opcode != OP_INVOKE_STATIC) &&
                      (mir->dalvikInsn.opcode != OP_INVOKE_STATIC_RANGE))) {
+                   cUnit->regLocation[ssaRep->uses[next]].defined = true;
+                   cUnit->regLocation[ssaRep->uses[next]].core = true;
                    next++;
                 }
                 uint32_t cpos = 1;
@@ -108,16 +166,26 @@
                                 ssaRep->fpUse[i] = true;
                                 ssaRep->fpUse[i+1] = true;
                                 cUnit->regLocation[ssaRep->uses[i]].wide = true;
+                                cUnit->regLocation[ssaRep->uses[i+1]].highWord
+                                    = true;
+                                DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
+                                          oatS2VReg(cUnit, ssaRep->uses[i+1]));
                                 i++;
                                 break;
                             case 'J':
                                 cUnit->regLocation[ssaRep->uses[i]].wide = true;
+                                cUnit->regLocation[ssaRep->uses[i+1]].highWord
+                                    = true;
+                                DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
+                                          oatS2VReg(cUnit, ssaRep->uses[i+1]));
+                                changed |= setCore(cUnit, ssaRep->uses[i],true);
                                 i++;
                                break;
                             case 'F':
                                 ssaRep->fpUse[i] = true;
                                 break;
                            default:
+                                changed |= setCore(cUnit,ssaRep->uses[i], true);
                                 break;
                         }
                         i++;
@@ -135,13 +203,25 @@
             }
             // Special-case handling for moves & Phi
             if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
-                bool isFP = cUnit->regLocation[ssaRep->defs[0]].fp;
+                // If any of our inputs or outputs is defined, set all
+                bool definedFP = false;
+                bool definedCore = false;
+                definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined &&
+                              cUnit->regLocation[ssaRep->defs[0]].fp);
+                definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined &&
+                                cUnit->regLocation[ssaRep->defs[0]].core);
                 for (int i = 0; i < ssaRep->numUses; i++) {
-                    isFP |= cUnit->regLocation[ssaRep->uses[i]].fp;
+                    definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined &&
+                                  cUnit->regLocation[ssaRep->uses[i]].fp);
+                    definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined
+                                  && cUnit->regLocation[ssaRep->uses[i]].core);
                 }
-                changed |= setFp(cUnit, ssaRep->defs[0], isFP);
+                DCHECK(!(definedFP && definedCore));
+                changed |= setFp(cUnit, ssaRep->defs[0], definedFP);
+                changed |= setCore(cUnit, ssaRep->defs[0], definedCore);
                 for (int i = 0; i < ssaRep->numUses; i++) {
-                    changed |= setFp(cUnit, ssaRep->uses[i], isFP);
+                    changed |= setFp(cUnit, ssaRep->uses[i], definedFP);
+                    changed |= setCore(cUnit, ssaRep->uses[i], definedCore);
                 }
             }
         }
@@ -155,20 +235,19 @@
 {
     for (int i = 0; i < count; i++) {
         char buf[100];
-        snprintf(buf, 100, "Loc[%02d] : %s, %c %c r%d r%d S%d : %s s%d s%d",
+        snprintf(buf, 100, "Loc[%02d] : %s, %c %c %c %c %c %c%d %c%d S%d",
              i, storageName[table[i].location], table[i].wide ? 'W' : 'N',
-             table[i].fp ? 'F' : 'C', table[i].lowReg, table[i].highReg,
-             table[i].sRegLow, storageName[table[i].fpLocation],
-             table[i].fpLowReg & FP_REG_MASK, table[i].fpHighReg &
-             FP_REG_MASK);
+             table[i].defined ? 'D' : 'U', table[i].fp ? 'F' : 'C',
+             table[i].highWord ? 'H' : 'L', table[i].home ? 'h' : 't',
+             FPREG(table[i].lowReg) ? 's' : 'r', table[i].lowReg & FP_REG_MASK,
+             FPREG(table[i].highReg) ? 's' : 'r', table[i].highReg & FP_REG_MASK,
+             table[i].sRegLow);
         LOG(INFO) << buf;
     }
 }
 
-static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, INVALID_REG,
-                                     INVALID_REG, INVALID_SREG, 0,
-                                     kLocDalvikFrame, INVALID_REG, INVALID_REG,
-                                     INVALID_OFFSET};
+static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0,
+                                     INVALID_REG, INVALID_REG, INVALID_SREG};
 
 /*
  * Simple register allocation.  Some Dalvik virtual registers may
@@ -189,6 +268,10 @@
     }
     cUnit->regLocation = loc;
 
+    /* Allocate the promotion map */
+    cUnit->promotionMap = (PromotionMap*)oatNew( cUnit->method->NumRegisters()
+                           * sizeof(cUnit->promotionMap[0]), true);
+
     /* Add types of incoming arguments based on signature */
     int numRegs = cUnit->method->NumRegisters();
     int numIns = cUnit->method->NumIns();
@@ -196,16 +279,39 @@
         int sReg = numRegs - numIns;
         if (!cUnit->method->IsStatic()) {
             // Skip past "this"
+            cUnit->regLocation[sReg].defined = true;
+            cUnit->regLocation[sReg].core = true;
             sReg++;
         }
         const String* shorty = cUnit->method->GetShorty();
         for (int i = 1; i < shorty->GetLength(); i++) {
-            char arg = shorty->CharAt(i);
-            // Is it wide?
-            if ((arg == 'D') || (arg == 'J')) {
-                cUnit->regLocation[sReg].wide = true;
-                cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp;
-                sReg++;  // Skip to next
+            switch(shorty->CharAt(i)) {
+                case 'D':
+                    cUnit->regLocation[sReg].wide = true;
+                    cUnit->regLocation[sReg+1].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
+                              oatS2VReg(cUnit, sReg+1));
+                    cUnit->regLocation[sReg].fp = true;
+                    cUnit->regLocation[sReg].defined = true;
+                    sReg++;
+                    break;
+                case 'J':
+                    cUnit->regLocation[sReg].wide = true;
+                    cUnit->regLocation[sReg+1].highWord = true;
+                    DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
+                              oatS2VReg(cUnit, sReg+1));
+                    cUnit->regLocation[sReg].core = true;
+                    cUnit->regLocation[sReg].defined = true;
+                    sReg++;
+                    break;
+                case 'F':
+                    cUnit->regLocation[sReg].fp = true;
+                    cUnit->regLocation[sReg].defined = true;
+                    break;
+                default:
+                    cUnit->regLocation[sReg].core = true;
+                    cUnit->regLocation[sReg].defined = true;
+                    break;
             }
             sReg++;
         }
@@ -254,10 +360,4 @@
                         cUnit->numPadding + 2) * 4;
     cUnit->insOffset = cUnit->frameSize + 4;
     cUnit->regsOffset = (cUnit->numOuts + cUnit->numPadding + 1) * 4;
-
-    /* Compute sp-relative home location offsets */
-    for (i = 0; i < cUnit->numSSARegs; i++) {
-        int vReg = oatS2VReg(cUnit, cUnit->regLocation[i].sRegLow);
-        cUnit->regLocation[i].spOffset = oatVRegOffset(cUnit, vReg);
-    }
 }
diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc
index 0fc8a80..e3c20ec 100644
--- a/src/compiler/Utility.cc
+++ b/src/compiler/Utility.cc
@@ -61,6 +61,7 @@
          */
         if (currentArena->next) {
             currentArena = currentArena->next;
+            currentArena->bytesAllocated = 0;
             goto retry;
         }
 
@@ -88,12 +89,10 @@
 /* Reclaim all the arena blocks allocated so far */
 void oatArenaReset(void)
 {
-    ArenaMemBlock *block;
-
-    for (block = arenaHead; block; block = block->next) {
-        block->bytesAllocated = 0;
-    }
     currentArena = arenaHead;
+    if (currentArena) {  // nothing to rewind when the arena list is empty
+        currentArena->bytesAllocated = 0;  // later blocks: zeroed on reuse
+    }
 }
 
 /* Growable List initialization */
@@ -201,6 +200,15 @@
     oatArchDump();
 }
 
+static const uint32_t checkMasks[32] = {  // checkMasks[n] == 1u << n
+    0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
+    0x00000020, 0x00000040, 0x00000080, 0x00000100, 0x00000200,
+    0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000,
+    0x00008000, 0x00010000, 0x00020000, 0x00040000, 0x00080000,
+    0x00100000, 0x00200000, 0x00400000, 0x00800000, 0x01000000,
+    0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+    0x40000000, 0x80000000 };
+
 /*
  * Allocate a bit vector with enough space to hold at least the specified
  * number of bits.
@@ -231,7 +239,7 @@
 {
     DCHECK_LT(num, pBits->storageSize * sizeof(u4) * 8);
 
-    unsigned int val = pBits->storage[num >> 5] & (1 << (num & 0x1f));
+    unsigned int val = pBits->storage[num >> 5] & checkMasks[num & 0x1f];
     return (val != 0);
 }
 
@@ -270,7 +278,7 @@
         pBits->storageSize = newSize;
     }
 
-    pBits->storage[num >> 5] |= 1 << (num & 0x1f);
+    pBits->storage[num >> 5] |= checkMasks[num & 0x1f];
     return true;
 }
 
@@ -288,7 +296,7 @@
         LOG(FATAL) << "Attempt to clear a bit not set in the vector yet";;
     }
 
-    pBits->storage[num >> 5] &= ~(1 << (num & 0x1f));
+    pBits->storage[num >> 5] &= ~checkMasks[num & 0x1f];
     return true;
 }
 
@@ -462,13 +470,20 @@
     DCHECK_EQ(iterator->bitSize, pBits->storageSize * sizeof(u4) * 8);
     if (bitIndex >= iterator->bitSize) return -1;
 
-    for (; bitIndex < iterator->bitSize; bitIndex++) {
+    for (; bitIndex < iterator->bitSize;) {
         unsigned int wordIndex = bitIndex >> 5;
-        unsigned int mask = 1 << (bitIndex & 0x1f);
-        if (pBits->storage[wordIndex] & mask) {
+        unsigned int bitPos = bitIndex & 0x1f;
+        unsigned int word = pBits->storage[wordIndex];
+        if (word & checkMasks[bitPos]) {
             iterator->idx = bitIndex+1;
             return bitIndex;
         }
+        if (word == 0) {
+            // Helps if this is a sparse vector
+            bitIndex += (32 - bitPos);
+        } else {
+            bitIndex++;
+        }
     }
     /* No more set bits */
     return -1;
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index 55ed8af..5dbcd97 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -58,7 +58,7 @@
         genRegCopy(cUnit, reg1, rlSrc.lowReg);
     } else {
         DCHECK(rlSrc.location == kLocDalvikFrame);
-        loadWordDisp(cUnit, rSP, rlSrc.spOffset, reg1);
+        loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1);
     }
 }
 
@@ -88,7 +88,8 @@
         genRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg);
     } else {
         DCHECK(rlSrc.location == kLocDalvikFrame);
-        loadBaseDispWide(cUnit, NULL, rSP, rlSrc.spOffset,
+        loadBaseDispWide(cUnit, NULL, rSP,
+                         oatSRegOffset(cUnit, rlSrc.sRegLow),
                          regLo, regHi, INVALID_SREG);
     }
 }
@@ -156,7 +157,8 @@
     if (oatIsDirty(cUnit, rlDest.lowReg) &&
         oatLiveOut(cUnit, rlDest.sRegLow)) {
         defStart = (LIR* )cUnit->lastLIRInsn;
-        storeBaseDisp(cUnit, rSP, rlDest.spOffset, rlDest.lowReg, kWord);
+        storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow),
+                      rlDest.lowReg, kWord);
         oatMarkClean(cUnit, rlDest);
         defEnd = (LIR* )cUnit->lastLIRInsn;
         oatMarkDef(cUnit, rlDest, defStart, defEnd);
@@ -183,10 +185,6 @@
 {
     LIR* defStart;
     LIR* defEnd;
-    if (FPREG(rlSrc.lowReg)!=FPREG(rlSrc.highReg)) {
-        LOG(WARNING) << "rlSrc.lowreg:" << rlSrc.lowReg << ", rlSrc.highReg:"
-                     << rlSrc.highReg;
-    }
     DCHECK_EQ(FPREG(rlSrc.lowReg), FPREG(rlSrc.highReg));
     DCHECK(rlDest.wide);
     DCHECK(rlSrc.wide);
@@ -230,7 +228,7 @@
         defStart = (LIR*)cUnit->lastLIRInsn;
         DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1),
                 oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow)));
-        storeBaseDispWide(cUnit, rSP, rlDest.spOffset,
+        storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow),
                           rlDest.lowReg, rlDest.highReg);
         oatMarkClean(cUnit, rlDest);
         defEnd = (LIR*)cUnit->lastLIRInsn;
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 58ab1d3..d2e5f0a 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -27,6 +27,8 @@
 
 /* Implemented in the codegen/<target>/ArchUtility.c */
 void oatCodegenDump(CompilationUnit* cUnit);
+void oatDumpPromotionMap(CompilationUnit* cUnit);
+void oatDumpFullPromotionMap(CompilationUnit* cUnit);
 
 /* Implemented in codegen/<target>/Ralloc.c */
 void oatSimpleRegAlloc(CompilationUnit* cUnit);
diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h
index e343ec5..fee0e9a 100644
--- a/src/compiler/codegen/Ralloc.h
+++ b/src/compiler/codegen/Ralloc.h
@@ -232,6 +232,7 @@
 
 extern void oatDoPromotion(CompilationUnit* cUnit);
 extern int oatVRegOffset(CompilationUnit* cUnit, int reg);
+extern int oatSRegOffset(CompilationUnit* cUnit, int reg);
 extern void oatDumpCoreRegPool(CompilationUnit* cUint);
 extern void oatDumpFPRegPool(CompilationUnit* cUint);
 extern bool oatCheckCorePoolSanity(CompilationUnit* cUnit);
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 7fd062d..1b0fb90 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -186,9 +186,10 @@
             cUnit->coreSpillMask |= (1 << res);
             cUnit->coreVmapTable.push_back(sReg);
             cUnit->numCoreSpills++;
-            cUnit->regLocation[sReg].location = kLocPhysReg;
-            cUnit->regLocation[sReg].lowReg = res;
-            cUnit->regLocation[sReg].home = true;
+            //  Should be promoting based on initial sReg set
+            DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+            cUnit->promotionMap[sReg].coreLocation = kLocPhysReg;
+            cUnit->promotionMap[sReg].coreReg = res;
             break;
         }
     }
@@ -231,10 +232,11 @@
             ((FPRegs[i].reg & 0x1) == 0) == even) {
             res = FPRegs[i].reg;
             FPRegs[i].inUse = true;
+            //  Should be promoting based on initial sReg set
+            DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
             markPreservedSingle(cUnit, sReg, res);
-            cUnit->regLocation[sReg].fpLocation = kLocPhysReg;
-            cUnit->regLocation[sReg].fpLowReg = res;
-            cUnit->regLocation[sReg].home = true;
+            cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
+            cUnit->promotionMap[sReg].fpReg = res;
             break;
         }
     }
@@ -252,9 +254,11 @@
 STATIC int allocPreservedDouble(CompilationUnit* cUnit, int sReg)
 {
     int res = -1; // Assume failure
-    if (cUnit->regLocation[sReg+1].fpLocation == kLocPhysReg) {
+    //  Should be promoting based on initial sReg set
+    DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+    if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) {
         // Upper reg is already allocated.  Can we fit?
-        int highReg = cUnit->regLocation[sReg+1].fpLowReg;
+        int highReg = cUnit->promotionMap[sReg+1].fpReg;
         if ((highReg & 1) == 0) {
             // High reg is even - fail.
             return res;
@@ -289,12 +293,10 @@
         }
     }
     if (res != -1) {
-        cUnit->regLocation[sReg].fpLocation = kLocPhysReg;
-        cUnit->regLocation[sReg].fpLowReg = res;
-        cUnit->regLocation[sReg].home = true;
-        cUnit->regLocation[sReg+1].fpLocation = kLocPhysReg;
-        cUnit->regLocation[sReg+1].fpLowReg = res + 1;
-        cUnit->regLocation[sReg+1].home = true;
+        cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
+        cUnit->promotionMap[sReg].fpReg = res;
+        cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg;
+        cUnit->promotionMap[sReg+1].fpReg = res + 1;
     }
     return res;
 }
@@ -312,7 +314,6 @@
     int res = -1;
     if (doubleStart) {
         res = allocPreservedDouble(cUnit, sReg);
-    } else {
     }
     if (res == -1) {
         res = allocPreservedSingle(cUnit, sReg, false /* try odd # */);
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index aef98fa..c4d3b6d 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -16,6 +16,7 @@
 
 #include "../../CompilerInternals.h"
 #include "ArmLIR.h"
+#include "../Ralloc.h"
 
 static const char* coreRegNames[16] = {
     "r0",
@@ -391,6 +392,38 @@
     }
 }
 
+void oatDumpPromotionMap(CompilationUnit *cUnit)
+{
+    // One log line per vReg: rN or SP+offset, then " : sN" if FP-promoted
+    for (int vReg = 0; vReg < cUnit->method->NumRegisters(); vReg++) {
+        const PromotionMap entry = cUnit->promotionMap[vReg];
+        const bool inCoreReg = (entry.coreLocation == kLocPhysReg);
+        char fpSuffix[100] = "";
+        if (entry.fpLocation == kLocPhysReg) {
+            snprintf(fpSuffix, sizeof(fpSuffix), " : s%d",
+                     entry.fpReg & FP_REG_MASK);
+        }
+        char line[100];
+        snprintf(line, sizeof(line), "V[%02d] -> %s%d%s", vReg,
+                 inCoreReg ? "r" : "SP+",
+                 inCoreReg ? entry.coreReg : oatSRegOffset(cUnit, vReg),
+                 fpSuffix);
+        LOG(INFO) << line;
+    }
+}
+
+void oatDumpFullPromotionMap(CompilationUnit *cUnit)
+{
+    // Raw dump of every PromotionMap field, one log line per vReg index
+    for (int vReg = 0; vReg < cUnit->method->NumRegisters(); vReg++) {
+        const PromotionMap entry = cUnit->promotionMap[vReg];
+        LOG(INFO) << vReg << " -> CL:" << (int)entry.coreLocation << ", CR:"
+                  << (int)entry.coreReg << ", FL:" << (int)entry.fpLocation
+                  << ", FR:" << (int)entry.fpReg << ", - "
+                  << (int)entry.firstInPair;
+    }
+}
+
 /* Dump instructions and constant pool contents */
 void oatCodegenDump(CompilationUnit* cUnit)
 {
@@ -414,22 +447,7 @@
         " bytes, Dalvik size is " << insnsSize * 2;
     LOG(INFO) << "expansion factor: " <<
          (float)cUnit->totalSize / (float)(insnsSize * 2);
-    for (int i = 0; i < method->NumRegisters(); i++) {
-        RegLocation loc = cUnit->regLocation[i];
-        char buf[100];
-        if (loc.fpLocation == kLocPhysReg) {
-            snprintf(buf, 100, " : s%d", loc.fpLowReg & FP_REG_MASK);
-        } else {
-            buf[0] = 0;
-        }
-        char buf2[100];
-        snprintf(buf2, 100, "V[%02d] -> %s%d%s", i,
-                 loc.location == kLocPhysReg ?
-                 "r" : "SP+", loc.location == kLocPhysReg ?
-                 loc.lowReg : loc.spOffset, buf);
-        LOG(INFO) << buf2;
-
-    }
+    oatDumpPromotionMap(cUnit);
     for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) {
         oatDumpLIRInsn(cUnit, lirInsn, 0);
     }
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 1e4022e..729e708 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -123,16 +123,13 @@
 #define rNone   (-1)
 
 /* RegisterLocation templates return values (r0, or r0/r1) */
-#define LOC_C_RETURN {kLocPhysReg, 0, 0, r0, INVALID_REG, INVALID_SREG, \
-                      1, kLocPhysReg, r0, INVALID_REG, INVALID_OFFSET}
-#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r0, r1, INVALID_SREG, \
-                      1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, INVALID_SREG}
+#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}
 /* RegisterLocation templates for interpState->retVal; */
-#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, r0, INVALID_REG, \
-                      INVALID_SREG, 1, kLocPhysReg, r0, INVALID_REG, \
-                      INVALID_OFFSET}
-#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, r0, r1, \
-                      INVALID_SREG, 1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \
+                      INVALID_SREG}
+#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, \
+                      INVALID_SREG}
 
  /*
  * Data structure tracking the mapping between a Dalvik register (pair) and a
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index ed8a5b2..4af3d07 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -37,7 +37,7 @@
 
 /* USE SSA names to count references of base Dalvik vRegs. */
 STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb,
-                      RefCounts* counts, bool fp)
+                      RefCounts* coreCounts, RefCounts* fpCounts)
 {
     MIR* mir;
     if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
@@ -47,59 +47,42 @@
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
         SSARepresentation *ssaRep = mir->ssaRep;
         if (ssaRep) {
-            int i;
-            int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
-            if (fp) {
-                // Mark 1st reg of double pairs
-                int first = 0;
-                int sReg;
-                if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0]));
-                    counts[sReg].doubleStart = true;
+            for (int i = 0; i < ssaRep->numDefs;) {
+                RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
+                RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+                int vReg = oatS2VReg(cUnit, ssaRep->defs[i]);
+                if (loc.defined) {
+                    counts[vReg].count++;
                 }
-                if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
-                }
-                if (attrs & DF_UA_WIDE) {
-                    first += 2;
-                }
-                if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
-                }
-                if (attrs & DF_UB_WIDE) {
-                    first += 2;
-                }
-                if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
+                if (loc.wide) {
+                    if (loc.defined) {
+                        if (loc.fp) {
+                            counts[vReg].doubleStart = true;
+                        }
+                        counts[vReg+1].count++;
+                    }
+                    i += 2;
+                } else {
+                    i++;
                 }
             }
-            for (i=0; i< ssaRep->numUses; i++) {
-                int origSreg = DECODE_REG(
-                    oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]));
-                DCHECK_LT(origSreg, cUnit->method->NumRegisters());
-                bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false;
-                if (fp == fpUse) {
-                    counts[origSreg].count++;
+            for (int i = 0; i < ssaRep->numUses;) {
+                RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
+                RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+                int vReg = oatS2VReg(cUnit, ssaRep->uses[i]);
+                if (loc.defined) {
+                    counts[vReg].count++;
                 }
-            }
-            for (i=0; i< ssaRep->numDefs; i++) {
-                if (attrs & DF_SETS_CONST) {
-                    // CONST opcodes are untyped - don't pollute the counts
-                    continue;
-                }
-                int origSreg = DECODE_REG(
-                    oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]));
-                DCHECK_LT(origSreg, cUnit->method->NumRegisters());
-                bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false;
-                if (fp == fpDef) {
-                    counts[origSreg].count++;
+                if (loc.wide) {
+                    if (loc.defined) {
+                        if (loc.fp) {
+                            counts[vReg].doubleStart = true;
+                        }
+                        counts[vReg+1].count++;
+                    }
+                    i += 2;
+                } else {
+                    i++;
                 }
             }
         }
@@ -159,8 +142,7 @@
         BasicBlock* bb;
         bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator);
         if (bb == NULL) break;
-        countRefs(cUnit, bb, coreRegs, false);
-        countRefs(cUnit, bb, fpRegs, true);
+        countRefs(cUnit, bb, coreRegs, fpRegs);
     }
 
     /*
@@ -178,21 +160,27 @@
     qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts);
     qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts);
 
+    if (cUnit->printMe) {
+        dumpCounts(coreRegs, numRegs, "Core regs after sort");
+        dumpCounts(fpRegs, numRegs, "Fp regs after sort");
+    }
+
     if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
         // Promote fpRegs
         for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
-            if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
+            if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
                 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
                     fpRegs[i].doubleStart);
                 if (reg < 0) {
-                   break;  // No more left
+                    break;  // No more left
                 }
             }
         }
 
         // Promote core regs
         for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
-            if (cUnit->regLocation[i].location != kLocPhysReg) {
+            if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation !=
+                    kLocPhysReg) {
                 int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
                 if (reg < 0) {
                    break;  // No more left
@@ -203,58 +191,69 @@
 
     // Now, update SSA names to new home locations
     for (int i = 0; i < cUnit->numSSARegs; i++) {
-        int baseSreg = cUnit->regLocation[i].sRegLow;
-        RegLocation *base = &cUnit->regLocation[baseSreg];
-        RegLocation *baseNext = &cUnit->regLocation[baseSreg+1];
         RegLocation *curr = &cUnit->regLocation[i];
-        if (curr->fp) {
-            /* Single or double, check fpLocation of base */
-            if (base->fpLocation == kLocPhysReg) {
-                if (curr->wide) {
-                    /* TUNING: consider alignment during allocation */
-                    if ((base->fpLowReg & 1) ||
-                        (baseNext->fpLocation != kLocPhysReg)) {
-                        /* Half-promoted or bad alignment - demote */
-                        curr->location = kLocDalvikFrame;
-                        curr->lowReg = INVALID_REG;
-                        curr->highReg = INVALID_REG;
-                        continue;
-                    }
-                    curr->highReg = baseNext->fpLowReg;
+        int baseVReg = oatS2VReg(cUnit, curr->sRegLow);
+        if (!curr->wide) {
+            if (curr->fp) {
+                if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].fpReg;
+                    curr->home = true;
                 }
-                curr->location = kLocPhysReg;
-                curr->lowReg = base->fpLowReg;
-                curr->home = true;
+            } else {
+                if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
+                    curr->home = true;
+                }
             }
+            curr->highReg = INVALID_REG;
         } else {
-            /* Core or wide */
-            if (base->location == kLocPhysReg) {
-                if (curr->wide) {
-                    /* Make sure upper half is also in reg or skip */
-                    if (baseNext->location != kLocPhysReg) {
-                        /* Only half promoted; demote to frame */
-                        curr->location = kLocDalvikFrame;
-                        curr->lowReg = INVALID_REG;
-                        curr->highReg = INVALID_REG;
-                        continue;
+            if (curr->highWord) {
+                continue;
+            }
+            if (curr->fp) {
+                if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) &&
+                    (cUnit->promotionMap[baseVReg+1].fpLocation ==
+                    kLocPhysReg)) {
+                    int lowReg = cUnit->promotionMap[baseVReg].fpReg;
+                    int highReg = cUnit->promotionMap[baseVReg+1].fpReg;
+                    // Doubles require pair of singles starting at even reg
+                    if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) {
+                        curr->location = kLocPhysReg;
+                        curr->lowReg = lowReg;
+                        curr->highReg = highReg;
+                        curr->home = true;
                     }
-                    curr->highReg = baseNext->lowReg;
                 }
-                curr->location = kLocPhysReg;
-                curr->lowReg = base->lowReg;
-                curr->home = true;
+            } else {
+                if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg)
+                     && (cUnit->promotionMap[baseVReg+1].coreLocation ==
+                     kLocPhysReg)) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
+                    curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg;
+                    curr->home = true;
+                }
             }
         }
     }
 }
 
-/* Returns sp-relative offset in bytes */
-extern int oatVRegOffset(CompilationUnit* cUnit, int reg)
+/* Sp-relative byte offset of a VReg (regs area, else the ins area) */
+extern int oatVRegOffset(CompilationUnit* cUnit, int vReg)
 {
-    return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) :
-            cUnit->insOffset + ((reg - cUnit->numRegs) << 2);
+    if (vReg < cUnit->numRegs) return cUnit->regsOffset + (vReg << 2);
+    return cUnit->insOffset + ((vReg - cUnit->numRegs) << 2);
 }
 
+/* Sp-relative byte offset of the VReg that oatS2VReg maps sReg to */
+extern int oatSRegOffset(CompilationUnit* cUnit, int sReg)
+{
+    const int vReg = oatS2VReg(cUnit, sReg);
+    return oatVRegOffset(cUnit, vReg);
+}
+
 /* Return sp-relative offset in bytes using Method* */
 extern int oatVRegOffsetFromMethod(Method* method, int reg)
 {
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 6331254..4a65771 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -17,10 +17,8 @@
 #define DISPLAY_MISSING_TARGETS (cUnit->enableDebug & \
     (1 << kDebugDisplayMissingTargets))
 
-STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, INVALID_REG,
-                                   INVALID_REG, INVALID_SREG, 0,
-                                   kLocDalvikFrame, INVALID_REG, INVALID_REG,
-                                   INVALID_OFFSET};
+STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, INVALID_REG,
+                                   INVALID_REG, INVALID_SREG};  // flag bits all 0; regs and sReg invalid
 
 /* Mark register usage state and return long retloc */
 STATIC RegLocation getRetLocWide(CompilationUnit* cUnit)
@@ -99,7 +97,8 @@
             RegLocation loc = oatUpdateLoc(cUnit,
                 oatGetSrc(cUnit, mir, i));
             if (loc.location == kLocPhysReg) {
-                storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
+                              loc.lowReg, kWord);
             }
         }
         /*
@@ -113,7 +112,8 @@
         int rVal = rLR;  // Using a lot of temps, rLR is known free here
         // Set up source pointer
         RegLocation rlFirst = oatGetSrc(cUnit, mir, 0);
-        opRegRegImm(cUnit, kOpAdd, rSrc, rSP, rlFirst.spOffset);
+        opRegRegImm(cUnit, kOpAdd, rSrc, rSP,
+                    oatSRegOffset(cUnit, rlFirst.sRegLow));
         // Set up the target pointer
         opRegRegImm(cUnit, kOpAdd, rDst, r0,
                     Array::DataOffset().Int32Value());
@@ -773,7 +773,8 @@
             } else {
                 // r2 & r3 can safely be used here
                 reg = r3;
-                loadWordDisp(cUnit, rSP, rlArg.spOffset + 4, reg);
+                loadWordDisp(cUnit, rSP,
+                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                 callState = nextCallInsn(cUnit, mir, dInsn, callState,
                                          rollback);
             }
@@ -872,20 +873,23 @@
         if (loc.wide) {
             loc = oatUpdateLocWide(cUnit, loc);
             if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
-                storeBaseDispWide(cUnit, rSP, loc.spOffset, loc.lowReg,
-                                  loc.highReg);
+                storeBaseDispWide(cUnit, rSP,
+                                  oatSRegOffset(cUnit, loc.sRegLow),
+                                  loc.lowReg, loc.highReg);
             }
             nextArg += 2;
         } else {
             loc = oatUpdateLoc(cUnit, loc);
             if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
-                storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
+                              loc.lowReg, kWord);
             }
             nextArg++;
         }
     }
 
-    int startOffset = cUnit->regLocation[mir->ssaRep->uses[3]].spOffset;
+    int startOffset = oatSRegOffset(cUnit,
+        cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
     int outsOffset = 4 /* Method* */ + (3 * 4);
     if (numArgs >= 20) {
         // Generate memcpy
@@ -1790,63 +1794,44 @@
     }
 }
 
-/* If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.
- * Note: at this pointCopy any ins that are passed in register to their
- * home location */
+/*
+ * Ins arriving in r1..r3: copy to the promoted reg (if any) and always store
+ * to the frame slot.  Ins arriving in the frame: perform initial assignment
+ * of the promoted reg.  NOTE(review): vReg passed to oatSRegOffset - confirm.
+ */
 STATIC void flushIns(CompilationUnit* cUnit)
 {
     if (cUnit->method->NumIns() == 0)
         return;
-    int inRegs = (cUnit->method->NumIns() > 2) ? 3
-                                               : cUnit->method->NumIns();
-    int startReg = r1;
-    int startLoc = cUnit->method->NumRegisters() -
+    int firstArgReg = r1;
+    int lastArgReg = r3;
+    int startVReg = cUnit->method->NumRegisters() -
         cUnit->method->NumIns();
-    for (int i = 0; i < inRegs; i++) {
-        RegLocation loc = cUnit->regLocation[startLoc + i];
-        //TUNING: be smarter about flushing ins to frame
-        storeBaseDisp(cUnit, rSP, loc.spOffset, startReg + i, kWord);
-        if (loc.location == kLocPhysReg) {
-            genRegCopy(cUnit, loc.lowReg, startReg + i);
-        }
-    }
-
-    // Handle special case of wide argument half in regs, half in frame
-    if (inRegs == 3) {
-        RegLocation loc = cUnit->regLocation[startLoc + 2];
-        if (loc.wide && loc.location == kLocPhysReg) {
-            // Load the other half of the arg into the promoted pair
-            loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg);
-            inRegs++;
-        }
-    }
-
-    // Now, do initial assignment of all promoted arguments passed in frame
-    for (int i = inRegs; i < cUnit->method->NumIns();) {
-        RegLocation loc = cUnit->regLocation[startLoc + i];
-        if (loc.fpLocation == kLocPhysReg) {
-            loc.location = kLocPhysReg;
-            loc.fp = true;
-            loc.lowReg = loc.fpLowReg;
-            loc.highReg = loc.fpHighReg;
-        }
-        if (loc.location == kLocPhysReg) {
-            if (loc.wide) {
-                if (loc.fp && (loc.lowReg & 1) != 0) {
-                    // Misaligned - need to load as a pair of singles
-                    loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg);
-                    loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg);
-                } else {
-                    loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset,
-                                     loc.lowReg, loc.highReg, INVALID_SREG);
-                }
-                i++;
-            } else {
-                loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg);
+    for (int i = 0; i < cUnit->method->NumIns(); i++) {
+        PromotionMap vMap = cUnit->promotionMap[startVReg + i];
+        // For arguments only, should have at most one promotion kind
+        DCHECK(!((vMap.coreLocation == kLocPhysReg) &&
+                 (vMap.fpLocation == kLocPhysReg)));
+        if (i <= (lastArgReg - firstArgReg)) {
+            // Arriving in register: copy to promoted core or fp reg, if any
+            if (vMap.coreLocation == kLocPhysReg) {
+                genRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
+            } else if (vMap.fpLocation == kLocPhysReg) {
+                genRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
+            }
+            // Also put a copy in memory in case we're partially promoted
+            storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                          firstArgReg + i, kWord);
+        } else {
+            // Arriving in frame: load the promoted target reg, if any
+            if (vMap.coreLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                             vMap.coreReg);
+            } else if (vMap.fpLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                             vMap.fpReg);
             }
         }
-        i++;
     }
 }