Register promotion support for 64-bit targets

Not sufficiently tested for 64-bit targets, but should be
fairly close.

A significant amount of refactoring could still be done (in
later CLs).

With this change we are not making any changes to the vmap
scheme.  As a result, it is a requirement that if a vreg
is promoted to both a 32-bit view and the low half of a
64-bit view, the two views must share the same physical
register.  We may relax this restriction later on to allow
more flexibility for 32-bit Arm.

For example, if v4, v5, v4/v5 and v5/v6 are all hot enough to
promote, we'd end up with something like:

v4 (as an int)    -> r10
v4/v5 (as a long) -> r10
v5 (as an int)    -> r11
v5/v6 (as a long) -> r11

Fix a couple of ARM64 bugs on the way...

Change-Id: I6a152b9c164d9f1a053622266e165428045362f3
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 70dce7f..a9d5893 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -67,7 +67,6 @@
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
-    RegStorage AllocPreservedDouble(int s_reg);
 
     // Required for target - miscellaneous.
     void AssembleLIR();
@@ -196,6 +195,8 @@
     bool InexpensiveConstantFloat(int32_t value);
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
+    RegStorage AllocPreservedDouble(int s_reg);
+    RegStorage AllocPreservedSingle(int s_reg);
 
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index e34d944..6f0ac1a 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -19,6 +19,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index e1e2d5b..ef94bbc 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -771,7 +771,7 @@
   int p_map_idx = SRegToPMap(s_reg);
   if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
     // Upper reg is already allocated.  Can we fit?
-    int high_reg = promotion_map_[p_map_idx+1].FpReg;
+    int high_reg = promotion_map_[p_map_idx+1].fp_reg;
     if ((high_reg & 1) == 0) {
       // High reg is even - fail.
       return res;  // Invalid.
@@ -805,13 +805,32 @@
   if (res.Valid()) {
     RegisterInfo* info = GetRegInfo(res);
     promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx].FpReg =
+    promotion_map_[p_map_idx].fp_reg =
         info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg();
     promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx+1].FpReg =
+    promotion_map_[p_map_idx+1].fp_reg =
         info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg();
   }
   return res;
 }
 
+// Reserve a callee-save sp single register.
+RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) {
+  RegStorage res;
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (!info->IsTemp() && !info->InUse()) {
+      res = info->GetReg();
+      int p_map_idx = SRegToPMap(s_reg);
+      int v_reg = mir_graph_->SRegToVReg(s_reg);
+      GetRegInfo(res)->MarkInUse();
+      MarkPreservedSingle(v_reg, res);
+      promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+      promotion_map_[p_map_idx].fp_reg = res.GetReg();
+      break;
+    }
+  }
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index bc8f95b..2d5e291 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -17,6 +17,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 3e0b3cf..56dcbe5 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -445,17 +445,59 @@
 
   NewLIR0(kPseudoMethodExit);
 
-  /* Need to restore any FP callee saves? */
-  if (fp_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
-    UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
-  }
-  if (core_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
-    UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+  // Restore saves and drop stack frame.
+  // 2 versions:
+  //
+  // 1. (Original): Try to address directly, then drop the whole frame.
+  //                Limitation: the ldp offset is a 7b signed, scaled immediate. There
+  //                should have been a DCHECK!
+  //
+  // 2. (New): Drop the non-save-part. Then do similar to original, which is now guaranteed to be
+  //           in range. Then drop the rest.
+  //
+  // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads
+  //       in variant 1.
+
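+  // Worked example (hypothetical numbers): frame_size_ = 592 with 4 core and 2 fp spills takes
+  // variant 2: drop = RoundDown(592 - 8 * 6, 16) = 544, the reloads then use byte offsets
+  // 0..47 from the new SP, comfortably in range, and the final bump adds 592 - 544 = 48.
+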
+  if (frame_size_ <= 504) {
+    // "Magic" constant, 63 (max signed 7b) * 8. Do variant 1.
+    // Could be tighter, as the last load is below frame_size_ offset.
+    if (fp_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_);
+  } else {
+    // Second variant. Drop the frame part.
+    int drop = 0;
+    // TODO: Always use the first formula, as num_fp_spills would be zero?
+    if (fp_spill_mask_) {
+      drop = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+    } else {
+      drop = frame_size_ - kArm64PointerSize * num_core_spills_;
+    }
+
+    // The drop amount needs to be 16B aligned, so that SP stays aligned.
+    drop = RoundDown(drop, 16);
+
+    OpRegImm64(kOpAdd, rs_sp, drop);
+
+    if (fp_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_ - drop);
   }
 
-  OpRegImm64(kOpAdd, rs_sp, frame_size_);
+  // Finally return.
   NewLIR0(kA64Ret);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index f71713f..7db6ab6 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -123,8 +123,6 @@
     void ClobberCallerSave();
     void FreeCallTemps();
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 18a4e8f..51c8723 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -19,6 +19,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 
@@ -1054,6 +1055,7 @@
     if (UNLIKELY(reg2 < 0)) {
       NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     } else {
+      DCHECK_LE(offset, 63);
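+      // The ldp immediate is a signed 7-bit, size-scaled offset, so 63 is the largest value
+      // encodable here.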
       NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
               RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     }
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dcb0050..6985de6 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -22,6 +22,7 @@
 
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
@@ -648,29 +649,6 @@
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.
- */
-void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  DCHECK(reg.IsFloat());
-  int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
-  // Ensure fp_vmap_table is large enough
-  int table_size = fp_vmap_table_.size();
-  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
-    fp_vmap_table_.push_back(INVALID_VREG);
-  }
-  // Add the current mapping
-  fp_vmap_table_[adjusted_reg_num] = v_reg;
-  // Size of fp_vmap_table is high-water mark, use to set mask
-  num_fp_spills_ = fp_vmap_table_.size();
-  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
-}
-
-void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  DCHECK(reg.IsDouble());
-  MarkPreservedSingle(v_reg, reg);
-}
-
 /* Clobber all regs that might be used by an external C call */
 void Arm64Mir2Lir::ClobberCallerSave() {
   Clobber(rs_x0);
@@ -904,7 +882,7 @@
     int n = *num_gpr_used;
     if (n < 8) {
       *num_gpr_used = n + 1;
-      if (loc->wide) {
+      if (loc->wide || loc->ref) {
         *op_size = k64;
         return RegStorage::Solo64(n);
       } else {
@@ -965,35 +943,64 @@
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
   for (int i = 0; i < cu_->num_ins; i++) {
-    PromotionMap* v_map = &promotion_map_[start_vreg + i];
     RegLocation* t_loc = &ArgLocs[i];
     OpSize op_size;
     RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
 
     if (reg.Valid()) {
-      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+      // If arriving in register.
+
+      // We have already updated the arg location with promoted info
+      // so we can rely on it here.
+      if (t_loc->location == kLocPhysReg) {
+        // Just copy it.
+        OpRegCopy(t_loc->reg, reg);
       } else {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
-        if (reg.Is64Bit()) {
-          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
-            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
-          }
-          i += 1;
+        // Needs flush.
+        if (t_loc->ref) {
+          StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
+              kNotVolatile);
         }
       }
     } else {
-      // If arriving in frame & promoted
-      if (v_map->core_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-                     RegStorage::Solo32(v_map->core_reg));
-      }
-      if (v_map->fp_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      // If arriving in frame & promoted.
+      if (t_loc->location == kLocPhysReg) {
+        if (t_loc->ref) {
+          LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
+        } else {
+          LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
+                       t_loc->wide ? k64 : k32, kNotVolatile);
+        }
       }
     }
+    if (t_loc->wide) {
+      // Increment i to skip the next one.
+      i++;
+    }
   }
 }
 
@@ -1067,7 +1074,11 @@
         loc = UpdateLoc(loc);
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+          if (loc.ref) {
+            StoreRefDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+          } else {
+            StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+          }
         }
         next_arg++;
       }
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index ca78e5b..aaee91b 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -17,6 +17,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index e571b3a..5ba0d3f 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -272,7 +272,7 @@
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
     if (v_reg_map.fp_location == kLocPhysReg) {
-      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.FpReg));
+      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.fp_reg));
     }
 
     std::string buf3;
@@ -1184,6 +1184,7 @@
     // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
+  // TODO: This is actually a pointer, not a reference.
   LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
   AppendLIR(load_pc_rel);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
@@ -1211,7 +1212,7 @@
   if (data_target == nullptr) {
     data_target = AddWordData(&class_literal_list_, type_idx);
   }
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
   AppendLIR(load_pc_rel);
 }
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2c59055..fe90562 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1132,8 +1132,9 @@
   RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
-  if (result_reg == object.reg) {
+  if (IsSameReg(result_reg, object.reg)) {
     result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK(!IsSameReg(result_reg, object.reg));
   }
   LoadConstant(result_reg, 0);     // assume false
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index bf51d28..c75e681 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -257,11 +257,11 @@
 void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
-  if (TargetReg(kArg0) != arg0) {
-    OpRegCopy(TargetReg(kArg0), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1), arg0));
+  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
+    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
   }
-  LoadCurrMethodDirect(TargetReg(kArg1));
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -272,11 +272,11 @@
                                                     RegStorage arg0, RegLocation arg2,
                                                     bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
-  if (TargetReg(kArg0) != arg0) {
-    OpRegCopy(TargetReg(kArg0), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1), arg0));
+  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
+    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
   }
-  LoadCurrMethodDirect(TargetReg(kArg1));
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
   LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
@@ -394,13 +394,6 @@
 INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocation, RegLocation arg0,
             RegLocation arg1, bool safepoint_pc)
 
-// TODO: This is a hack! Reshape the two macros into functions and move them to a better place.
-#define IsSameReg(r1, r2) \
-  (GetRegInfo(r1)->Master()->GetReg().GetReg() == GetRegInfo(r2)->Master()->GetReg().GetReg())
-#define TargetArgReg(arg, is_wide) \
-  (GetRegInfo(TargetReg(arg))->FindMatchingView( \
-     (is_wide) ? RegisterInfo::k64SoloStorageMask : RegisterInfo::k32SoloStorageMask)->GetReg())
-
 void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
   if (IsSameReg(arg1, TargetReg(kArg0))) {
     if (IsSameReg(arg0, TargetReg(kArg1))) {
@@ -562,7 +555,7 @@
         OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
         need_flush = false;
       } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
         need_flush = false;
       } else {
         need_flush = true;
@@ -584,8 +577,8 @@
            * halves of the double are promoted.  Make sure they are in a usable form.
            */
           int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0);
-          int low_reg = promotion_map_[lowreg_index].FpReg;
-          int high_reg = promotion_map_[lowreg_index + 1].FpReg;
+          int low_reg = promotion_map_[lowreg_index].fp_reg;
+          int high_reg = promotion_map_[lowreg_index + 1].fp_reg;
           if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) {
             need_flush = true;
           }
@@ -600,7 +593,7 @@
         Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
       }
       if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
       }
     }
   }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 61a047d..0b1f7b6 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -65,8 +65,6 @@
     void ClobberCallerSave();
     void FreeCallTemps();
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index cd29e78..c3a4c17 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mips_lir.h"
 #include "mirror/array.h"
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 1d02cf7..a5b7824 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -340,20 +340,6 @@
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
- */
-void MipsMir2Lir::MarkPreservedSingle(int s_reg, RegStorage reg) {
-  LOG(FATAL) << "No support yet for promoted FP regs";
-}
-
-void MipsMir2Lir::MarkPreservedDouble(int s_reg, RegStorage reg) {
-  LOG(FATAL) << "No support yet for promoted FP regs";
-}
-
 /* Clobber all regs that might be used by an external C call */
 void MipsMir2Lir::ClobberCallerSave() {
   Clobber(rs_rZERO);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index d28abbf..129a696 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mips_lir.h"
 
 namespace art {
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 9a62255..9ce5bb7 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -31,7 +31,7 @@
     p->MarkDead();
     if (p->IsWide()) {
       p->SetIsWide(false);
-      if (p->GetReg() != p->Partner()) {
+      if (p->GetReg().NotExactlyEquals(p->Partner())) {
         // Register pair - deal with the other half.
         p = GetRegInfo(p->Partner());
         p->SetIsWide(false);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 984e8ea..237288e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -61,7 +61,7 @@
   if (reg_arg_low.Valid()) {
     LockTemp(reg_arg_low);
   }
-  if (reg_arg_high.Valid() && reg_arg_low != reg_arg_high) {
+  if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) {
     LockTemp(reg_arg_high);
   }
 }
@@ -249,7 +249,7 @@
     LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile :
         kNotVolatile);
   }
-  if (r_result != rl_dest.reg) {
+  if (r_result.NotExactlyEquals(rl_dest.reg)) {
     if (wide) {
       OpRegCopyWide(rl_dest.reg, r_result);
     } else {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 0c00df3..8ebd64a 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -195,8 +195,8 @@
     high_reg = (both_regs >> 8) & 0xff; \
   } while (false)
 
-// Mask to denote sreg as the start of a double.  Must not interfere with low 16 bits.
-#define STARTING_DOUBLE_SREG 0x10000
+// Mask to denote sreg as the start of a 64-bit item.  Must not interfere with low 16 bits.
+#define STARTING_WIDE_SREG 0x10000
 
 // TODO: replace these macros
 #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
@@ -487,7 +487,7 @@
       RegLocationType core_location:3;
       uint8_t core_reg;
       RegLocationType fp_location:3;
-      uint8_t FpReg;
+      uint8_t fp_reg;
       bool first_in_pair;
     };
 
@@ -740,9 +740,9 @@
     int SRegToPMap(int s_reg);
     void RecordCorePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedCoreReg(int s_reg);
-    void RecordSinglePromotion(RegStorage reg, int s_reg);
-    void RecordDoublePromotion(RegStorage reg, int s_reg);
-    RegStorage AllocPreservedSingle(int s_reg);
+    void RecordFpPromotion(RegStorage reg, int s_reg);
+    RegStorage AllocPreservedFpReg(int s_reg);
+    virtual RegStorage AllocPreservedSingle(int s_reg);
     virtual RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required);
     virtual RegStorage AllocFreeTemp();
@@ -1175,6 +1175,13 @@
 
     // Required for target - register utilities.
 
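+    // Returns true if reg1 and reg2 denote the same physical register, even if they are
+    // different views of it (e.g. a 32-bit view and the low half of a 64-bit view).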
+    bool IsSameReg(RegStorage reg1, RegStorage reg2) {
+      RegisterInfo* info1 = GetRegInfo(reg1);
+      RegisterInfo* info2 = GetRegInfo(reg2);
+      return (info1->Master() == info2->Master() &&
+             (info1->StorageMask() & info2->StorageMask()) != 0);
+    }
+
     /**
      * @brief Portable way of getting special registers from the backend.
      * @param reg Enumeration describing the purpose of the register.
@@ -1224,8 +1231,6 @@
     virtual void ClobberCallerSave() = 0;
     virtual void FreeCallTemps() = 0;
     virtual void LockCallTemps() = 0;
-    virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0;
-    virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0;
     virtual void CompilerInitializeRegAlloc() = 0;
 
     // Required for target - miscellaneous.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 38370ad..6bedae8 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -178,7 +178,7 @@
   } else {
     RegisterInfo* info = GetRegInfo(reg);
     if (info->IsTemp() && !info->IsDead()) {
-      if (info->GetReg() != info->Partner()) {
+      if (info->GetReg().NotExactlyEquals(info->Partner())) {
         ClobberBody(GetRegInfo(info->Partner()));
       }
       ClobberBody(info);
@@ -225,7 +225,7 @@
     GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
     for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
       if (info->SReg() == s_reg) {
-        if (info->GetReg() != info->Partner()) {
+        if (info->GetReg().NotExactlyEquals(info->Partner())) {
           // Dealing with a pair - clobber the other half.
           DCHECK(!info->IsAliased());
           ClobberBody(GetRegInfo(info->Partner()));
@@ -284,8 +284,13 @@
 
 /* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
-  // TODO: 64-bit and refreg update
   RegStorage res;
+  /*
+   * Note: it really doesn't matter much whether we allocate from the core or core64
+   * pool for 64-bit targets - but for some targets it does matter whether allocations
+   * happen from the single or double pool.  This entire section of code could stand
+   * a good refactoring.
+   */
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     if (!info->IsTemp() && !info->InUse()) {
@@ -297,49 +302,50 @@
   return res;
 }
 
-void Mir2Lir::RecordSinglePromotion(RegStorage reg, int s_reg) {
+void Mir2Lir::RecordFpPromotion(RegStorage reg, int s_reg) {
+  DCHECK_NE(cu_->instruction_set, kThumb2);
   int p_map_idx = SRegToPMap(s_reg);
   int v_reg = mir_graph_->SRegToVReg(s_reg);
+  int reg_num = reg.GetRegNum();
   GetRegInfo(reg)->MarkInUse();
-  MarkPreservedSingle(v_reg, reg);
+  fp_spill_mask_ |= (1 << reg_num);
+  // Include reg for later sort
+  fp_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1)));
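+  // Each entry packs the register number above the low VREG_NUM_WIDTH bits holding the v_reg,
+  // so the table can later be sorted by register number without losing the mapping.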
+  num_fp_spills_++;
   promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].FpReg = reg.GetReg();
+  promotion_map_[p_map_idx].fp_reg = reg.GetReg();
 }
 
-// Reserve a callee-save sp single register.
-RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
+// Reserve a callee-save floating point register.
+RegStorage Mir2Lir::AllocPreservedFpReg(int s_reg) {
+  /*
+   * For targets other than Thumb2, it doesn't matter whether we allocate from
+   * the sp_regs_ or dp_regs_ pool.  Some refactoring is in order here.
+   */
+  DCHECK_NE(cu_->instruction_set, kThumb2);
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     if (!info->IsTemp() && !info->InUse()) {
       res = info->GetReg();
-      RecordSinglePromotion(res, s_reg);
+      RecordFpPromotion(res, s_reg);
       break;
     }
   }
   return res;
 }
 
-void Mir2Lir::RecordDoublePromotion(RegStorage reg, int s_reg) {
-  int p_map_idx = SRegToPMap(s_reg);
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  GetRegInfo(reg)->MarkInUse();
-  MarkPreservedDouble(v_reg, reg);
-  promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].FpReg = reg.GetReg();
-}
-
-// Reserve a callee-save dp solo register.
+// TODO: this is Thumb2 only.  Remove when DoPromotion refactored.
 RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) {
   RegStorage res;
-  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
-  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    if (!info->IsTemp() && !info->InUse()) {
-      res = info->GetReg();
-      RecordDoublePromotion(res, s_reg);
-      break;
-    }
-  }
+  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedDouble";
+  return res;
+}
+
+// TODO: this is Thumb2 only.  Remove when DoPromotion refactored.
+RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
+  RegStorage res;
+  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedSingle";
   return res;
 }
 
@@ -736,7 +742,8 @@
     RegisterInfo* info1 = GetRegInfo(reg.GetLow());
     RegisterInfo* info2 = GetRegInfo(reg.GetHigh());
     DCHECK(info1 && info2 && info1->IsWide() && info2->IsWide() &&
-         (info1->Partner() == info2->GetReg()) && (info2->Partner() == info1->GetReg()));
+           (info1->Partner().ExactlyEquals(info2->GetReg())) &&
+           (info2->Partner().ExactlyEquals(info1->GetReg())));
     if ((info1->IsLive() && info1->IsDirty()) || (info2->IsLive() && info2->IsDirty())) {
       if (!(info1->IsTemp() && info2->IsTemp())) {
         /* Should not happen.  If it does, there's a problem in eval_loc */
@@ -872,10 +879,10 @@
     RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
     // Unpair any old partners.
-    if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) {
+    if (info_lo->IsWide() && info_lo->Partner().NotExactlyEquals(info_hi->GetReg())) {
       GetRegInfo(info_lo->Partner())->SetIsWide(false);
     }
-    if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) {
+    if (info_hi->IsWide() && info_hi->Partner().NotExactlyEquals(info_lo->GetReg())) {
       GetRegInfo(info_hi->Partner())->SetIsWide(false);
     }
     info_lo->SetIsWide(true);
@@ -1039,12 +1046,12 @@
         RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
         match &= info_lo->IsWide();
         match &= info_hi->IsWide();
-        match &= (info_lo->Partner() == info_hi->GetReg());
-        match &= (info_hi->Partner() == info_lo->GetReg());
+        match &= (info_lo->Partner().ExactlyEquals(info_hi->GetReg()));
+        match &= (info_hi->Partner().ExactlyEquals(info_lo->GetReg()));
       } else {
         RegisterInfo* info = GetRegInfo(reg);
         match &= info->IsWide();
-        match &= (info->GetReg() == info->Partner());
+        match &= (info->GetReg().ExactlyEquals(info->Partner()));
       }
       if (match) {
         loc.location = kLocPhysReg;
@@ -1147,16 +1154,23 @@
     RegLocation loc = mir_graph_->reg_location_[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(loc.s_reg_low);
+    int use_count = mir_graph_->GetUseCount(i);
     if (loc.fp) {
       if (loc.wide) {
         // Treat doubles as a unit, using upper half of fp_counts array.
-        counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i);
+        counts[p_map_idx + num_regs].count += use_count;
         i++;
       } else {
-        counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+        counts[p_map_idx].count += use_count;
       }
     } else if (!IsInexpensiveConstant(loc)) {
-      counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+      if (loc.wide && cu_->target64) {
+        // Treat long as a unit, using upper half of core_counts array.
+        counts[p_map_idx + num_regs].count += use_count;
+        i++;
+      } else {
+        counts[p_map_idx].count += use_count;
+      }
     }
   }
 }
@@ -1176,10 +1190,10 @@
 void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
   LOG(INFO) << msg;
   for (int i = 0; i < size; i++) {
-    if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
-      LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count;
+    if ((arr[i].s_reg & STARTING_WIDE_SREG) != 0) {
+      LOG(INFO) << "s_reg[64_" << (arr[i].s_reg & ~STARTING_WIDE_SREG) << "]: " << arr[i].count;
     } else {
-      LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+      LOG(INFO) << "s_reg[32_" << arr[i].s_reg << "]: " << arr[i].count;
     }
   }
 }
@@ -1210,69 +1224,83 @@
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
    */
+  size_t core_reg_count_size = cu_->target64 ? num_regs * 2 : num_regs;
+  size_t fp_reg_count_size = num_regs * 2;
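+  // As with the fp counts, the upper half of the core counts is reserved on 64-bit targets for
+  // wide (64-bit) uses, keyed by the starting sreg.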
   RefCounts *core_regs =
-      static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+      static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size,
                                             kArenaAllocRegAlloc));
-  RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2,
+  RefCounts *fp_regs =
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size,
                                              kArenaAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
-    core_regs[i].s_reg = FpRegs[i].s_reg = i;
+    core_regs[i].s_reg = fp_regs[i].s_reg = i;
   }
 
   // Set ssa names for compiler temporaries
   for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) {
     CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx);
     core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
-    FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
-    FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    fp_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
   }
 
-  // Duplicate in upper half to represent possible fp double starting sregs.
-  for (int i = 0; i < num_regs; i++) {
-    FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG;
+  // Duplicate in upper half to represent possible wide starting sregs.
+  for (size_t i = num_regs; i < fp_reg_count_size; i++) {
+    fp_regs[i].s_reg = fp_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
+  }
+  for (size_t i = num_regs; i < core_reg_count_size; i++) {
+    core_regs[i].s_reg = core_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
   }
 
   // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, FpRegs, num_regs);
+  CountRefs(core_regs, fp_regs, num_regs);
 
 
   // Sort the count arrays
-  qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts);
-  qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts);
+  qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts);
+  qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts);
 
   if (cu_->verbose) {
-    DumpCounts(core_regs, num_regs, "Core regs after sort");
-    DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort");
+    DumpCounts(core_regs, core_reg_count_size, "Core regs after sort");
+    DumpCounts(fp_regs, fp_reg_count_size, "Fp regs after sort");
   }
 
   if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
-    // Promote FpRegs
-    for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG);
-      if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
-        if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) &&
-            (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) {
-          int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG;
-          // Ignore result - if can't alloc double may still be able to alloc singles.
-          AllocPreservedDouble(low_sreg);
+    // Promote fp regs
+    for (size_t i = 0; (i < fp_reg_count_size) && (fp_regs[i].count >= promotion_threshold); i++) {
+      int low_sreg = fp_regs[i].s_reg & ~STARTING_WIDE_SREG;
+      size_t p_map_idx = SRegToPMap(low_sreg);
+      RegStorage reg = RegStorage::InvalidReg();
+      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
+        // TODO: break out the Thumb2-specific code.
+        if (cu_->instruction_set == kThumb2) {
+          bool wide = fp_regs[i].s_reg & STARTING_WIDE_SREG;
+          if (wide) {
+            if (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg) {
+              // Ignore result - if can't alloc double may still be able to alloc singles.
+              AllocPreservedDouble(low_sreg);
+            }
+            // Continue regardless of success - might still be able to grab a single.
+            continue;
+          } else {
+            reg = AllocPreservedSingle(low_sreg);
+          }
+        } else {
+          reg = AllocPreservedFpReg(low_sreg);
         }
-      } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        RegStorage reg = AllocPreservedSingle(FpRegs[i].s_reg);
         if (!reg.Valid()) {
           break;  // No more left.
         }
       }
     }
 
     // Promote core regs
-    for (int i = 0; (i < num_regs) &&
-            (core_regs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(core_regs[i].s_reg);
-      if (promotion_map_[p_map_idx].core_location !=
-          kLocPhysReg) {
-        RegStorage reg = AllocPreservedCoreReg(core_regs[i].s_reg);
+    for (size_t i = 0; (i < core_reg_count_size) &&
+         (core_regs[i].count >= promotion_threshold); i++) {
+      int low_sreg = core_regs[i].s_reg & ~STARTING_WIDE_SREG;
+      size_t p_map_idx = SRegToPMap(low_sreg);
+      if (promotion_map_[p_map_idx].core_location != kLocPhysReg) {
+        RegStorage reg = AllocPreservedCoreReg(low_sreg);
         if (!reg.Valid()) {
            break;  // No more left
         }
@@ -1284,51 +1312,35 @@
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
     RegLocation *curr = &mir_graph_->reg_location_[i];
     int p_map_idx = SRegToPMap(curr->s_reg_low);
-    if (!curr->wide) {
-      if (curr->fp) {
-        if (promotion_map_[p_map_idx].fp_location == kLocPhysReg) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].FpReg);
-          curr->home = true;
-        }
-      } else {
-        if (promotion_map_[p_map_idx].core_location == kLocPhysReg) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].core_reg);
-          curr->home = true;
-        }
-      }
-    } else {
-      if (curr->high_word) {
-        continue;
-      }
-      if (curr->fp) {
-        if ((promotion_map_[p_map_idx].fp_location == kLocPhysReg) &&
-            (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg)) {
-          int low_reg = promotion_map_[p_map_idx].FpReg;
-          int high_reg = promotion_map_[p_map_idx+1].FpReg;
-          // Doubles require pair of singles starting at even reg
+    int reg_num = curr->fp ? promotion_map_[p_map_idx].fp_reg : promotion_map_[p_map_idx].core_reg;
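+    // Longs and doubles are wide everywhere; on 64-bit targets references may also be held in
+    // wide registers (x86_64 keeps 32-bit references here).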
+    bool wide = curr->wide || (cu_->target64 && curr->ref && cu_->instruction_set != kX86_64);
+    RegStorage reg = RegStorage::InvalidReg();
+    if (curr->fp && promotion_map_[p_map_idx].fp_location == kLocPhysReg) {
+      if (wide && cu_->instruction_set == kThumb2) {
+        if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) {
+          int high_reg = promotion_map_[p_map_idx+1].fp_reg;
           // TODO: move target-specific restrictions out of here.
-          if (((low_reg & 0x1) == 0) && ((low_reg + 1) == high_reg)) {
-            curr->location = kLocPhysReg;
-            if (cu_->instruction_set == kThumb2) {
-              curr->reg = RegStorage::FloatSolo64(RegStorage::RegNum(low_reg) >> 1);
-            } else {
-              curr->reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
-            }
-            curr->home = true;
+          if (((reg_num & 0x1) == 0) && ((reg_num + 1) == high_reg)) {
+            reg = RegStorage::FloatSolo64(RegStorage::RegNum(reg_num) >> 1);
           }
         }
       } else {
-        if ((promotion_map_[p_map_idx].core_location == kLocPhysReg)
-           && (promotion_map_[p_map_idx+1].core_location ==
-           kLocPhysReg)) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage(RegStorage::k64BitPair, promotion_map_[p_map_idx].core_reg,
-                                 promotion_map_[p_map_idx+1].core_reg);
-          curr->home = true;
-        }
+        reg = wide ? RegStorage::FloatSolo64(reg_num) : RegStorage::FloatSolo32(reg_num);
       }
+    } else if (!curr->fp && promotion_map_[p_map_idx].core_location == kLocPhysReg) {
+      if (wide && !cu_->target64) {
+        if (promotion_map_[p_map_idx + 1].core_location == kLocPhysReg) {
+          int high_reg = promotion_map_[p_map_idx+1].core_reg;
+          reg = RegStorage(RegStorage::k64BitPair, reg_num, high_reg);
+        }
+      } else {
+        reg = wide ? RegStorage::Solo64(reg_num) : RegStorage::Solo32(reg_num);
+      }
+    }
+    if (reg.Valid()) {
+      curr->reg = reg;
+      curr->location = kLocPhysReg;
+      curr->home = true;
     }
   }
   if (cu_->verbose) {
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 21d7419..afb6184 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -100,8 +100,6 @@
   void ClobberCallerSave();
   void FreeCallTemps();
   void LockCallTemps();
-  void MarkPreservedSingle(int v_reg, RegStorage reg);
-  void MarkPreservedDouble(int v_reg, RegStorage reg);
   void CompilerInitializeRegAlloc();
 
   // Required for target - miscellaneous.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 1aeaced..7454475 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "x86_lir.h"
 
 namespace art {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index bd007e7e..14a18e5 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mirror/array.h"
 #include "x86_lir.h"
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 64b4af8..72b2cea 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,6 +20,7 @@
 #include "codegen_x86.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mirror/array.h"
 #include "mirror/string.h"
 #include "x86_lir.h"
@@ -409,20 +410,6 @@
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
- */
-void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
-}
-
-void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
-}
-
 RegStorage X86Mir2Lir::AllocateByteRegister() {
   RegStorage reg = AllocTypedTemp(false, kCoreReg);
   if (!cu_->target64) {
@@ -2206,4 +2193,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 392eecf..d835b22 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -20,6 +20,7 @@
 #include "x86_lir.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 3b891f2..8ed3adc 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -122,11 +122,18 @@
   constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
   RegStorage() : reg_(kInvalid) {}
 
-  bool operator==(const RegStorage rhs) const {
+  // We do not provide a general operator overload for equality of reg storage, as this is
+  // dangerous in the case of architectures with multiple views, and the naming ExactlyEquals
+  // expresses the exact match performed here. It is more likely that a comparison between the
+  // views is intended in most cases. Such code can be found in, for example, Mir2Lir::IsSameReg.
+  //
+  // If you know what you are doing, include reg_storage_eq.h, which defines == and != for brevity.
+
+  bool ExactlyEquals(const RegStorage& rhs) const {
     return (reg_ == rhs.GetRawBits());
   }
 
-  bool operator!=(const RegStorage rhs) const {
+  bool NotExactlyEquals(const RegStorage& rhs) const {
     return (reg_ != rhs.GetRawBits());
   }
 
diff --git a/compiler/dex/reg_storage_eq.h b/compiler/dex/reg_storage_eq.h
new file mode 100644
index 0000000..b688dac
--- /dev/null
+++ b/compiler/dex/reg_storage_eq.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+#define ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+
+#include "reg_storage.h"
+
+namespace art {
+
+// Define == and != operators for RegStorage. These are based on exact equality of the reg storage,
+// that is, 32b and 64b views of the same physical register won't match. This is often not the
+// intended behavior, so be careful when including this header.
+
+inline bool operator==(const RegStorage& lhs, const RegStorage& rhs) {
+  return lhs.ExactlyEquals(rhs);
+}
+
+inline bool operator!=(const RegStorage& lhs, const RegStorage& rhs) {
+  return lhs.NotExactlyEquals(rhs);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 132ac3e..d5405fb 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -159,11 +159,22 @@
       uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
       uintptr_t ptr_val;
       bool success = false;
+      bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
       if (is_float) {
         success = GetFPR(reg, &ptr_val);
       } else {
         success = GetGPR(reg, &ptr_val);
       }
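+      // On 64-bit targets a promoted wide value lives in a single 64-bit register; hand back
+      // only the requested 32-bit half.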
+      if (success && target64) {
+        bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
+        bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+        int64_t value_long = static_cast<int64_t>(ptr_val);
+        if (wide_lo) {
+          ptr_val = static_cast<uintptr_t>(value_long & 0xFFFFFFFF);
+        } else if (wide_hi) {
+          ptr_val = static_cast<uintptr_t>(value_long >> 32);
+        }
+      }
       *val = ptr_val;
       return success;
     } else {
@@ -194,6 +205,28 @@
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
+      // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
+      if (target64) {
+        bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
+        bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+        if (wide_lo || wide_hi) {
+          uintptr_t old_reg_val;
+          bool success = is_float ? GetFPR(reg, &old_reg_val) : GetGPR(reg, &old_reg_val);
+          if (!success) {
+            return false;
+          }
+          uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
+          uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
+          uint64_t mask = 0xffffffff;
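+          // wide_lo: keep the old high half and merge the new value into the low half;
+          // wide_hi: keep the old low half and merge the (shifted) new value into the high half.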
+          if (wide_lo) {
+            mask = mask << 32;
+          } else {
+            new_vreg_portion = new_vreg_portion << 32;
+          }
+          new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
+        }
+      }
       if (is_float) {
         return SetFPR(reg, new_value);
       } else {
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
index 9821753..df5cd80 100644
--- a/runtime/vmap_table.h
+++ b/runtime/vmap_table.h
@@ -64,6 +64,12 @@
     const uint8_t* table = table_;
     uint16_t adjusted_vreg = vreg + kEntryAdjustment;
     size_t end = DecodeUnsignedLeb128(&table);
+    bool high_reg = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+    bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
+    if (target64 && high_reg) {
+      // Wide promoted registers are associated with the sreg of the low portion.
+      adjusted_vreg--;
+    }
     for (size_t i = 0; i < end; ++i) {
      // Stop if we find what we are looking for.
       uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);