Improve promotion of double-precision regs

Minor rework of the double allocation mechanism to more explicitly
manage the allocation of preserved floating point single pairs as
doubles.

Change-Id: Id9db4b0e86e5ef54a5db587f367e00efdf7e98d6
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index fdbc1d0..401e3d5 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -158,6 +158,10 @@
 #define ENCODE_ALL              (~0ULL)
 #define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
                                  ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
+
+// Mask to denote sreg as the start of a double.  Must not interfere with low 16 bits.
+#define STARTING_DOUBLE_SREG 0x10000
+
 // TODO: replace these macros
 #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
 #define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath))
@@ -187,7 +191,6 @@
     struct RefCounts {
       int count;
       int s_reg;
-      bool double_start;   // Starting v_reg for a double
     };
 
     /*
@@ -324,11 +327,9 @@
     void RecordCorePromotion(int reg, int s_reg);
     int AllocPreservedCoreReg(int s_reg);
     void RecordFpPromotion(int reg, int s_reg);
-    int AllocPreservedSingle(int s_reg, bool even);
+    int AllocPreservedSingle(int s_reg);
     int AllocPreservedDouble(int s_reg);
-    int AllocPreservedFPReg(int s_reg, bool double_start);
-    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
-                      bool required);
+    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
     int AllocTempDouble();
     int AllocFreeTemp();
     int AllocTemp();
@@ -367,7 +368,7 @@
     RegLocation UpdateRawLoc(RegLocation loc);
     RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
     RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
-    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts);
+    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
     void DoPromotion();
     int VRegOffset(int v_reg);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index a0f22fc..7927ff9 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -174,17 +174,12 @@
   promotion_map_[p_map_idx].FpReg = reg;
 }
 
-/*
- * Reserve a callee-save fp single register.  Try to fullfill request for
- * even/odd  allocation, but go ahead and allocate anything if not
- * available.  If nothing's available, return -1.
- */
-int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) {
-  int res = -1;
+// Reserve a callee-save fp single register.
+int Mir2Lir::AllocPreservedSingle(int s_reg) {
+  int res = -1;  // Return code if none available.
   RegisterInfo* FPRegs = reg_pool_->FPRegs;
   for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-    if (!FPRegs[i].is_temp && !FPRegs[i].in_use &&
-      ((FPRegs[i].reg & 0x1) == 0) == even) {
+    if (!FPRegs[i].is_temp && !FPRegs[i].in_use) {
       res = FPRegs[i].reg;
       RecordFpPromotion(res, s_reg);
       break;
@@ -250,26 +245,6 @@
   return res;
 }
 
-
-/*
- * Reserve a callee-save fp register.   If this register can be used
- * as the first of a double, attempt to allocate an even pair of fp
- * single regs (but if can't still attempt to allocate a single, preferring
- * first to allocate an odd register.
- */
-int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) {
-  int res = -1;
-  if (double_start) {
-    res = AllocPreservedDouble(s_reg);
-  }
-  if (res == -1) {
-    res = AllocPreservedSingle(s_reg, false /* try odd # */);
-  }
-  if (res == -1)
-    res = AllocPreservedSingle(s_reg, true /* try even # */);
-  return res;
-}
-
 int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
                            bool required) {
   int next = *next_temp;
@@ -872,18 +847,22 @@
 }
 
 /* USE SSA names to count references of base Dalvik v_regs. */
-void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) {
+void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
     RegLocation loc = mir_graph_->reg_location_[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(loc.s_reg_low);
-    // Don't count easily regenerated immediates
-    if (loc.fp || !IsInexpensiveConstant(loc)) {
+    if (loc.fp) {
+      if (loc.wide) {
+        // Treat doubles as a unit, using upper half of fp_counts array.
+        counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i);
+        i++;
+      } else {
+        counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+      }
+    } else if (!IsInexpensiveConstant(loc)) {
       counts[p_map_idx].count += mir_graph_->GetUseCount(i);
     }
-    if (loc.wide && loc.fp && !loc.high_word) {
-      counts[p_map_idx].double_start = true;
-    }
   }
 }
 
@@ -902,7 +881,11 @@
 void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
   LOG(INFO) << msg;
   for (int i = 0; i < size; i++) {
-    LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+      LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count;
+    } else {
+      LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    }
   }
 }
 
@@ -925,7 +908,7 @@
    * count based on original Dalvik register name.  Count refs
    * separately based on type in order to give allocation
    * preference to fp doubles - which must be allocated sequential
-   * physical single fp registers started with an even-numbered
+   * physical single fp registers starting with an even-numbered
    * reg.
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
@@ -934,7 +917,7 @@
       static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
                                             ArenaAllocator::kAllocRegAlloc));
   RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2,
                                              ArenaAllocator::kAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
@@ -942,46 +925,49 @@
   }
   // Set ssa name for Method*
   core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy
+  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy.
+  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // for consistency.
   // Set ssa names for compiler_temps
   for (int i = 1; i <= cu_->num_compiler_temps; i++) {
     CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
     core_regs[dalvik_regs + i].s_reg = ct->s_reg;
     FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
+    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+  }
+
+  // Duplicate in upper half to represent possible fp double starting sregs.
+  for (int i = 0; i < num_regs; i++) {
+    FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG;
   }
 
   // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, FpRegs);
+  CountRefs(core_regs, FpRegs, num_regs);
 
-  /*
-   * Ideally, we'd allocate doubles starting with an even-numbered
-   * register.  Bias the counts to try to allocate any vreg that's
-   * used as the start of a pair first.
-   */
-  for (int i = 0; i < num_regs; i++) {
-    if (FpRegs[i].double_start) {
-      FpRegs[i].count *= 2;
-    }
-  }
 
   // Sort the count arrays
   qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts);
-  qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts);
+  qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts);
 
   if (cu_->verbose) {
     DumpCounts(core_regs, num_regs, "Core regs after sort");
-    DumpCounts(FpRegs, num_regs, "Fp regs after sort");
+    DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort");
   }
 
   if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
     // Promote FpRegs
-    for (int i = 0; (i < num_regs) && (FpRegs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(FpRegs[i].s_reg);
-      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        int reg = AllocPreservedFPReg(FpRegs[i].s_reg,
-          FpRegs[i].double_start);
+    for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) {
+      int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG);
+      if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+        if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) &&
+            (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) {
+          int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG;
+          // Ignore result - if can't alloc double may still be able to alloc singles.
+          AllocPreservedDouble(low_sreg);
+        }
+      } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
+        int reg = AllocPreservedSingle(FpRegs[i].s_reg);
         if (reg < 0) {
-          break;  // No more left
+          break;  // No more left.
         }
       }
     }