Quick: Use 16-bit Thumb2 PUSH/POP when possible.

Generate correct PUSH/POP in Gen{Entry,Exit}Sequence()
to avoid extra processing during insn fixup.

Change-Id: I396168e2a42faee6980d40779c7de9657531867b
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 4d377df..3b3170e 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -614,7 +614,6 @@
   kFixupVLoad,             // FP load which *may* be pc-relative.
   kFixupCBxZ,              // Cbz, Cbnz.
   kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupPushPop,           // Not really pc relative, but changes size based on args.
   kFixupCondBranch,        // Conditional branch
   kFixupT1Branch,          // Thumb1 Unconditional branch
   kFixupT2Branch,          // Thumb2 Unconditional branch
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 4e20d76..76ec9df 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -671,12 +671,12 @@
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
+                 | IS_LOAD, "pop", "<!0R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Push,          0xe92d0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
+                 | IS_STORE, "push", "<!0R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -1396,31 +1396,6 @@
           }
           break;
         }
-        case kFixupPushPop: {
-          if (__builtin_popcount(lir->operands[0]) == 1) {
-            /*
-             * The standard push/pop multiple instruction
-             * requires at least two registers in the list.
-             * If we've got just one, switch to the single-reg
-             * encoding.
-             */
-            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
-                kThumb2Pop1;
-            int reg = 0;
-            while (lir->operands[0]) {
-              if (lir->operands[0] & 0x1) {
-                break;
-              } else {
-                reg++;
-                lir->operands[0] >>= 1;
-              }
-            }
-            lir->operands[0] = reg;
-            // This won't change again, don't bother unlinking, just reset fixup kind
-            lir->flags.fixup = kFixupNone;
-          }
-          break;
-        }
         case kFixupCondBranch: {
           LIR *target_lir = lir->target;
           int32_t delta = 0;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index f15d707..99b2166 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -347,7 +347,20 @@
     }
   }
   /* Spill core callee saves */
-  NewLIR1(kThumb2Push, core_spill_mask_);
+  if (core_spill_mask_ == 0u) {
+    // Nothing to spill.
+  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
+    // Spilling only low regs and/or LR, use 16-bit PUSH.
+    constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
+    NewLIR1(kThumbPush,
+            (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
+            ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
+  } else if (IsPowerOfTwo(core_spill_mask_)) {
+    // kThumb2Push cannot be used to spill a single register.
+    NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
+  } else {
+    NewLIR1(kThumb2Push, core_spill_mask_);
+  }
   /* Need to spill any FP regs? */
   if (num_fp_spills_) {
     /*
@@ -444,13 +457,26 @@
   if (num_fp_spills_) {
     NewLIR1(kThumb2VPopCS, num_fp_spills_);
   }
-  if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
+  if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) {
     /* Unspill rARM_LR to rARM_PC */
     core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
     core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
   }
-  NewLIR1(kThumb2Pop, core_spill_mask_);
-  if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
+  if (core_spill_mask_ == 0u) {
+    // Nothing to unspill.
+  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
+    // Unspilling only low regs and/or PC, use 16-bit POP.
+    constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
+    NewLIR1(kThumbPop,
+            (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
+            ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
+  } else if (IsPowerOfTwo(core_spill_mask_)) {
+    // kThumb2Pop cannot be used to unspill a single register.
+    NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
+  } else {
+    NewLIR1(kThumb2Pop, core_spill_mask_);
+  }
+  if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) {
     /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
     NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
   }