Optimizing/ARM: Improve shifts of long values by a constant.

Change-Id: Id66ef8cdb9e64306f2be547370b90cc100a3e086
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6d05293..e30aa6e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2989,17 +2989,29 @@
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
-      // Make the output overlap, as it will be used to hold the masked
-      // second input.
-      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        // Make the output overlap, as it will be used to hold the masked
+        // second input.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->AddTemp(Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        // For simplicity, use kOutputOverlap even though we only require that low registers
+        // don't clash with high registers, which the register allocator currently guarantees.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     default:
@@ -3020,9 +3032,9 @@
     case Primitive::kPrimInt: {
       Register out_reg = out.AsRegister<Register>();
       Register first_reg = first.AsRegister<Register>();
-      // Arm doesn't mask the shift count so we need to do it ourselves.
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
+        // Arm doesn't mask the shift count so we need to do it ourselves.
         __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
         if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, out_reg);
@@ -3050,57 +3062,103 @@
       Register o_h = out.AsRegisterPairHigh<Register>();
       Register o_l = out.AsRegisterPairLow<Register>();
 
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-
       Register high = first.AsRegisterPairHigh<Register>();
       Register low = first.AsRegisterPairLow<Register>();
 
-      Register second_reg = second.AsRegister<Register>();
+      if (second.IsRegister()) {
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
 
-      if (op->IsShl()) {
-        __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the high part
-        __ Lsl(o_h, high, o_l);
-        // Shift the low part and `or` what overflew on the high part
-        __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ Lsr(temp, low, temp);
-        __ orr(o_h, o_h, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the high part
-        __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsl(o_h, low, temp, PL);
-        // Shift the low part
-        __ Lsl(o_l, low, o_l);
-      } else if (op->IsShr()) {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the low part
-        __ Lsr(o_l, low, o_h);
-        // Shift the high part and `or` what underflew on the low part
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the low part
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Asr(o_l, high, temp, PL);
-        // Shift the high part
-        __ Asr(o_h, high, o_h);
+        Register second_reg = second.AsRegister<Register>();
+
+        if (op->IsShl()) {
+          __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Shift the high part
+          __ Lsl(o_h, high, o_l);
+          // Shift the low part and `or` what overflowed into the high part
+          __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ Lsr(temp, low, temp);
+          __ orr(o_h, o_h, ShifterOperand(temp));
+          // If the shift is >= 32 bits (PL after the `subs`), override the high part
+          __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsl(o_h, low, temp, PL);
+          // Shift the low part
+          __ Lsl(o_l, low, o_l);
+        } else if (op->IsShr()) {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Shift the low part
+          __ Lsr(o_l, low, o_h);
+          // Shift the high part and `or` what underflowed into the low part
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          // If the shift is >= 32 bits (PL after the `subs`), override the low part
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Asr(o_l, high, temp, PL);
+          // Shift the high part
+          __ Asr(o_h, high, o_h);
+        } else {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Same as Shr, except we use `Lsr`s and not `Asr`s.
+          __ Lsr(o_l, low, o_h);
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsr(o_l, high, temp, PL);
+          __ Lsr(o_h, high, o_h);
+        }
       } else {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // same as Shr except we use `Lsr`s and not `Asr`s
-        __ Lsr(o_l, low, o_h);
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsr(o_l, high, temp, PL);
-        __ Lsr(o_h, high, o_h);
+        // Register allocator doesn't create partial overlap; the sequences below rely on that.
+        DCHECK_NE(o_l, high);
+        DCHECK_NE(o_h, low);
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxLongShiftValue);
+        if (shift_value > 32) {
+          if (op->IsShl()) {
+            __ Lsl(o_h, low, shift_value - 32);
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ Asr(o_l, high, shift_value - 32);
+            __ Asr(o_h, high, 31);
+          } else {
+            __ Lsr(o_l, high, shift_value - 32);
+            __ LoadImmediate(o_h, 0);
+          }
+        } else if (shift_value == 32) {
+          if (op->IsShl()) {
+            __ mov(o_h, ShifterOperand(low));
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ mov(o_l, ShifterOperand(high));
+            __ Asr(o_h, high, 31);
+          } else {
+            __ mov(o_l, ShifterOperand(high));
+            __ LoadImmediate(o_h, 0);
+          }
+        } else {  // shift_value < 32. NOTE(review): 0 lands here (Lsr #0, LSL #32); confirm.
+          if (op->IsShl()) {
+            __ Lsl(o_h, high, shift_value);
+            __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
+            __ Lsl(o_l, low, shift_value);
+          } else if (op->IsShr()) {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Asr(o_h, high, shift_value);
+          } else {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Lsr(o_h, high, shift_value);
+          }
+        }
       }
       break;
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
   }
 }
 
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 979c4c8..12f0380 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,6 +260,179 @@
     return arg ^ 0xf00000000000000fL;
   }
 
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<low>>, #2
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl2(long arg) {
+    // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2).
+    return arg << 2;
+  }
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
+  /// CHECK:                lsl{{s?|.w}} {{r\d+}}, <<low>>, #31
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl31(long arg) {
+    return arg << 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|.w}}
+
+  public static long shl32(long arg) {
+    return arg << 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl33(long arg) {
+    return arg << 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl63(long arg) {
+    return arg << 63;
+  }
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #1
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr1(long arg) {
+    return arg >> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr31(long arg) {
+    return arg >> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov {{r\d+}}, <<high>>
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|.w}}
+
+  public static long shr32(long arg) {
+    return arg >> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr33(long arg) {
+    return arg >> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr63(long arg) {
+    return arg >> 63;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
+  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, <<high>>, #1
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr1(long arg) {
+    return arg >>> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                lsr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr31(long arg) {
+    return arg >>> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|.w}}
+
+  public static long ushr32(long arg) {
+    return arg >>> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr33(long arg) {
+    return arg >>> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31
+  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr63(long arg) {
+    return arg >>> 63;
+  }
+
   /**
    * Test that the `-1` constant is not synthesized in a register and that we
    * instead simply switch between `add` and `sub` instructions with the
@@ -311,5 +484,38 @@
     assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
 
     assertLongEquals(14, addM1(7));
+
+    assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
+    assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
+    assertLongEquals(shl32(longArg), 0x8765432100000000L);
+    assertLongEquals(shl33(longArg), 0x0eca864200000000L);
+    assertLongEquals(shl63(longArg), 0x8000000000000000L);
+    assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
+    assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
+    assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
+    assertLongEquals(shl33(~longArg), 0xf13579bc00000000L);
+    assertLongEquals(shl63(~longArg), 0x0000000000000000L);
+
+    assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(shr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(shr32(longArg), 0x0000000012345678L);
+    assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(shr63(longArg), 0x0000000000000000L);
+    assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+    assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
+    assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
+    assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
+    assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
+
+    assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(ushr32(longArg), 0x0000000012345678L);
+    assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(ushr63(longArg), 0x0000000000000000L);
+    assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+    assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
+    assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
+    assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
+    assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
   }
 }