Optimizing/ARM: Improve shifts of long values by a constant.
Change-Id: Id66ef8cdb9e64306f2be547370b90cc100a3e086
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6d05293..e30aa6e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2989,17 +2989,29 @@
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
- // Make the output overlap, as it will be used to hold the masked
- // second input.
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Make the output overlap, as it will be used to hold the masked
+ // second input.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister());
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ // For simplicity, use kOutputOverlap even though we only require that low registers
+ // don't clash with high registers, which the register allocator currently guarantees.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
break;
}
default:
@@ -3020,9 +3032,9 @@
case Primitive::kPrimInt: {
Register out_reg = out.AsRegister<Register>();
Register first_reg = first.AsRegister<Register>();
- // Arm doesn't mask the shift count so we need to do it ourselves.
if (second.IsRegister()) {
Register second_reg = second.AsRegister<Register>();
+ // Arm doesn't mask the shift count so we need to do it ourselves.
__ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
if (op->IsShl()) {
__ Lsl(out_reg, first_reg, out_reg);
@@ -3050,57 +3062,103 @@
Register o_h = out.AsRegisterPairHigh<Register>();
Register o_l = out.AsRegisterPairLow<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
Register high = first.AsRegisterPairHigh<Register>();
Register low = first.AsRegisterPairLow<Register>();
- Register second_reg = second.AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
- if (op->IsShl()) {
- __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
- // Shift the high part
- __ Lsl(o_h, high, o_l);
- // Shift the low part and `or` what overflew on the high part
- __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
- __ Lsr(temp, low, temp);
- __ orr(o_h, o_h, ShifterOperand(temp));
- // If the shift is > 32 bits, override the high part
- __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Lsl(o_h, low, temp, PL);
- // Shift the low part
- __ Lsl(o_l, low, o_l);
- } else if (op->IsShr()) {
- __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
- // Shift the low part
- __ Lsr(o_l, low, o_h);
- // Shift the high part and `or` what underflew on the low part
- __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ Lsl(temp, high, temp);
- __ orr(o_l, o_l, ShifterOperand(temp));
- // If the shift is > 32 bits, override the low part
- __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Asr(o_l, high, temp, PL);
- // Shift the high part
- __ Asr(o_h, high, o_h);
+ Register second_reg = second.AsRegister<Register>();
+
+ if (op->IsShl()) {
+ __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // Shift the high part
+ __ Lsl(o_h, high, o_l);
+ // Shift the low part and `or` what overflowed into the high part
+ __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
+ __ Lsr(temp, low, temp);
+ __ orr(o_h, o_h, ShifterOperand(temp));
+ // If the shift is >= 32 bits, override the high part
+ __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Lsl(o_h, low, temp, PL);
+ // Shift the low part
+ __ Lsl(o_l, low, o_l);
+ } else if (op->IsShr()) {
+ __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // Shift the low part
+ __ Lsr(o_l, low, o_h);
+ // Shift the high part and `or` what underflowed into the low part
+ __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ Lsl(temp, high, temp);
+ __ orr(o_l, o_l, ShifterOperand(temp));
+ // If the shift is >= 32 bits, override the low part
+ __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Asr(o_l, high, temp, PL);
+ // Shift the high part
+ __ Asr(o_h, high, o_h);
+ } else {
+ __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // Same as Shr, except we use `Lsr`s and not `Asr`s.
+ __ Lsr(o_l, low, o_h);
+ __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ Lsl(temp, high, temp);
+ __ orr(o_l, o_l, ShifterOperand(temp));
+ __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Lsr(o_l, high, temp, PL);
+ __ Lsr(o_h, high, o_h);
+ }
} else {
- __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
- // same as Shr except we use `Lsr`s and not `Asr`s
- __ Lsr(o_l, low, o_h);
- __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ Lsl(temp, high, temp);
- __ orr(o_l, o_l, ShifterOperand(temp));
- __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Lsr(o_l, high, temp, PL);
- __ Lsr(o_h, high, o_h);
+ // Register allocator doesn't create partial overlap.
+ DCHECK_NE(o_l, high);
+ DCHECK_NE(o_h, low);
+ int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t shift_value = static_cast<uint32_t>(cst & kMaxLongShiftValue);
+ if (shift_value > 32) {
+ if (op->IsShl()) {
+ __ Lsl(o_h, low, shift_value - 32);
+ __ LoadImmediate(o_l, 0);
+ } else if (op->IsShr()) {
+ __ Asr(o_l, high, shift_value - 32);
+ __ Asr(o_h, high, 31);
+ } else {
+ __ Lsr(o_l, high, shift_value - 32);
+ __ LoadImmediate(o_h, 0);
+ }
+ } else if (shift_value == 32) {
+ if (op->IsShl()) {
+ __ mov(o_h, ShifterOperand(low));
+ __ LoadImmediate(o_l, 0);
+ } else if (op->IsShr()) {
+ __ mov(o_l, ShifterOperand(high));
+ __ Asr(o_h, high, 31);
+ } else {
+ __ mov(o_l, ShifterOperand(high));
+ __ LoadImmediate(o_h, 0);
+ }
+ } else { // shift_value < 32
+ if (op->IsShl()) {
+ __ Lsl(o_h, high, shift_value);
+ __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
+ __ Lsl(o_l, low, shift_value);
+ } else if (op->IsShr()) {
+ __ Lsr(o_l, low, shift_value);
+ __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+ __ Asr(o_h, high, shift_value);
+ } else {
+ __ Lsr(o_l, low, shift_value);
+ __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+ __ Lsr(o_h, high, shift_value);
+ }
+ }
}
break;
}
default:
LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
}
}
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 979c4c8..12f0380 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,6 +260,179 @@
return arg ^ 0xf00000000000000fL;
}
+ /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+ /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2
+ /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+ /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<low>>, #2
+
+ /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+ /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shl2(long arg) {
+ // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2).
+ return arg << 2;
+ }
+
+ /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+ /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31
+ /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
+ /// CHECK: lsl{{s?|.w}} {{r\d+}}, <<low>>, #31
+
+ /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+ /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shl31(long arg) {
+ return arg << 31;
+ }
+
+ /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+ /// CHECK-DAG: mov {{r\d+}}, {{r\d+}}
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+ /// CHECK-NOT: lsl{{s?|.w}}
+
+ public static long shl32(long arg) {
+ return arg << 32;
+ }
+
+ /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+ /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+ /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shl33(long arg) {
+ return arg << 33;
+ }
+
+ /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+ /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+ /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shl63(long arg) {
+ return arg << 63;
+ }
+
+ /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+ /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
+ /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #1
+
+ /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+ /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shr1(long arg) {
+ return arg >> 1;
+ }
+
+ /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+ /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
+ /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+ /// CHECK: asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+ /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+ /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shr31(long arg) {
+ return arg >> 31;
+ }
+
+ /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+ /// CHECK-DAG: mov {{r\d+}}, <<high>>
+
+ /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+ /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+ /// CHECK-NOT: lsr{{s?|.w}}
+
+ public static long shr32(long arg) {
+ return arg >> 32;
+ }
+
+ /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+ /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+ /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shr33(long arg) {
+ return arg >> 33;
+ }
+
+ /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+ /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+ /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+ /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long shr63(long arg) {
+ return arg >> 63;
+ }
+
+ /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+ /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
+ /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
+ /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, <<high>>, #1
+
+ /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+ /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long ushr1(long arg) {
+ return arg >>> 1;
+ }
+
+ /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+ /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
+ /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+ /// CHECK: lsr{{s?|.w}} {{r\d+}}, <<high>>, #31
+
+ /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+ /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long ushr31(long arg) {
+ return arg >>> 31;
+ }
+
+ /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+ /// CHECK-DAG: mov {{r\d+}}, {{r\d+}}
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+ /// CHECK-NOT: lsr{{s?|.w}}
+
+ public static long ushr32(long arg) {
+ return arg >>> 32;
+ }
+
+ /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+ /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+ /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long ushr33(long arg) {
+ return arg >>> 33;
+ }
+
+ /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+ /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31
+ /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0
+
+ /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+ /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+ public static long ushr63(long arg) {
+ return arg >>> 63;
+ }
+
/**
* Test that the `-1` constant is not synthesized in a register and that we
* instead simply switch between `add` and `sub` instructions with the
@@ -311,5 +484,38 @@
assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
assertLongEquals(14, addM1(7));
+
+ assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
+ assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
+ assertLongEquals(shl32(longArg), 0x8765432100000000L);
+ assertLongEquals(shl33(longArg), 0x0eca864200000000L);
+ assertLongEquals(shl63(longArg), 0x8000000000000000L);
+ assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
+ assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
+ assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
+ assertLongEquals(shl33(~longArg), 0xf13579bc00000000L);
+ assertLongEquals(shl63(~longArg), 0x0000000000000000L);
+
+ assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+ assertLongEquals(shr31(longArg), 0x000000002468acf1L);
+ assertLongEquals(shr32(longArg), 0x0000000012345678L);
+ assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
+ assertLongEquals(shr63(longArg), 0x0000000000000000L);
+ assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+ assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
+ assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
+ assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
+ assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
+
+ assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+ assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
+ assertLongEquals(ushr32(longArg), 0x0000000012345678L);
+ assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
+ assertLongEquals(ushr63(longArg), 0x0000000000000000L);
+ assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+ assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
+ assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
+ assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
+ assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
}
}