Fix vectorization of shifts whose distance exceeds the SIMD "lane width".
Rationale:
ARM is a bit less forgiving about shifting by the lane width
of the SIMD instruction or more (rejecting such cases for
vectorization is no loss, since such a shift yields 0 anyway
and should be optimized differently).
Bug: 37776122
Test: test-art-target, test-art-host
Change-Id: I22d04afbfce82b4593f17c2f48c1fd5a0805d305
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index c783dde..bbc55dd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -833,17 +833,22 @@
// TODO: accept symbolic, albeit loop invariant shift factors.
HInstruction* opa = instruction->InputAt(0);
HInstruction* opb = instruction->InputAt(1);
- if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
- if (generate_code) {
- // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
- // Note that even the narrower SIMD shifts do the right thing after that.
- int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
- ? kMaxLongShiftDistance
- : kMaxIntShiftDistance;
- HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
- GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ int64_t value = 0;
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
+ // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
+ int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
+ ? kMaxLongShiftDistance
+ : kMaxIntShiftDistance;
+ int64_t distance = value & mask;
+ // Restrict shift distance to packed data type width.
+ int64_t max_distance = Primitive::ComponentSize(type) * 8;
+ if (0 <= distance && distance < max_distance) {
+ if (generate_code) {
+ HInstruction* s = graph_->GetIntConstant(distance);
+ GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ }
+ return true;
}
- return true;
}
} else if (instruction->IsInvokeStaticOrDirect()) {
// Accept particular intrinsics.
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 0f7452b..10b20b8 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -179,6 +179,11 @@
a[i] >>>= 33; // 1, since & 31
}
+ static void shl9() {
+ for (int i = 0; i < 128; i++)
+ a[i] <<= 9; // yields all-zeros
+ }
+
//
// Loop bounds.
//
@@ -259,6 +264,10 @@
shr33();
for (int i = 0; i < 128; i++) {
expectEquals((byte) 0x09, a[i], "shr33");
+ }
+ shl9();
+ for (int i = 0; i < 128; i++) {
+ expectEquals((byte) 0x00, a[i], "shl9");
a[i] = (byte) 0xf0; // reset
}
not();