Support all conditions in predicated vectorization
Support all condition types (previously only equality) in the condition
of the diamond when performing diamond loop auto-vectorization. This
allows the optimization to be performed on a greater variety of loops.
To support this change, new vector condition nodes are added to
mirror the scalar condition nodes.
Also add a new gtest class to test whether predicated vectorization
can be performed on different combinations of condition types and
data types.
Authors: Chris Jones <christopher.jones@arm.com>,
Konstantin Baladurin <konstantin.baladurin@arm.com>
Test: export ART_FORCE_TRY_PREDICATED_SIMD=true && \
art/test.py --target --optimizing
Test: art/test.py --target --host --optimizing
Test: 661-checker-simd-cf-loops
Test: art/test.py --gtest art_compiler_tests
Change-Id: Ic9c925f1a58ada13d9031de3b445dcd4f77764b7
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 370c0b8..197a283 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -175,6 +175,7 @@
"optimizing/loop_analysis.cc",
"optimizing/loop_optimization.cc",
"optimizing/nodes.cc",
+ "optimizing/nodes_vector.cc",
"optimizing/optimization.cc",
"optimizing/optimizing_compiler.cc",
"optimizing/parallel_move_resolver.cc",
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index fde9a40..07e1d43 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -602,6 +602,14 @@
return vixl::aarch64::p2;
}
}
+
+ // Generate a vector comparison instruction based on the IfCondition.
+ void GenerateIntegerVecComparison(const vixl::aarch64::PRegisterWithLaneSize& pd,
+ const vixl::aarch64::PRegisterZ& pg,
+ const vixl::aarch64::ZRegister& zn,
+ const vixl::aarch64::ZRegister& zm,
+ IfCondition cond);
+ void HandleVecCondition(HVecCondition* instruction);
};
class LocationsBuilderARM64Sve : public LocationsBuilderARM64 {
@@ -615,6 +623,8 @@
FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
+ private:
+ void HandleVecCondition(HVecCondition* instruction);
};
class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index c2edce3..d6f0d59 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -5825,12 +5825,103 @@
LOG(FATAL) << "Unimplemented";
}
-void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) {
+void LocationsBuilderRISCV64::VisitVecEqual(HVecEqual* instruction) {
UNUSED(instruction);
LOG(FATAL) << "Unimplemented";
}
-void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorRISCV64::VisitVecEqual(HVecEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecNotEqual(HVecNotEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecNotEqual(HVecNotEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecLessThan(HVecLessThan* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecLessThan(HVecLessThan* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecGreaterThanOrEqual(HVecGreaterThanOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecBelow(HVecBelow* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecBelow(HVecBelow* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAbove(HVecAbove* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAbove(HVecAbove* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
UNUSED(instruction);
LOG(FATAL) << "Unimplemented";
}
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 848b5e7..53a2ec7 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -1541,12 +1541,103 @@
UNREACHABLE();
}
-void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+void LocationsBuilderARM64Neon::VisitVecEqual(HVecEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecEqual(HVecEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecGreaterThanOrEqual(HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index ccff02e..3d9bd91 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -1207,14 +1207,56 @@
}
}
-void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorARM64Sve::GenerateIntegerVecComparison(
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ IfCondition cond) {
+ switch (cond) {
+ case kCondEQ:
+ __ Cmpeq(pd, pg, zn, zm);
+ return;
+ case kCondNE:
+ __ Cmpne(pd, pg, zn, zm);
+ return;
+ case kCondLT:
+ __ Cmplt(pd, pg, zn, zm);
+ return;
+ case kCondLE:
+ __ Cmple(pd, pg, zn, zm);
+ return;
+ case kCondGT:
+ __ Cmpgt(pd, pg, zn, zm);
+ return;
+ case kCondGE:
+ __ Cmpge(pd, pg, zn, zm);
+ return;
+ case kCondB:
+ __ Cmplo(pd, pg, zn, zm);
+ return;
+ case kCondBE:
+ __ Cmpls(pd, pg, zn, zm);
+ return;
+ case kCondA:
+ __ Cmphi(pd, pg, zn, zm);
+ return;
+ case kCondAE:
+ __ Cmphs(pd, pg, zn, zm);
+ return;
+ }
+ LOG(FATAL) << "Condition '" << enum_cast<uint32_t>(cond) << "' not supported: ";
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Sve::HandleVecCondition(HVecCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorARM64Sve::HandleVecCondition(HVecCondition* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister left = ZRegisterFrom(locations->InAt(0));
@@ -1228,21 +1270,37 @@
HVecOperation::ToSignedType(b->GetPackedType()));
ValidateVectorLength(instruction);
- // TODO: Support other condition OPs and types.
+ // TODO: Support other types, e.g. boolean, float and double.
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB());
+ GenerateIntegerVecComparison(output_p_reg.VnB(),
+ p_reg,
+ left.VnB(),
+ right.VnB(),
+ instruction->GetCondition());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH());
+ GenerateIntegerVecComparison(output_p_reg.VnH(),
+ p_reg,
+ left.VnH(),
+ right.VnH(),
+ instruction->GetCondition());
break;
case DataType::Type::kInt32:
- __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS());
+ GenerateIntegerVecComparison(output_p_reg.VnS(),
+ p_reg,
+ left.VnS(),
+ right.VnS(),
+ instruction->GetCondition());
break;
case DataType::Type::kInt64:
- __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD());
+ GenerateIntegerVecComparison(output_p_reg.VnD(),
+ p_reg,
+ left.VnD(),
+ right.VnD(),
+ instruction->GetCondition());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1250,6 +1308,24 @@
}
}
+#define FOR_EACH_VEC_CONDITION_INSTRUCTION(M) \
+ M(VecEqual) \
+ M(VecNotEqual) \
+ M(VecLessThan) \
+ M(VecLessThanOrEqual) \
+ M(VecGreaterThan) \
+ M(VecGreaterThanOrEqual) \
+ M(VecBelow) \
+ M(VecBelowOrEqual) \
+ M(VecAbove) \
+ M(VecAboveOrEqual)
+#define DEFINE_VEC_CONDITION_VISITORS(Name) \
+void LocationsBuilderARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); } \
+void InstructionCodeGeneratorARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); }
+FOR_EACH_VEC_CONDITION_INSTRUCTION(DEFINE_VEC_CONDITION_VISITORS)
+#undef DEFINE_VEC_CONDITION_VISITORS
+#undef FOR_EACH_VEC_CONDITION_INSTRUCTION
+
void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK(instruction->InputAt(0)->IsVecPredSetOperation());
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 70f22af..5cf34dd 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -1079,12 +1079,104 @@
UNREACHABLE();
}
-void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+void LocationsBuilderARMVIXL::VisitVecEqual(HVecEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecEqual(HVecEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 1f9b257..da61764 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1411,12 +1411,104 @@
UNREACHABLE();
}
-void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) {
+void LocationsBuilderX86::VisitVecEqual(HVecEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorX86::VisitVecEqual(HVecEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 47afa3b..1ecd5f0 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1384,12 +1384,103 @@
UNREACHABLE();
}
-void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) {
+void LocationsBuilderX86_64::VisitVecEqual(HVecEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecEqual(HVecEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNotEqual(HVecNotEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecLessThan(HVecLessThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecLessThanOrEqual(HVecLessThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecGreaterThan(HVecGreaterThan* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecGreaterThanOrEqual(HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecGreaterThanOrEqual(
+ HVecGreaterThanOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecBelow(HVecBelow* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecBelowOrEqual(HVecBelowOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAbove(HVecAbove* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAboveOrEqual(HVecAboveOrEqual* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4f8551a..98b0550 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -74,6 +74,11 @@
DataType::Type type,
const CodegenTargetConfig target_config);
void TestPackedSwitch(const CodegenTargetConfig target_config);
+ void TestVectorComparison(IfCondition condition,
+ int64_t lhs_value,
+ int64_t rhs_value,
+ DataType::Type type,
+ CodeGenerator* codegen);
};
void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) {
@@ -594,6 +599,35 @@
TestCode(data, true, 2);
}
+static bool GetExpectedResultFromComparison(IfCondition condition, int64_t lhs, int64_t rhs) {
+ const uint64_t unsigned_lhs = lhs;
+ const uint64_t unsigned_rhs = rhs;
+ switch (condition) {
+ case kCondEQ:
+ return lhs == rhs;
+ case kCondNE:
+ return lhs != rhs;
+ case kCondLT:
+ return lhs < rhs;
+ case kCondLE:
+ return lhs <= rhs;
+ case kCondGT:
+ return lhs > rhs;
+ case kCondGE:
+ return lhs >= rhs;
+ case kCondB:
+ return unsigned_lhs < unsigned_rhs;
+ case kCondBE:
+ return unsigned_lhs <= unsigned_rhs;
+ case kCondA:
+ return unsigned_lhs > unsigned_rhs;
+ case kCondAE:
+ return unsigned_lhs >= unsigned_rhs;
+ }
+ LOG(FATAL) << "Condition '" << enum_cast<uint32_t>(condition) << "' not supported: ";
+ UNREACHABLE();
+}
+
// Helper method.
void CodegenTest::TestComparison(IfCondition condition,
int64_t i,
@@ -613,47 +647,13 @@
op2 = graph_->GetLongConstant(j);
}
- bool expected_result = false;
- const uint64_t x = i;
- const uint64_t y = j;
- switch (condition) {
- case kCondEQ:
- expected_result = (i == j);
- break;
- case kCondNE:
- expected_result = (i != j);
- break;
- case kCondLT:
- expected_result = (i < j);
- break;
- case kCondLE:
- expected_result = (i <= j);
- break;
- case kCondGT:
- expected_result = (i > j);
- break;
- case kCondGE:
- expected_result = (i >= j);
- break;
- case kCondB:
- expected_result = (x < y);
- break;
- case kCondBE:
- expected_result = (x <= y);
- break;
- case kCondA:
- expected_result = (x > y);
- break;
- case kCondAE:
- expected_result = (x >= y);
- break;
- }
HInstruction* comparison = MakeCondition(block, condition, op1, op2);
MakeReturn(block, comparison);
graph_->BuildDominatorTree();
std::unique_ptr<CompilerOptions> compiler_options =
CommonCompilerTest::CreateCompilerOptions(target_config.GetInstructionSet(), "default");
+ bool expected_result = GetExpectedResultFromComparison(condition, i, j);
RunCode(target_config, *compiler_options, graph_, [](HGraph*) {}, true, expected_result);
}
@@ -976,6 +976,79 @@
}
}
+void CodegenTest::TestVectorComparison(IfCondition condition,
+ int64_t lhs_value,
+ int64_t rhs_value,
+ DataType::Type type,
+ CodeGenerator* codegen) {
+ HBasicBlock* block = entry_block_->GetSingleSuccessor();
+
+ size_t vector_size_in_bytes = codegen->GetSIMDRegisterWidth();
+
+ HVecPredSetAll* predicate = MakeVecPredSetAll(block,
+ graph_->GetIntConstant(1),
+ type,
+ vector_size_in_bytes);
+ HVecReplicateScalar* op1 = MakeVecReplicateScalar(block,
+ graph_->GetConstant(type, lhs_value),
+ type,
+ vector_size_in_bytes,
+ predicate);
+ HVecReplicateScalar* op2 = MakeVecReplicateScalar(block,
+ graph_->GetConstant(type, rhs_value),
+ type,
+ vector_size_in_bytes,
+ predicate);
+ HVecCondition* comparison = MakeVecCondition(block,
+ condition,
+ op1,
+ op2,
+ type,
+ vector_size_in_bytes,
+ predicate);
+ HInstruction* boolean_return = MakeVecPredToBoolean(block,
+ comparison,
+ HVecPredToBoolean::PCondKind::kFirst,
+ type,
+ vector_size_in_bytes);
+ MakeReturn(block, boolean_return);
+
+ graph_->SetHasPredicatedSIMD(true);
+ graph_->BuildDominatorTree();
+
+ if (CanExecute(*codegen)) {
+ bool expected_result = GetExpectedResultFromComparison(condition, lhs_value, rhs_value);
+ RunCode(codegen, graph_, [](HGraph*) {}, true, expected_result);
+ }
+}
+
+// Define tests ensuring that all types of conditions can be generated correctly and return the
+// expected result.
+#define DEFINE_CONDITION_TESTS(CondType) \
+TEST_F(CodegenTest, ComparisonsVector##CondType) { \
+ std::unique_ptr<CompilerOptions> compiler_options = \
+ CommonCompilerTest::CreateCompilerOptions(InstructionSet::kArm64, "default", "sve"); \
+ for (int64_t i = -1; i <= 1; i++) { \
+ for (int64_t j = -1; j <= 1; j++) { \
+ for (int cond = kCondFirst; cond <= kCondLast; cond++) { \
+ InitEntryMainExitGraph(); \
+ TestCodeGeneratorARM64 codegen(graph_, *compiler_options); \
+ if (!codegen.SupportsPredicatedSIMD()) { \
+ GTEST_SKIP() << "Predicated SIMD is not supported."; \
+ } \
+ TestVectorComparison( \
+ static_cast<IfCondition>(cond), i, j, DataType::Type::k##CondType, &codegen); \
+ } \
+ } \
+ } \
+}
+DEFINE_CONDITION_TESTS(Uint8)
+DEFINE_CONDITION_TESTS(Int8)
+DEFINE_CONDITION_TESTS(Uint16)
+DEFINE_CONDITION_TESTS(Int16)
+DEFINE_CONDITION_TESTS(Int32)
+#undef DEFINE_CONDITION_TESTS
+
#endif
} // namespace art
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 7c2e918..f15900f 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -226,7 +226,7 @@
}
TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
- constexpr size_t vlen1 = kDefaultTestVectorSize;
+ constexpr size_t vlen1 = kDefaultTestVectorSizeInBytes;
constexpr size_t vlen2 = vlen1 / 2;
HBasicBlock* main = InitEntryMainExitGraphWithReturnVoid();
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 2159869..2f1aea6 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -875,13 +875,6 @@
return exit;
}
-// Determines whether predicated loop vectorization should be tried for ALL loops.
-#ifdef ART_FORCE_TRY_PREDICATED_SIMD
- static constexpr bool kForceTryPredicatedSIMD = true;
-#else
- static constexpr bool kForceTryPredicatedSIMD = false;
-#endif
-
bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -2049,7 +2042,6 @@
case InstructionSet::kArm64:
if (IsInPredicatedVectorizationMode()) {
// SVE vectorization.
- CHECK(features->AsArm64InstructionSetFeatures()->HasSVE());
size_t vector_length = simd_register_size_ / DataType::Size(type);
DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
switch (type) {
@@ -2396,6 +2388,13 @@
} \
break;
+// Some instructions in the scalar loop body can only occur in loops with control flow; for such
+// loops we don't support cleanup loops (generated via kSequential); see TryVectorizePredicated.
+#define GENERATE_PRED_VEC(x) \
+ DCHECK_EQ(synthesis_mode_, LoopSynthesisMode::kVector); \
+ vector = (x); \
+ break;
+
HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -2469,13 +2468,46 @@
GENERATE_VEC(
new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HAbs(org_type, opa, dex_pc));
- case HInstruction::kEqual: {
- // Special case.
- DCHECK_EQ(synthesis_mode_, LoopSynthesisMode::kVector);
- vector = new (global_allocator_)
- HVecCondition(global_allocator_, opa, opb, type, vector_length_, dex_pc);
- }
- break;
+ case HInstruction::kEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kNotEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecNotEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kLessThan:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecLessThan(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kLessThanOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecLessThanOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kGreaterThan:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecGreaterThan(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kGreaterThanOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecGreaterThanOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kBelow:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecBelow(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kBelowOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecBelowOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kAbove:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecAbove(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kAboveOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecAboveOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
default:
break;
} // switch
@@ -2733,8 +2765,7 @@
return false;
}
- if (!if_input->IsEqual()) {
- // TODO: Support other condition types.
+ if (!if_input->IsCondition()) {
return false;
}
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 546d42b..eaeb313 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -31,6 +31,13 @@
class CompilerOptions;
class ArchNoOptsLoopHelper;
+// Determines whether predicated loop vectorization should be tried for ALL loops.
+#ifdef ART_FORCE_TRY_PREDICATED_SIMD
+ static constexpr bool kForceTryPredicatedSIMD = true;
+#else
+ static constexpr bool kForceTryPredicatedSIMD = false;
+#endif
+
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
* the detected nested loops, such as removal of dead induction and empty loops
@@ -572,6 +579,7 @@
ArchNoOptsLoopHelper* arch_loop_helper_;
friend class LoopOptimizationTest;
+ friend class PredicatedSimdLoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
};
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 81867be..c60b66b 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -14,29 +14,29 @@
* limitations under the License.
*/
+#include "android-base/logging.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "loop_optimization.h"
+#include "optimizing/data_type.h"
+#include "optimizing/nodes.h"
#include "optimizing_unit_test.h"
namespace art HIDDEN {
-/**
- * Fixture class for the loop optimization tests. These unit tests focus
- * constructing the loop hierarchy. Actual optimizations are tested
- * through the checker tests.
- */
-class LoopOptimizationTest : public OptimizingUnitTest {
+// Base class for loop optimization tests.
+class LoopOptimizationTestBase : public OptimizingUnitTest {
protected:
void SetUp() override {
OptimizingUnitTest::SetUp();
- graph_ = CreateGraph();
BuildGraph();
iva_ = new (GetAllocator()) HInductionVarAnalysis(graph_);
- compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
- DCHECK(compiler_options_ != nullptr);
+ if (compiler_options_ == nullptr) {
+ compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
+ DCHECK(compiler_options_ != nullptr);
+ }
codegen_ = CodeGenerator::Create(graph_, *compiler_options_);
DCHECK(codegen_.get() != nullptr);
loop_opt_ = new (GetAllocator()) HLoopOptimization(
@@ -51,24 +51,52 @@
OptimizingUnitTest::TearDown();
}
+ virtual void BuildGraph() = 0;
+
+ // Run loop optimization and optionally check the graph.
+ void PerformAnalysis(bool run_checker) {
+ graph_->BuildDominatorTree();
+
+ // Check the graph is valid before loop optimization.
+ std::ostringstream oss;
+ if (run_checker) {
+ ASSERT_TRUE(CheckGraph(oss)) << oss.str();
+ }
+
+ iva_->Run();
+ loop_opt_->Run();
+
+ // Check the graph is valid after loop optimization.
+ if (run_checker) {
+ ASSERT_TRUE(CheckGraph(oss)) << oss.str();
+ }
+ }
+
+ // General building fields.
+ std::unique_ptr<CompilerOptions> compiler_options_;
+ std::unique_ptr<CodeGenerator> codegen_;
+ HInductionVarAnalysis* iva_;
+ HLoopOptimization* loop_opt_;
+
+ HBasicBlock* return_block_;
+
+ HInstruction* parameter_;
+};
+
+/**
+ * Fixture class for the loop optimization tests. These unit tests mostly focus
+ * on constructing the loop hierarchy. Checker tests are also used to test
+ * specific optimizations.
+ */
+class LoopOptimizationTest : public LoopOptimizationTestBase {
+ protected:
virtual ~LoopOptimizationTest() {}
/** Constructs bare minimum graph. */
- void BuildGraph() {
+ void BuildGraph() override {
+ return_block_ = InitEntryMainExitGraphWithReturnVoid();  // Keep ReturnVoid in return_block_.
graph_->SetNumberOfVRegs(1);
- entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
- return_block_ = new (GetAllocator()) HBasicBlock(graph_);
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry_block_);
- graph_->AddBlock(return_block_);
- graph_->AddBlock(exit_block_);
- graph_->SetEntryBlock(entry_block_);
- graph_->SetExitBlock(exit_block_);
parameter_ = MakeParam(DataType::Type::kInt32);
- MakeReturnVoid(return_block_);
- MakeExit(exit_block_);
- entry_block_->AddSuccessor(return_block_);
- return_block_->AddSuccessor(exit_block_);
}
/** Adds a loop nest at given position before successor. */
@@ -87,13 +115,6 @@
return header;
}
- /** Performs analysis. */
- void PerformAnalysis() {
- graph_->BuildDominatorTree();
- iva_->Run();
- loop_opt_->Run();
- }
-
/** Constructs string representation of computed loop hierarchy. */
std::string LoopStructure() {
return LoopStructureRecurse(loop_opt_->top_loop_);
@@ -109,34 +130,118 @@
}
return s;
}
-
- // General building fields.
- HGraph* graph_;
-
- std::unique_ptr<CompilerOptions> compiler_options_;
- std::unique_ptr<CodeGenerator> codegen_;
- HInductionVarAnalysis* iva_;
- HLoopOptimization* loop_opt_;
-
- HBasicBlock* entry_block_;
- HBasicBlock* return_block_;
- HBasicBlock* exit_block_;
-
- HInstruction* parameter_;
};
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Unit tests for predicated vectorization.
+class PredicatedSimdLoopOptimizationTest : public LoopOptimizationTestBase {
+ protected:
+ void SetUp() override {
+ // Predicated SIMD is only supported by SVE on Arm64.
+ compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kArm64,
+ "default",
+ "sve");
+ LoopOptimizationTestBase::SetUp();
+ }
+
+ virtual ~PredicatedSimdLoopOptimizationTest() {}
+
+ // Constructs a graph with a diamond loop which should be vectorizable with predicated
+ // vectorization. This graph includes a basic loop induction (consisting of Phi, Add,
+ // GreaterThanOrEqual, If and SuspendCheck instructions) to control the loop as well as an If on
+ // a Parameter to control the diamond; ReplaceIfCondition swaps in the condition under test.
+ //
+ // entry
+ // |
+ // preheader
+ // |
+ // return <------------ header <----------------+
+ // | | |
+ // exit diamond_top |
+ // / \ |
+ // diamond_true diamond_false |
+ // \ / |
+ // back_edge |
+ // | |
+ // +---------------------+
+ void BuildGraph() override {
+ return_block_ = InitEntryMainExitGraphWithReturnVoid();
+ HBasicBlock* back_edge;
+ std::tie(std::ignore, header_, back_edge) = CreateWhileLoop(return_block_);
+ std::tie(diamond_top_, diamond_true_, std::ignore) = CreateDiamondPattern(back_edge);
+
+ parameter_ = MakeParam(DataType::Type::kInt32);
+ std::tie(phi_, std::ignore) = MakeLinearLoopVar(header_, back_edge, 0, 1);
+ MakeSuspendCheck(header_);
+ HInstruction* trip = MakeCondition(header_,
+ kCondGE,
+ phi_,
+ graph_->GetIntConstant(kArm64DefaultSVEVectorLength));
+ MakeIf(header_, trip);
+ diamond_hif_ = MakeIf(diamond_top_, parameter_);
+ }
+
+ // Add an ArraySet to the loop which will be vectorized, thus setting the type of vector
+ // instructions in the graph to the given vector_type. This needs to be called to ensure the loop
+ // is not simplified by SimplifyInduction or SimplifyBlocks before vectorization.
+ void AddArraySetToLoop(DataType::Type vector_type) {
+ // Ensure the data type is a java type so it can be stored in a TypeField. The actual type does
+ // not matter as long as the size is the same so it can still be vectorized.
+ DataType::Type new_type = DataType::SignedIntegralTypeFromSize(DataType::Size(vector_type));
+
+ // Add an array set to prevent the loop from being optimized away before vectorization.
+ // Note: This uses an integer parameter and not an array reference to avoid the difficulties in
+ // allocating an array. The instruction is still treated as a valid ArraySet by loop
+ // optimization.
+ diamond_true_->AddInstruction(new (GetAllocator()) HArraySet(parameter_,
+ phi_,
+ graph_->GetIntConstant(1),
+ new_type,
+ /* dex_pc= */ 0));
+ }
+
+ // Replace the input of diamond_hif_ with a new condition of the given types.
+ void ReplaceIfCondition(DataType::Type l_type,
+ DataType::Type r_type,
+ HBasicBlock* condition_block,
+ IfCondition cond) {
+ AddArraySetToLoop(l_type);
+ HInstruction* l_param = MakeParam(l_type);
+ HInstruction* r_param = MakeParam(r_type);
+ HCondition* condition = MakeCondition(condition_block, cond, l_param, r_param);
+ diamond_hif_->ReplaceInput(condition, 0);
+ }
+
+ // Is loop optimization able to vectorize predicated code?
+ bool IsPredicatedVectorizationSupported() {
+ // Mirror the check guarding TryVectorizePredicated in TryOptimizeInnerLoopFinite.
+ return kForceTryPredicatedSIMD && loop_opt_->IsInPredicatedVectorizationMode();
+ }
+
+ HBasicBlock* header_;
+ HBasicBlock* diamond_top_;
+ HBasicBlock* diamond_true_;
+
+ HPhi* phi_;
+ HIf* diamond_hif_;
+};
+
+#endif // ART_ENABLE_CODEGEN_arm64
+
//
// The actual tests.
//
+// Loop structure tests can't run the graph checker because they don't create valid graphs.
+
TEST_F(LoopOptimizationTest, NoLoops) {
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("", LoopStructure());
}
TEST_F(LoopOptimizationTest, SingleLoop) {
AddLoop(entry_block_, return_block_);
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("[]", LoopStructure());
}
@@ -147,7 +252,7 @@
s = AddLoop(b, s);
b = s->GetSuccessors()[0];
}
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure());
}
@@ -158,7 +263,7 @@
b = AddLoop(b, s);
s = b->GetSuccessors()[1];
}
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("[][][][][][][][][][]", LoopStructure());
}
@@ -175,7 +280,7 @@
bi = si->GetSuccessors()[0];
}
}
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("[]"
"[[]]"
"[[[]]]"
@@ -202,7 +307,7 @@
b = AddLoop(b, s);
s = b->GetSuccessors()[1];
}
- PerformAnalysis();
+ PerformAnalysis(/*run_checker=*/ false);
EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
}
@@ -326,4 +431,40 @@
EXPECT_EQ(header_phi->InputAt(1), body_add);
}
+#ifdef ART_ENABLE_CODEGEN_arm64
+#define FOR_EACH_CONDITION_INSTRUCTION(M, CondType) \
+ M(EQ, CondType) \
+ M(NE, CondType) \
+ M(LT, CondType) \
+ M(LE, CondType) \
+ M(GT, CondType) \
+ M(GE, CondType) \
+ M(B, CondType) \
+ M(BE, CondType) \
+ M(A, CondType) \
+ M(AE, CondType)
+
+// Define tests ensuring that all types of conditions can be handled in predicated vectorization
+// for diamond loops.
+#define DEFINE_CONDITION_TESTS(Name, CondType) \
+TEST_F(PredicatedSimdLoopOptimizationTest, VectorizeCondition##Name##CondType) { \
+ if (!IsPredicatedVectorizationSupported()) { \
+ GTEST_SKIP() << "Predicated SIMD is not enabled."; \
+ } \
+ ReplaceIfCondition(DataType::Type::k##CondType, \
+ DataType::Type::k##CondType, \
+ diamond_top_, \
+ kCond##Name); \
+ PerformAnalysis(/*run_checker=*/ true); \
+ EXPECT_TRUE(graph_->HasPredicatedSIMD()); \
+}
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint8)
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int8)
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Uint16)
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int16)
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_TESTS, Int32)
+#undef DEFINE_CONDITION_TESTS
+#undef FOR_EACH_CONDITION_INSTRUCTION
+#endif // ART_ENABLE_CODEGEN_arm64
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7ed4323..676a9b9 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -76,6 +76,7 @@
class HPhi;
class HSuspendCheck;
class HTryBoundary;
+class HVecCondition;
class FieldInfo;
class LiveInterval;
class LocationSummary;
@@ -1423,8 +1424,17 @@
M(VecPredSetAll, VecPredSetOperation) \
M(VecPredWhile, VecPredSetOperation) \
M(VecPredToBoolean, VecOperation) \
- M(VecCondition, VecPredSetOperation) \
- M(VecPredNot, VecPredSetOperation) \
+ M(VecEqual, VecCondition) \
+ M(VecNotEqual, VecCondition) \
+ M(VecLessThan, VecCondition) \
+ M(VecLessThanOrEqual, VecCondition) \
+ M(VecGreaterThan, VecCondition) \
+ M(VecGreaterThanOrEqual, VecCondition) \
+ M(VecBelow, VecCondition) \
+ M(VecBelowOrEqual, VecCondition) \
+ M(VecAbove, VecCondition) \
+ M(VecAboveOrEqual, VecCondition) \
+ M(VecPredNot, VecPredSetOperation)
#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \
@@ -1492,7 +1502,8 @@
M(VecUnaryOperation, VecOperation) \
M(VecBinaryOperation, VecOperation) \
M(VecMemoryOperation, VecOperation) \
- M(VecPredSetOperation, VecOperation)
+ M(VecPredSetOperation, VecOperation) \
+ M(VecCondition, VecPredSetOperation)
#define FOR_EACH_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION(M) \
diff --git a/compiler/optimizing/nodes_vector.cc b/compiler/optimizing/nodes_vector.cc
new file mode 100644
index 0000000..b1a3cb8
--- /dev/null
+++ b/compiler/optimizing/nodes_vector.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nodes.h"
+
+namespace art HIDDEN {
+
+HVecCondition* HVecCondition::Create(HGraph* graph,
+ IfCondition cond,
+ HInstruction* lhs,
+ HInstruction* rhs,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) {
+ ArenaAllocator* allocator = graph->GetAllocator();
+ switch (cond) {
+ case kCondEQ: return new (allocator) HVecEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondNE: return new (allocator) HVecNotEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondLT: return new (allocator) HVecLessThan(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondLE: return new (allocator) HVecLessThanOrEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondGT: return new (allocator) HVecGreaterThan(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondGE: return new (allocator) HVecGreaterThanOrEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondB: return new (allocator) HVecBelow(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondBE: return new (allocator) HVecBelowOrEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondA: return new (allocator) HVecAbove(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ case kCondAE: return new (allocator) HVecAboveOrEqual(allocator,
+ lhs,
+ rhs,
+ packed_type,
+ vector_length,
+ dex_pc);
+ }
+ LOG(FATAL) << "Unexpected condition " << cond;
+ UNREACHABLE();
+}
+
+} // namespace art
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index a5f5720..a4c22f7 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -1467,19 +1467,16 @@
//
// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind
// condition.
-//
-// Currently only kEqual is supported by this vector instruction - we don't even define
-// the kCondType here.
-// TODO: support other condition ops.
-class HVecCondition final : public HVecPredSetOperation {
+class HVecCondition : public HVecPredSetOperation {
public:
- HVecCondition(ArenaAllocator* allocator,
+ HVecCondition(InstructionKind kind,
+ ArenaAllocator* allocator,
HInstruction* left,
HInstruction* right,
DataType::Type packed_type,
size_t vector_length,
- uint32_t dex_pc) :
- HVecPredSetOperation(kVecCondition,
+ uint32_t dex_pc = kNoDexPc) :
+ HVecPredSetOperation(kind,
allocator,
packed_type,
SideEffects::None(),
@@ -1494,22 +1491,276 @@
SetRawInputAt(1, right);
}
- DECLARE_INSTRUCTION(VecCondition);
+ DECLARE_ABSTRACT_INSTRUCTION(VecCondition);
+
+ virtual IfCondition GetCondition() const = 0;
+
+ static HVecCondition* Create(HGraph* graph,
+ IfCondition cond,
+ HInstruction* lhs,
+ HInstruction* rhs,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc);
protected:
DEFAULT_COPY_CONSTRUCTOR(VecCondition);
};
+// Instruction to check if two vector inputs are equal to each other.
+class HVecEqual final : public HVecCondition {
+ public:
+ HVecEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecEqual, allocator, left, right, packed_type, vector_length, dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondEQ;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecEqual);
+};
+
+// Instruction to check if two vector inputs are not equal to each other.
+class HVecNotEqual final : public HVecCondition {
+ public:
+ HVecNotEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecNotEqual, allocator, left, right, packed_type, vector_length, dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecNotEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondNE;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecNotEqual);
+};
+
+// Instruction to check if one vector input is less than the other.
+class HVecLessThan final : public HVecCondition {
+ public:
+ HVecLessThan(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecLessThan, allocator, left, right, packed_type, vector_length, dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecLessThan);
+
+ IfCondition GetCondition() const override {
+ return kCondLT;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecLessThan);
+};
+
+// Instruction to check if one vector input is less than or equal to the other.
+class HVecLessThanOrEqual final : public HVecCondition {
+ public:
+ HVecLessThanOrEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecLessThanOrEqual,
+ allocator,
+ left,
+ right,
+ packed_type,
+ vector_length,
+ dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecLessThanOrEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondLE;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecLessThanOrEqual);
+};
+
+// Instruction to check if one vector input is greater than the other.
+class HVecGreaterThan final : public HVecCondition {
+ public:
+ HVecGreaterThan(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecGreaterThan,
+ allocator,
+ left,
+ right,
+ packed_type,
+ vector_length,
+ dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecGreaterThan);
+
+ IfCondition GetCondition() const override {
+ return kCondGT;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecGreaterThan);
+};
+
+// Instruction to check if one vector input is greater than or equal to the other.
+class HVecGreaterThanOrEqual final : public HVecCondition {
+ public:
+ HVecGreaterThanOrEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecGreaterThanOrEqual,
+ allocator,
+ left,
+ right,
+ packed_type,
+ vector_length,
+ dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecGreaterThanOrEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondGE;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecGreaterThanOrEqual);
+};
+
+// Instruction to check if one unsigned vector input is less than the other, using unsigned
+// comparison.
+class HVecBelow final : public HVecCondition {
+ public:
+ HVecBelow(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecBelow, allocator, left, right, packed_type, vector_length, dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecBelow);
+
+ IfCondition GetCondition() const override {
+ return kCondB;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecBelow);
+};
+
+// Instruction to check if one unsigned vector input is less than or equal to the other, using
+// unsigned comparison.
+class HVecBelowOrEqual final : public HVecCondition {
+ public:
+ HVecBelowOrEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecBelowOrEqual,
+ allocator,
+ left,
+ right,
+ packed_type,
+ vector_length,
+ dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecBelowOrEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondBE;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecBelowOrEqual);
+};
+
+// Instruction to check if one unsigned vector input is greater than the other, using unsigned
+// comparison.
+class HVecAbove final : public HVecCondition {
+ public:
+ HVecAbove(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecAbove, allocator, left, right, packed_type, vector_length, dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecAbove);
+
+ IfCondition GetCondition() const override {
+ return kCondA;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecAbove);
+};
+
+// Instruction to check if one unsigned vector input is greater than or equal to the other, using
+// unsigned comparison.
+class HVecAboveOrEqual final : public HVecCondition {
+ public:
+ HVecAboveOrEqual(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecCondition(kVecAboveOrEqual,
+ allocator,
+ left,
+ right,
+ packed_type,
+ vector_length,
+ dex_pc) {}
+
+ DECLARE_INSTRUCTION(VecAboveOrEqual);
+
+ IfCondition GetCondition() const override {
+ return kCondAE;
+ }
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecAboveOrEqual);
+};
+
// Inverts every component in the predicate vector.
//
// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ].
class HVecPredNot final : public HVecPredSetOperation {
public:
HVecPredNot(ArenaAllocator* allocator,
- HInstruction* input,
- DataType::Type packed_type,
- size_t vector_length,
- uint32_t dex_pc) :
+ HInstruction* input,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
HVecPredSetOperation(kVecPredNot,
allocator,
packed_type,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index e2f3e0a..018ffce 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -632,7 +632,7 @@
HInstruction* index,
HInstruction* value,
DataType::Type packed_type,
- size_t vector_size_in_bytes = kDefaultTestVectorSize,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
uint32_t dex_pc = kNoDexPc) {
size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
SideEffects side_effects = SideEffects::ArrayWriteOfType(packed_type);
@@ -642,12 +642,42 @@
return vec_store;
}
- HVecPredToBoolean* MakeVecPredToBoolean(HBasicBlock* block,
- HInstruction* input,
- HVecPredToBoolean::PCondKind pred_cond,
- DataType::Type packed_type,
- size_t vector_size_in_bytes = kDefaultTestVectorSize,
- uint32_t dex_pc = kNoDexPc) {
+ HVecPredSetAll* MakeVecPredSetAll(HBasicBlock* block,
+ HInstruction* input,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
+ uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
+ HVecPredSetAll* predicate = new (GetAllocator()) HVecPredSetAll(
+ GetAllocator(), input, packed_type, num_of_elements, dex_pc);
+ AddOrInsertInstruction(block, predicate);
+ return predicate;
+ }
+
+ HVecReplicateScalar* MakeVecReplicateScalar(
+ HBasicBlock* block,
+ HInstruction* scalar,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
+ HVecPredSetOperation* predicate = nullptr,
+ uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
+ HVecReplicateScalar* vec_replicate_scalar = new (GetAllocator()) HVecReplicateScalar(
+ GetAllocator(), scalar, packed_type, num_of_elements, dex_pc);
+ AddOrInsertInstruction(block, vec_replicate_scalar);
+ if (predicate != nullptr) {
+ vec_replicate_scalar->SetMergingGoverningPredicate(predicate);
+ }
+ return vec_replicate_scalar;
+ }
+
+ HVecPredToBoolean* MakeVecPredToBoolean(
+ HBasicBlock* block,
+ HInstruction* input,
+ HVecPredToBoolean::PCondKind pred_cond,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
+ uint32_t dex_pc = kNoDexPc) {
size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
HVecPredToBoolean* vec_pred_to_boolean = new (GetAllocator()) HVecPredToBoolean(
GetAllocator(),
@@ -665,7 +695,7 @@
HInstruction* right,
HVecPredWhile::CondKind cond,
DataType::Type packed_type,
- size_t vector_size_in_bytes = kDefaultTestVectorSize,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
uint32_t dex_pc = kNoDexPc) {
size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
HVecPredWhile* vec_pred_while = new (GetAllocator()) HVecPredWhile(
@@ -732,6 +762,29 @@
return condition;
}
+ HVecCondition* MakeVecCondition(HBasicBlock* block,
+ IfCondition cond,
+ HInstruction* first,
+ HInstruction* second,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSizeInBytes,
+ HVecPredSetOperation* predicate = nullptr,
+ uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
+ HVecCondition* condition = HVecCondition::Create(graph_,
+ cond,
+ first,
+ second,
+ packed_type,
+ num_of_elements,
+ dex_pc);
+ AddOrInsertInstruction(block, condition);
+ if (predicate != nullptr) {
+ condition->SetMergingGoverningPredicate(predicate);
+ }
+ return condition;
+ }
+
HSelect* MakeSelect(HBasicBlock* block,
HInstruction* condition,
HInstruction* true_value,
@@ -877,7 +930,7 @@
// The default size of vectors to use for tests, in bytes. 16 bytes (128 bits) is used as it is
// commonly the smallest size of vector used in vector extensions.
- static constexpr size_t kDefaultTestVectorSize = 16;
+ static constexpr size_t kDefaultTestVectorSizeInBytes = 16;
ScopedNullHandle<mirror::Class> null_klass_;
};
diff --git a/test/661-checker-simd-cf-loops/src/Main.java b/test/661-checker-simd-cf-loops/src/Main.java
index aee6c6a..2089bda 100644
--- a/test/661-checker-simd-cf-loops/src/Main.java
+++ b/test/661-checker-simd-cf-loops/src/Main.java
@@ -59,7 +59,7 @@
/// CHECK-DAG: <<LoopP:j\d+>> VecPredWhile [<<Phi>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
//
/// CHECK-DAG: <<Load1:d\d+>> VecLoad [<<Arr:l\d+>>,<<Phi>>,<<LoopP>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Cond:j\d+>> VecCondition [<<Load1>>,<<Vec100>>,<<LoopP>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cond:j\d+>> VecEqual [<<Load1>>,<<Vec100>>,<<LoopP>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<CondR:j\d+>> VecPredNot [<<Cond>>,<<LoopP>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<AddT:d\d+>> VecAdd [<<Load1>>,<<Vec99>>,<<CondR>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<StT:d\d+>> VecStore [<<Arr>>,<<Phi>>,<<AddT>>,<<CondR>>] loop:<<Loop>> outer_loop:none
@@ -289,18 +289,45 @@
// Test condition types.
//
- /// CHECK-START-ARM64: void Main.$compile$noinline$SimpleBelow(int[]) loop_optimization (after)
+ /// CHECK-START-ARM64: void Main.$compile$noinline$SimpleCondition(int[]) loop_optimization (before)
+ //
+ /// CHECK-DAG: <<C0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<C100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<C199:i\d+>> IntConstant 199 loop:none
+ //
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<C0>>,{{i\d+}}] loop:<<Loop:B\d+>>
+ /// CHECK-DAG: <<Load:i\d+>> ArrayGet [<<Arr:l\d+>>,<<Phi>>] loop:<<Loop>>
+ /// CHECK-DAG: <<Cond:z\d+>> NotEqual [<<Load>>,<<C100>>] loop:<<Loop>>
+ /// CHECK-DAG: If [<<Cond>>] loop:<<Loop>>
+ //
+ /// CHECK-DAG: ArraySet [<<Arr>>,<<Phi>>,<<C199>>] loop:<<Loop>>
+ //
+ /// CHECK-START-ARM64: void Main.$compile$noinline$SimpleCondition(int[]) loop_optimization (after)
/// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
//
- /// CHECK-NOT: VecLoad
+ /// CHECK-DAG: <<C0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<C100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<C199:i\d+>> IntConstant 199 loop:none
+ //
+ /// CHECK-DAG: <<Vec100:d\d+>> VecReplicateScalar [<<C100>>,{{j\d+}}] packed_type:Int32 loop:none
+ /// CHECK-DAG: <<Vec199:d\d+>> VecReplicateScalar [<<C199>>,{{j\d+}}] packed_type:Int32 loop:none
+ //
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<C0>>,{{i\d+}}] loop:<<Loop:B\d+>>
+ /// CHECK-DAG: <<LoopP:j\d+>> VecPredWhile [<<Phi>>,{{i\d+}}] loop:<<Loop>>
+ //
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Arr:l\d+>>,<<Phi>>,<<LoopP>>] packed_type:Int32 loop:<<Loop>>
+ /// CHECK-DAG: <<Cond:j\d+>> VecNotEqual [<<Load>>,<<Vec100>>,<<LoopP>>] packed_type:Int32 loop:<<Loop>>
+ /// CHECK-DAG: <<CondR:j\d+>> VecPredNot [<<Cond>>,<<LoopP>>] packed_type:Int32 loop:<<Loop>>
+ /// CHECK-DAG: VecStore [<<Arr>>,<<Phi>>,<<Vec199>>,<<CondR>>] packed_type:Int32 loop:<<Loop>>
//
/// CHECK-FI:
//
- // TODO: Support other conditions.
- public static void $compile$noinline$SimpleBelow(int[] x) {
+ // Example of a condition being vectorized. See loop_optimization_test.cc and codegen_test.cc for
+ // full testing of vector conditions.
+ public static void $compile$noinline$SimpleCondition(int[] x) {
for (int i = 0; i < USED_ARRAY_LENGTH; i++) {
int val = x[i];
- if (val < MAGIC_VALUE_C) {
+ if (val == MAGIC_VALUE_C) {
x[i] += MAGIC_ADD_CONST;
}
}
@@ -451,6 +478,44 @@
}
//
+ // Non-condition if statements.
+ //
+
+ /// CHECK-START-ARM64: void Main.$compile$noinline$SingleBoolean(int[], boolean) loop_optimization (after)
+ /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
+ //
+ /// CHECK-NOT: VecLoad
+ //
+ /// CHECK-FI:
+ //
+ // Check that single boolean if statements are not vectorized because only binary condition if
+ // statements are supported.
+ public static void $compile$noinline$SingleBoolean(int[] x, boolean y) {
+ for (int i = 0; i < USED_ARRAY_LENGTH; i++) {
+ if (y) {
+ x[i] += MAGIC_ADD_CONST;
+ }
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.$compile$noinline$InstanceOf(int[], java.lang.Object) loop_optimization (after)
+ /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
+ //
+ /// CHECK-NOT: VecLoad
+ //
+ /// CHECK-FI:
+ //
+ // Check that an if statement controlled by instanceof rather than a comparison is not vectorized
+ // because only binary condition if statements are supported.
+ public static void $compile$noinline$InstanceOf(int[] x, Object y) {
+ for (int i = 0; i < USED_ARRAY_LENGTH; i++) {
+ if (y instanceof Main) {
+ x[i] += MAGIC_ADD_CONST;
+ }
+ }
+ }
+
+ //
// Main driver.
//
@@ -513,8 +578,8 @@
// Conditions.
initIntArray(intArray);
- $compile$noinline$SimpleBelow(intArray);
- expectIntEquals(23121, IntArraySum(intArray));
+ $compile$noinline$SimpleCondition(intArray);
+ expectIntEquals(18864, IntArraySum(intArray));
// Idioms.
initIntArray(intArray);
@@ -552,6 +617,16 @@
$compile$noinline$BrokenInduction(intArray);
expectIntEquals(18963, IntArraySum(intArray));
+ // Non-condition if statements.
+ initIntArray(intArray);
+ $compile$noinline$SingleBoolean(intArray, true);
+ expectIntEquals(27279, IntArraySum(intArray));
+
+ initIntArray(intArray);
+ Main instance = new Main();
+ $compile$noinline$InstanceOf(intArray, instance);
+ expectIntEquals(27279, IntArraySum(intArray));
+
System.out.println("passed");
}