aco: Fix emit_boolean_exclusive_scan in wave32 mode.
Use the lane mask instead of s2 for the register class.
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6699>
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index ba1e3dd..fc618de 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6985,6 +6985,7 @@
Temp emit_boolean_exclusive_scan(isel_context *ctx, nir_op op, Temp src)
{
Builder bld(ctx->program, ctx->block);
+ assert(src.regClass() == bld.lm);
//subgroupExclusiveAnd(val) -> mbcnt(exec & ~val) == 0
//subgroupExclusiveOr(val) -> mbcnt(val & exec) != 0
@@ -6993,7 +6994,7 @@
if (op == nir_op_iand)
tmp = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
else
- tmp = bld.sop2(Builder::s_and, bld.def(s2), bld.def(s1, scc), src, Operand(exec, bld.lm));
+ tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
Builder::Result lohi = bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), bld.def(s1), tmp);
Temp lo = lohi.def(0).getTemp();