aco: use v_mov_b32_sdwa for some 16-bit constants

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7216>

commit: 70ff262cda8a8e3566f73afec669386ddb4fa70c [log] [tgz]
author: Rhys Perry <pendingchaos02@gmail.com> Fri Oct 16 13:18:08 2020 +0100
committer: Marge Bot <eric+marge@anholt.net> Tue Oct 27 15:24:38 2020 +0000
tree: 246ff7ee398bc7a52026acece553bd8e0d732dba
parent: b882598ee1723358e1e19449cbb2619da7abc246 [diff]
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 8c6766b..f7a4012 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp

@@ -1028,7 +1028,14 @@
       }
    } else if (dst.regClass() == v2b && op.isConstant() && !op.isLiteral()) {
       assert(ctx->program->chip_class >= GFX8);
-      bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
+      if (op.constantValue() >= 0xfff0 || op.constantValue() <= 64) {
+         /* use v_mov_b32 to avoid possible issues with denormal flushing or
+          * NaN. v_add_f16 is still needed for float constants. */
+         uint32_t val32 = (int32_t)(int16_t)op.constantValue();
+         bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand(val32));
+      } else {
+         bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
+      }
    } else if (dst.regClass() == v2b && op.isLiteral()) {
       if (ctx->program->chip_class < GFX10 || !(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
          unsigned offset = dst.physReg().byte() * 8u;
commit	70ff262cda8a8e3566f73afec669386ddb4fa70c	[log] [tgz]
author	Rhys Perry <pendingchaos02@gmail.com>	Fri Oct 16 13:18:08 2020 +0100
committer	Marge Bot <eric+marge@anholt.net>	Tue Oct 27 15:24:38 2020 +0000
tree	246ff7ee398bc7a52026acece553bd8e0d732dba
parent	b882598ee1723358e1e19449cbb2619da7abc246 [diff]