aco: properly recognize that s_waitcnt mitigates VMEMtoScalarWriteHazard
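The s_waitcnt check lived in its own "else if" branch placed after the
"isSALU() || SMEM" branch. Since s_waitcnt is a SOPP instruction, it was
already handled by the SALU branch, so the dedicated branch was never taken
and "s_waitcnt vmcnt(0)" never cleared the set of SGPRs read by VMEM. Move
the check inside the SALU/SMEM branch so the mitigation is recognized and
fewer v_nop instructions are inserted.
fossil-db (Navi):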
Totals from 555 (0.41% of 135946) affected shaders:
CodeSize: 1005716 -> 1003400 (-0.23%)
Instrs: 195326 -> 194744 (-0.30%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5923>
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index bb703d7..a877172 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -602,6 +602,14 @@
if (program->wave_size == 64)
ctx.sgprs_read_by_VMEM.set(exec_hi);
} else if (instr->isSALU() || instr->format == Format::SMEM) {
+ if (instr->opcode == aco_opcode::s_waitcnt) {
+ /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
+ uint16_t imm = static_cast<SOPP_instruction*>(instr.get())->imm;
+ unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
+ if (vmcnt == 0)
+ ctx.sgprs_read_by_VMEM.reset();
+ }
+
/* Check if SALU writes an SGPR that was previously read by the VALU */
if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
ctx.sgprs_read_by_VMEM.reset();
@@ -610,12 +618,6 @@
aco_ptr<VOP1_instruction> nop{create_instruction<VOP1_instruction>(aco_opcode::v_nop, Format::VOP1, 0, 0)};
new_instructions.emplace_back(std::move(nop));
}
- } else if (instr->opcode == aco_opcode::s_waitcnt) {
- /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
- uint16_t imm = static_cast<SOPP_instruction*>(instr.get())->imm;
- unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
- if (vmcnt == 0)
- ctx.sgprs_read_by_VMEM.reset();
} else if (instr->isVALU()) {
/* Hazard is mitigated by any VALU instruction */
ctx.sgprs_read_by_VMEM.reset();