aco: Support subvector loops in aco_assembler.

These are currently not used, but could be useful later.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
diff --git a/src/amd/compiler/README b/src/amd/compiler/README
index 990eb62..585c4e6 100644
--- a/src/amd/compiler/README
+++ b/src/amd/compiler/README
@@ -109,6 +109,13 @@
 and it shouldn't be set in these cases. Setting the DLC for these cases can result
 in graphical glitches.
 
+## RDNA subvector mode
+
+The documentation of S_SUBVECTOR_LOOP_BEGIN and S_SUBVECTOR_LOOP_END is not clear
+on what sort of addressing should be used, but it says that each of them
+"is equivalent to an S_CBRANCH with extra math", so the subvector loop handling
+in ACO follows the S_CBRANCH documentation.
+
 # Hardware Bugs
 
 ## SMEM corrupts VCCZ on SI/CI
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index fcad107..73432a7 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -20,6 +20,8 @@
       else if (chip_class == GFX10)
          opcode = &instr_info.opcode_gfx10[0];
    }
+
+   int subvector_begin_pos = -1;
 };
 
 void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
@@ -80,6 +82,22 @@
       break;
    }
    case Format::SOPK: {
+      SOPK_instruction *sopk = static_cast<SOPK_instruction*>(instr);
+
+      if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
+         assert(ctx.chip_class >= GFX10);
+         assert(ctx.subvector_begin_pos == -1);
+         ctx.subvector_begin_pos = out.size();
+      } else if (instr->opcode == aco_opcode::s_subvector_loop_end) {
+         assert(ctx.chip_class >= GFX10);
+         assert(ctx.subvector_begin_pos != -1);
+         /* Adjust the already emitted s_subvector_loop_begin instruction to point to the address after the end */
+         out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
+         /* Adjust this s_subvector_loop_end instruction to point to the address after the beginning */
+         sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
+         ctx.subvector_begin_pos = -1;
+      }
+
       uint32_t encoding = (0b1011 << 28);
       encoding |= opcode << 23;
       encoding |=
@@ -87,7 +105,7 @@
          instr->definitions[0].physReg() << 16 :
          !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ?
          instr->operands[0].physReg() << 16 : 0;
-      encoding |= static_cast<SOPK_instruction*>(instr)->imm;
+      encoding |= sopk->imm;
       out.push_back(encoding);
       break;
    }