pan/bi: Decode all 32-bit register modes

There are actually more than 16 of them: the control field is further
disambiguated by whether `r2 == r3` and whether the instruction is `first`,
giving another "fun" encoding. The extra space allows for writing
half-registers.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6793>
diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index 9ec7352..4d10cf5 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -394,8 +394,8 @@
         if (!has_port2)
                 regs.port[2] = regs.port[3];
 
-        s.reg3 = regs.port[3];
-        s.reg2 = regs.port[2];
+        s.reg2 = regs.port[3];
+        s.reg3 = regs.port[2];
         s.uniform_const = regs.uniform_constant;
 
         memcpy(&packed, &s, sizeof(s));
diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h
index faf873b..8b24360 100644
--- a/src/panfrost/bifrost/bifrost.h
+++ b/src/panfrost/bifrost/bifrost.h
@@ -191,8 +191,8 @@
 
 struct bifrost_regs {
         unsigned uniform_const : 8;
-        unsigned reg2 : 6;
         unsigned reg3 : 6;
+        unsigned reg2 : 6;
         unsigned reg0 : 5;
         unsigned reg1 : 6;
         unsigned ctrl : 4;
@@ -285,6 +285,9 @@
  *
  * IDLE is a special mode disabling both ports, except for the first
  * instruction in the clause which uses IDLE_1 for the same purpose.
+ *
+ * An all-zero entry is used as the sentinel for a reserved encoding, so
+ * IDLE(_1) set FMA (which is ignored) as a placeholder to differ from reserved.
  */
 enum bifrost_reg_mode {
         BIFROST_R_WL_FMA  = 1,
@@ -315,4 +318,47 @@
         BIFROST_IDLE      = 27,
 };
 
+enum bifrost_reg_op { /* operation performed on register slot 2 or slot 3 */
+        BIFROST_OP_IDLE = 0,
+        BIFROST_OP_READ = 1,
+        BIFROST_OP_WRITE = 2, /* keep all writes >= WRITE: users test `op >= BIFROST_OP_WRITE` */
+        BIFROST_OP_WRITE_LO = 3, /* disassembled with a ".h0" destination suffix */
+        BIFROST_OP_WRITE_HI = 4, /* disassembled with a ".h1" destination suffix */
+};
+
+struct bifrost_reg_ctrl_23 { /* decoded behaviour of register slots 2 and 3 */
+        enum bifrost_reg_op slot2;
+        enum bifrost_reg_op slot3;
+        bool slot3_fma; /* slot 3 is reported as FMA when true, as ADD otherwise */
+};
+
+static const struct bifrost_reg_ctrl_23 bifrost_reg_ctrl_lut[32] = {
+        [BIFROST_R_WL_FMA]  = { BIFROST_OP_READ,     BIFROST_OP_WRITE_LO, true },
+        [BIFROST_R_WH_FMA]  = { BIFROST_OP_READ,     BIFROST_OP_WRITE_HI, true },
+        [BIFROST_R_W_FMA]   = { BIFROST_OP_READ,     BIFROST_OP_WRITE,    true },
+        [BIFROST_R_WL_ADD]  = { BIFROST_OP_READ,     BIFROST_OP_WRITE_LO, false },
+        [BIFROST_R_WH_ADD]  = { BIFROST_OP_READ,     BIFROST_OP_WRITE_HI, false },
+        [BIFROST_R_W_ADD]   = { BIFROST_OP_READ,     BIFROST_OP_WRITE,    false },
+        [BIFROST_WL_WL_ADD] = { BIFROST_OP_WRITE_LO, BIFROST_OP_WRITE_LO, false },
+        [BIFROST_WL_WH_ADD] = { BIFROST_OP_WRITE_LO, BIFROST_OP_WRITE_HI, false },
+        [BIFROST_WL_W_ADD]  = { BIFROST_OP_WRITE_LO, BIFROST_OP_WRITE,    false },
+        [BIFROST_WH_WL_ADD] = { BIFROST_OP_WRITE_HI, BIFROST_OP_WRITE_LO, false },
+        [BIFROST_WH_WH_ADD] = { BIFROST_OP_WRITE_HI, BIFROST_OP_WRITE_HI, false },
+        [BIFROST_WH_W_ADD]  = { BIFROST_OP_WRITE_HI, BIFROST_OP_WRITE,    false },
+        [BIFROST_W_WL_ADD]  = { BIFROST_OP_WRITE,    BIFROST_OP_WRITE_LO, false },
+        [BIFROST_W_WH_ADD]  = { BIFROST_OP_WRITE,    BIFROST_OP_WRITE_HI, false },
+        [BIFROST_W_W_ADD]   = { BIFROST_OP_WRITE,    BIFROST_OP_WRITE,    false },
+        [BIFROST_IDLE_1]    = { BIFROST_OP_IDLE,     BIFROST_OP_IDLE,     true }, /* `true` only keeps the entry non-zero */
+        [BIFROST_I_W_FMA]   = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE,    true },
+        [BIFROST_I_WL_FMA]  = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE_LO, true },
+        [BIFROST_I_WH_FMA]  = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE_HI, true },
+        [BIFROST_R_I]       = { BIFROST_OP_READ,     BIFROST_OP_IDLE,     false },
+        [BIFROST_I_W_ADD]   = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE,    false },
+        [BIFROST_I_WL_ADD]  = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE_LO, false },
+        [BIFROST_I_WH_ADD]  = { BIFROST_OP_IDLE,     BIFROST_OP_WRITE_HI, false },
+        [BIFROST_WL_WH_MIX] = { BIFROST_OP_WRITE_LO, BIFROST_OP_WRITE_HI, false },
+        [BIFROST_WH_WL_MIX] = { BIFROST_OP_WRITE_HI, BIFROST_OP_WRITE_LO, false },
+        [BIFROST_IDLE]      = { BIFROST_OP_IDLE,     BIFROST_OP_IDLE,     true }, /* `true` only keeps the entry non-zero */
+}; /* indexed by the state-adjusted control value; unlisted (all-zero) entries are reserved */
+
 #endif
diff --git a/src/panfrost/bifrost/disassemble.c b/src/panfrost/bifrost/disassemble.c
index 2d83a30..4f0f128 100644
--- a/src/panfrost/bifrost/disassemble.c
+++ b/src/panfrost/bifrost/disassemble.c
@@ -69,9 +69,7 @@
 struct bifrost_reg_ctrl {
         bool read_reg0;
         bool read_reg1;
-        bool read_reg3;
-        enum bifrost_reg_write_unit fma_write_unit;
-        enum bifrost_reg_write_unit add_write_unit;
+        struct bifrost_reg_ctrl_23 slot23; /* slot 2/3 operations looked up from bifrost_reg_ctrl_lut */
         bool clause_start;
 };
 
@@ -156,90 +154,46 @@
                 ctrl = regs.ctrl;
                 decoded.read_reg0 = decoded.read_reg1 = true;
         }
-        switch (ctrl) {
-        case 1:
-                decoded.fma_write_unit = REG_WRITE_TWO;
-                break;
-        case 2:
-        case 3:
-                decoded.fma_write_unit = REG_WRITE_TWO;
-                decoded.read_reg3 = true;
-                break;
-        case 4:
-                decoded.read_reg3 = true;
-                break;
-        case 5:
-                decoded.add_write_unit = REG_WRITE_TWO;
-                break;
-        case 6:
-                decoded.add_write_unit = REG_WRITE_TWO;
-                decoded.read_reg3 = true;
-                break;
-        case 8:
-                decoded.clause_start = true;
-                break;
-        case 9:
-                decoded.fma_write_unit = REG_WRITE_TWO;
-                decoded.clause_start = true;
-                break;
-        case 11:
-                break;
-        case 12:
-                decoded.read_reg3 = true;
-                decoded.clause_start = true;
-                break;
-        case 13:
-                decoded.add_write_unit = REG_WRITE_TWO;
-                decoded.clause_start = true;
-                break;
 
-        case 7:
-        case 15:
-                decoded.fma_write_unit = REG_WRITE_THREE;
-                decoded.add_write_unit = REG_WRITE_TWO;
-                break;
-        default:
-                fprintf(fp, "# unknown reg ctrl %d\n", ctrl);
-        }
+        /* Modify control based on state */
+        if (first)
+                ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
+        else if (regs.reg2 == regs.reg3)
+                ctrl += 16;
+
+        decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
+        ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };
+        assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
 
         return decoded;
 }
 
-// Pass in the add_write_unit or fma_write_unit, and this returns which register
-// the ADD/FMA units are writing to
-static unsigned GetRegToWrite(enum bifrost_reg_write_unit unit, struct bifrost_regs regs)
-{
-        switch (unit) {
-        case REG_WRITE_TWO:
-                return regs.reg2;
-        case REG_WRITE_THREE:
-                return regs.reg3;
-        default: /* REG_WRITE_NONE */
-                assert(0);
-                return 0;
-        }
-}
-
 static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
 {
         struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
         fprintf(fp, "# ");
         if (ctrl.read_reg0)
-                fprintf(fp, "port 0: r%d ", get_reg0(srcs));
+                fprintf(fp, "slot 0: r%d ", get_reg0(srcs));
         if (ctrl.read_reg1)
-                fprintf(fp, "port 1: r%d ", get_reg1(srcs));
+                fprintf(fp, "slot 1: r%d ", get_reg1(srcs));
 
-        if (ctrl.fma_write_unit == REG_WRITE_TWO)
-                fprintf(fp, "port 2: r%d (write FMA) ", srcs.reg2);
-        else if (ctrl.add_write_unit == REG_WRITE_TWO)
-                fprintf(fp, "port 2: r%d (write ADD) ", srcs.reg2);
+        const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
 
-        if (ctrl.fma_write_unit == REG_WRITE_THREE)
-                fprintf(fp, "port 3: r%d (write FMA) ", srcs.reg3);
-        else if (ctrl.add_write_unit == REG_WRITE_THREE)
-                fprintf(fp, "port 3: r%d (write ADD) ", srcs.reg3);
-        else if (ctrl.read_reg3)
-                fprintf(fp, "port 3: r%d (read) ", srcs.reg3);
+        if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
+                fprintf(fp, "slot 2: r%d (write FMA) ", srcs.reg2);
+        else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
+                fprintf(fp, "slot 2: r%d (write lo FMA) ", srcs.reg2);
+        else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
+                fprintf(fp, "slot 2: r%d (write hi FMA) ", srcs.reg2);
+        else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
+                fprintf(fp, "slot 2: r%d (read) ", srcs.reg2);
+
+        if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
+                fprintf(fp, "slot 3: r%d (write %s) ", srcs.reg3, slot3_fma);
+        else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
+                fprintf(fp, "slot 3: r%d (write lo %s) ", srcs.reg3, slot3_fma);
+        else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
+                fprintf(fp, "slot 3: r%d (write hi %s) ", srcs.reg3, slot3_fma);
 
         if (srcs.uniform_const) {
                 if (srcs.uniform_const & 0x80) {
@@ -250,24 +204,39 @@
         fprintf(fp, "\n");
 }
 
+static void /* Append ".h0"/".h1" for half-register writes; print nothing for any other op */
+bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
+{
+        if (op == BIFROST_OP_WRITE_LO)
+                fprintf(fp, ".h0");
+        else if (op == BIFROST_OP_WRITE_HI)
+                fprintf(fp, ".h1");
+}
+
 void
 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool first)
 {
-    struct bifrost_reg_ctrl next_ctrl = DecodeRegCtrl(fp, *next_regs, first);
-    if (next_ctrl.fma_write_unit != REG_WRITE_NONE)
-        fprintf(fp, "r%u:t0", GetRegToWrite(next_ctrl.fma_write_unit, *next_regs));
-    else
+    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, first);
+    if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) { /* any write op (full, lo or hi) on slot 2 */
+        fprintf(fp, "r%u:t0", next_regs->reg2);
+        bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
+    } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) { /* slot 3 write flagged as FMA */
+        fprintf(fp, "r%u:t0", next_regs->reg3);
+        bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
+    } else /* no register destination decoded for FMA: print bare t0 */
         fprintf(fp, "t0");
 }
 
 void
 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool first)
 {
-    struct bifrost_reg_ctrl next_ctrl = DecodeRegCtrl(fp, *next_regs, first);
-    if (next_ctrl.add_write_unit != REG_WRITE_NONE)
-        fprintf(fp, "r%u:t1", GetRegToWrite(next_ctrl.add_write_unit, *next_regs));
-    else
-        fprintf(fp, "t1");
+    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, first);
+
+    if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) { /* slot 3 write flagged as ADD */
+        fprintf(fp, "r%u:t1", next_regs->reg3); /* ADD destination is t1; t0 is printed by the FMA variant */
+        bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
+    } else /* no register destination decoded for ADD: print bare t1 */
+        fprintf(fp, "t1");
 }
 
 static void dump_const_imm(FILE *fp, uint32_t imm)
@@ -400,7 +369,7 @@
                 fprintf(fp, "r%d", get_reg1(srcs));
                 break;
         case 2:
-                fprintf(fp, "r%d", srcs.reg3);
+                fprintf(fp, "r%d", srcs.reg2);
                 break;
         case 3:
                 if (isFMA)