freedreno/ir3: Fix disasm of register offsets in ldp/stp.

I had a stp testcase that was getting its offset wrong, and by twiddling
bits and feeding it to qc disasm, I found that the comment was sort of
right: some of the cat6a bits implicated in the old comment do get used, as
the high bits of the cat6c offset.  Reallocating those bits also fixes how
we were getting r960.y for r0.y.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5815>
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 10faa42..d6a1c15 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -964,8 +964,8 @@
 				};
 				fprintf(ctx->out, "+");
 				print_src(ctx, &dstoff_reg);
-			} else if (cat6->c.off) {
-				fprintf(ctx->out, "%+d", cat6->c.off);
+			} else if (cat6->c.off || cat6->c.off_high) {
+				fprintf(ctx->out, "%+d", ((uint32_t)cat6->c.off_high << 8) | cat6->c.off);
 			}
 		} else {
 			dst.reg = (reg_t)(cat6->d.dst);
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index d271508..4ffcb7a 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -715,7 +715,8 @@
 typedef struct PACKED {
 	/* dword0: */
 	uint32_t mustbe0  : 1;
-	uint32_t src1     : 13;
+	uint32_t src1     : 8;
+	uint32_t pad      : 5;
 	uint32_t ignore0  : 8;
 	uint32_t src1_im  : 1;
 	uint32_t src2_im  : 1;
@@ -728,15 +729,11 @@
 /* dword1 encoding for dst_off: */
 typedef struct PACKED {
 	/* dword0: */
-	uint32_t dword0;
+	uint32_t dw0_pad1 : 9;
+	int32_t off_high : 5;
+	uint32_t dw0_pad2 : 18;
 
-	/* note: there is some weird stuff going on where sometimes
-	 * cat6->a.off is involved.. but that seems like a bug in
-	 * the blob, since it is used even if !cat6->src_off
-	 * It would make sense for there to be some more bits to
-	 * bring us to 11 bits worth of offset, but not sure..
-	 */
-	int32_t off       : 8;
+	uint32_t off      : 8;
 	uint32_t mustbe1  : 1;
 	uint32_t dst      : 8;
 	uint32_t pad1     : 15;
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 8b34418..c6d78b2 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -882,6 +882,7 @@
 			}
 		} else {
 			cat6c->off = instr->cat6.dst_offset;
+			cat6c->off_high = instr->cat6.dst_offset >> 8;
 		}
 	} else {
 		instr_cat6d_t *cat6d = ptr;
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 00658ac..e6f8f7b 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -179,10 +179,12 @@
 	INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
 
 	/* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
-	INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y+80], r960.y, 1"), /* stp.f32 p[r11.y-176], r0.y, 1 */
-	INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r962.z, 2"), /* stp.f32 p[r11.y-104], r2.z, 2 */
-	INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r965.y, 1"), /* stp.u32 p[r11.y-96], r5.y, 1 */
+	INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
+	INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
+	INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"),
 	INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"),
+	/* Custom stp based on above to catch a disasm bug. */
+	INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"),
 
 	/* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
 	INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),