pan/bi: Rewrite to fit dest = src constraint

Needed for TEXC as well as atomics.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7081>
diff --git a/src/panfrost/bifrost/bi_lower_combine.c b/src/panfrost/bifrost/bi_lower_combine.c
index 189e2a6..3cafe22 100644
--- a/src/panfrost/bifrost/bi_lower_combine.c
+++ b/src/panfrost/bifrost/bi_lower_combine.c
@@ -84,28 +84,6 @@
         bi_emit_before(ctx, parent, sel);
 }
 
-/* Rewrites uses of an index. Again, this could be O(n) to the program but is
- * currently O(nc) to the program and number of combines, so the pass becomes
- * effectively O(n^2). Better bookkeeping would bring down to linear if that's
- * an issue. */
-
-static void
-bi_rewrite_uses(bi_context *ctx,
-                unsigned old, unsigned oldc,
-                unsigned new, unsigned newc)
-{
-        bi_foreach_instr_global(ctx, ins) {
-                bi_foreach_src(ins, s) {
-                        if (ins->src[s] != old) continue;
-
-                        for (unsigned i = 0; i < 16; ++i)
-                                ins->swizzle[s][i] += (newc - oldc);
-
-                        ins->src[s] = new;
-                }
-        }
-}
-
 /* Copies result of combine from the temp R to the instruction destination,
  * given a bitsize sz */
 
diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c
index 1936958..6760546 100644
--- a/src/panfrost/bifrost/bi_ra.c
+++ b/src/panfrost/bifrost/bi_ra.c
@@ -173,6 +173,19 @@
         struct lcra_state *l = NULL;
         bool success = false;
 
+        /* For instructions that both read from and write to a data register,
+         * it's the *same* data register. We enforce that constraint by just
+         * doing a quick rewrite. TODO: are there cases where this causes RA to
+         * have no solutions due to copyprop? */
+        bi_foreach_instr_global(ctx, ins) {
+                unsigned props = bi_class_props[ins->type];
+                unsigned both = BI_DATA_REG_SRC | BI_DATA_REG_DEST;
+                if ((props & both) != both) continue;
+
+                bi_rewrite_uses(ctx, ins->dest, 0, ins->src[0], 0);
+                ins->dest = ins->src[0];
+        }
+
         do {
                 if (l) {
                         lcra_free(l);
diff --git a/src/panfrost/bifrost/bi_tables.c b/src/panfrost/bifrost/bi_tables.c
index 634d687..90862c8e 100644
--- a/src/panfrost/bifrost/bi_tables.c
+++ b/src/panfrost/bifrost/bi_tables.c
@@ -55,7 +55,7 @@
         [BI_TABLE]              = BI_SCHED_ADD,
         [BI_SELECT]             = BI_SCHED_ALL | BI_SWIZZLABLE,
         [BI_TEXS] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
-        [BI_TEXC] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
+        [BI_TEXC] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC | BI_DATA_REG_DEST,
         [BI_TEXC_DUAL] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
         [BI_ROUND] 		= BI_ROUNDMODE | BI_SCHED_ALL,
         [BI_IMUL]       = BI_SCHED_FMA,
diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c
index 61a5ffc..548594a 100644
--- a/src/panfrost/bifrost/bir.c
+++ b/src/panfrost/bifrost/bir.c
@@ -180,3 +180,26 @@
         unsigned shift = ins->dest_offset * 4; /* 32-bit words */
         return (mask << shift);
 }
+
+/* Rewrites uses of an index. Each call walks the whole program, so c calls
+ * cost O(nc) and combine lowering is effectively O(n^2). Better bookkeeping
+ * would bring this down to linear if that's an issue. */
+
+void
+bi_rewrite_uses(bi_context *ctx,
+                unsigned old, unsigned oldc,
+                unsigned new, unsigned newc)
+{
+        bi_foreach_instr_global(ctx, ins) {
+                bi_foreach_src(ins, s) {
+                        if (ins->src[s] != old) continue;
+
+                        for (unsigned i = 0; i < 16; ++i)
+                                ins->swizzle[s][i] += (newc - oldc);
+
+                        ins->src[s] = new;
+                }
+        }
+}
+
+
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 23641b3..04858bd 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -118,7 +118,7 @@
 #define BI_VECTOR (1 << 8)
 
 /* Use a data register for src0/dest respectively, bypassing the usual
- * register accessor. Mutually exclusive. */
+ * register accessor. */
 #define BI_DATA_REG_SRC (1 << 9)
 #define BI_DATA_REG_DEST (1 << 10)
 
@@ -624,6 +624,7 @@
 uint64_t bi_get_immediate(bi_instruction *ins, unsigned index);
 bool bi_writes_component(bi_instruction *ins, unsigned comp);
 unsigned bi_writemask(bi_instruction *ins);
+void bi_rewrite_uses(bi_context *ctx, unsigned old, unsigned oldc, unsigned new, unsigned newc);
 
 /* BIR passes */