pan/bi: Rewrite to fit dest = src constraint
Needed for TEXC as well as atomics.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7081>
diff --git a/src/panfrost/bifrost/bi_lower_combine.c b/src/panfrost/bifrost/bi_lower_combine.c
index 189e2a6..3cafe22 100644
--- a/src/panfrost/bifrost/bi_lower_combine.c
+++ b/src/panfrost/bifrost/bi_lower_combine.c
@@ -84,28 +84,6 @@
bi_emit_before(ctx, parent, sel);
}
-/* Rewrites uses of an index. Again, this could be O(n) to the program but is
- * currently O(nc) to the program and number of combines, so the pass becomes
- * effectively O(n^2). Better bookkeeping would bring down to linear if that's
- * an issue. */
-
-static void
-bi_rewrite_uses(bi_context *ctx,
- unsigned old, unsigned oldc,
- unsigned new, unsigned newc)
-{
- bi_foreach_instr_global(ctx, ins) {
- bi_foreach_src(ins, s) {
- if (ins->src[s] != old) continue;
-
- for (unsigned i = 0; i < 16; ++i)
- ins->swizzle[s][i] += (newc - oldc);
-
- ins->src[s] = new;
- }
- }
-}
-
/* Copies result of combine from the temp R to the instruction destination,
* given a bitsize sz */
diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c
index 1936958..6760546 100644
--- a/src/panfrost/bifrost/bi_ra.c
+++ b/src/panfrost/bifrost/bi_ra.c
@@ -173,6 +173,19 @@
struct lcra_state *l = NULL;
bool success = false;
+ /* For instructions that both read and write from a data register, it's
+ * the *same* data register. We enforce that constraint by just doing a
+ * quick rewrite. TODO: are there cases where this causes RA to have no
+ * solutions due to copyprop? */
+ bi_foreach_instr_global(ctx, ins) {
+ unsigned props = bi_class_props[ins->type];
+ unsigned both = BI_DATA_REG_SRC | BI_DATA_REG_DEST;
+ if ((props & both) != both) continue;
+
+ bi_rewrite_uses(ctx, ins->dest, 0, ins->src[0], 0);
+ ins->dest = ins->src[0];
+ }
+
do {
if (l) {
lcra_free(l);
diff --git a/src/panfrost/bifrost/bi_tables.c b/src/panfrost/bifrost/bi_tables.c
index 634d687..90862c8e 100644
--- a/src/panfrost/bifrost/bi_tables.c
+++ b/src/panfrost/bifrost/bi_tables.c
@@ -55,7 +55,7 @@
[BI_TABLE] = BI_SCHED_ADD,
[BI_SELECT] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_TEXS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
- [BI_TEXC] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
+ [BI_TEXC] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC | BI_DATA_REG_DEST,
[BI_TEXC_DUAL] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_ROUND] = BI_ROUNDMODE | BI_SCHED_ALL,
[BI_IMUL] = BI_SCHED_FMA,
diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c
index 61a5ffc..548594a 100644
--- a/src/panfrost/bifrost/bir.c
+++ b/src/panfrost/bifrost/bir.c
@@ -180,3 +180,26 @@
unsigned shift = ins->dest_offset * 4; /* 32-bit words */
return (mask << shift);
}
+
+/* Rewrites uses of an index: each source equal to `old` becomes `new` and its
+ * swizzle entries are offset by (newc - oldc). One call walks the whole
+ * program, so combine lowering is O(n^2); bookkeeping could make it linear. */
+
+void
+bi_rewrite_uses(bi_context *ctx,
+ unsigned old, unsigned oldc, /* index to find; oldc is subtracted from swizzles */
+ unsigned new, unsigned newc) /* replacement index; newc is added to swizzles */
+{
+ bi_foreach_instr_global(ctx, ins) {
+ bi_foreach_src(ins, s) {
+ if (ins->src[s] != old) continue;
+ /* NOTE(review): 16 is presumably the swizzle array length -- confirm. */
+ for (unsigned i = 0; i < 16; ++i)
+ ins->swizzle[s][i] += (newc - oldc); /* unsigned: wraps if newc < oldc */
+ /* Retarget the matched source at the replacement index. */
+ ins->src[s] = new;
+ }
+ }
+}
+
+
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 23641b3..04858bd 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -118,7 +118,7 @@
#define BI_VECTOR (1 << 8)
/* Use a data register for src0/dest respectively, bypassing the usual
- * register accessor. Mutually exclusive. */
+ * register accessor. */
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
@@ -624,6 +624,7 @@
uint64_t bi_get_immediate(bi_instruction *ins, unsigned index);
bool bi_writes_component(bi_instruction *ins, unsigned comp);
unsigned bi_writemask(bi_instruction *ins);
+void bi_rewrite_uses(bi_context *ctx, unsigned old, unsigned oldc, unsigned new, unsigned newc);
/* BIR passes */