pan/midgard: Add post-RA move elimination

Think of this pass as register coalescing part 2. After RA runs, but
before scheduling, we scan for code of the form:

   mov rN, rN

and delete the move, since it's totally redundant. This pass helps
already, but it'd of course be much more effective paired with
register coalescing to encourage moves in general to end up in this
form. Nevertheless, even by itself:

total instructions in shared programs: 3665 -> 3613 (-1.42%)
instructions in affected programs: 2046 -> 1994 (-2.54%)
helped: 52
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.19% max: 25.00% x̄: 8.02% x̃: 4.00%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -10.26% -5.79%
Instructions are helped.

total bundles in shared programs: 2256 -> 2213 (-1.91%)
bundles in affected programs: 1154 -> 1111 (-3.73%)
helped: 43
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.33% max: 25.00% x̄: 9.10% x̃: 5.56%
95% mean confidence interval for bundles value: -1.00 -1.00
95% mean confidence interval for bundles %-change: -11.60% -6.60%
Bundles are helped.

total quadwords in shared programs: 3689 -> 3642 (-1.27%)
quadwords in affected programs: 2025 -> 1978 (-2.32%)
helped: 47
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.19% max: 25.00% x̄: 7.86% x̃: 3.85%
95% mean confidence interval for quadwords value: -1.00 -1.00
95% mean confidence interval for quadwords %-change: -10.30% -5.42%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 034a256..f3fd92a 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -509,5 +509,6 @@
 bool midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block);
 bool midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block);
 bool midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block);
+void midgard_opt_post_move_eliminate(compiler_context *ctx, midgard_block *block, struct ra_graph *g);
 
 #endif
diff --git a/src/panfrost/midgard/midgard_opt_dce.c b/src/panfrost/midgard/midgard_opt_dce.c
index 698650a..d0d8b1a 100644
--- a/src/panfrost/midgard/midgard_opt_dce.c
+++ b/src/panfrost/midgard/midgard_opt_dce.c
@@ -87,3 +87,52 @@
 
         return progress;
 }
+
+/* An even further special case - to be run after RA runs but before
+ * scheduling, eliminating moves that end up being useless even though they
+ * appeared meaningful in the SSA. Part #2 of register coalescing.
+ * A move is removed only if dest and src1 resolve to the same register
+ * (via g, or decoded from a fixed index), that register is below 16, and
+ * it has no inline constant, nontrivial src2/out modifier, or mask != 0xF. */
+
+void
+midgard_opt_post_move_eliminate(compiler_context *ctx, midgard_block *block, struct ra_graph *g)
+{
+        mir_foreach_instr_in_block_safe(block, ins) {
+                if (ins->type != TAG_ALU_4) continue;
+                if (ins->compact_branch) continue;
+                if (!OP_IS_MOVE(ins->alu.op)) continue;
+
+                /* Inline constants are never eliminated; bail before RA lookup */
+                if (ins->ssa_args.inline_constant) continue;
+
+                /* Keep indices signed so the negative check can fire */
+                int iA = ins->ssa_args.dest;
+                int iB = ins->ssa_args.src1;
+                if ((iA < 0) || (iB < 0)) continue;
+
+                /* Check dest and source share a register post-RA */
+                unsigned A = iA >= SSA_FIXED_MINIMUM ?
+                        SSA_REG_FROM_FIXED(iA) :
+                        ra_get_node_reg(g, iA);
+                unsigned B = iB >= SSA_FIXED_MINIMUM ?
+                        SSA_REG_FROM_FIXED(iB) :
+                        ra_get_node_reg(g, iB);
+                if (A != B) continue;
+
+                /* Check we're in the work zone. TODO: promoted
+                 * uniforms? */
+                if (A >= 16) continue;
+
+                /* Ensure there aren't side effects */
+                if (mir_nontrivial_source2_mod(ins)) continue;
+                if (mir_nontrivial_outmod(ins)) continue;
+                if (ins->mask != 0xF) continue;
+
+                /* We do want to rewrite to keep the graph sane for pipeline
+                 * register creation (TODO: is this the best approach?) */
+                mir_rewrite_index_dst(ctx, ins->ssa_args.src1, ins->ssa_args.dest);
+                /* We're good to go */
+                mir_remove_instruction(ins);
+        }
+}
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index a2c0c76..6b689e4 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -827,6 +827,12 @@
                 g = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
+        /* We can simplify a bit after RA */
+
+        mir_foreach_block(ctx, block) {
+                midgard_opt_post_move_eliminate(ctx, block, g);
+        }
+
         /* After RA finishes, we schedule all at once */
 
         mir_foreach_block(ctx, block) {