agx/opt_preamble: improve preamble cost function

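Save lots of uniforms by no longer pushing values that are free to
recompute: fneg/fabs/f2f32 results whose uses are all float ALU sources
are now costed at zero, since the float source modifier is expected to
be propagated into the using instruction.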

total instructions in shared programs: 2039473 -> 2039480 (<.01%)
instructions in affected programs: 28335 -> 28342 (0.02%)
helped: 51
HURT: 47
Inconclusive result (value mean confidence interval includes 0).

total bytes in shared programs: 13983778 -> 13983802 (<.01%)
bytes in affected programs: 451678 -> 451702 (<.01%)
helped: 151
HURT: 79
Inconclusive result (value mean confidence interval includes 0).

total regs in shared programs: 590373 -> 590670 (0.05%)
regs in affected programs: 2354 -> 2651 (12.62%)
helped: 13
HURT: 65
Regs are HURT.

total uniforms in shared programs: 1532271 -> 1516549 (-1.03%)
uniforms in affected programs: 295709 -> 279987 (-5.32%)
helped: 2302
HURT: 0
Uniforms are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c
index d7cf487..79451c8 100644
--- a/src/asahi/compiler/agx_nir_opt_preamble.c
+++ b/src/asahi/compiler/agx_nir_opt_preamble.c
@@ -6,6 +6,8 @@
 
 #include "compiler/nir/nir_builder.h"
 #include "agx_compiler.h"
+#include "nir.h"
+#include "nir_opcodes.h"
 
 static void
 def_size(nir_def *def, unsigned *size, unsigned *align)
@@ -16,6 +18,41 @@
    *align = bit_size / 16;
 }
 
+static bool
+all_uses_float(nir_def *def)
+{ /* True only if every use of def is a float-typed ALU source (fmin/fmax excluded) */
+   nir_foreach_use_including_if(use, def) {
+      if (nir_src_is_if(use))
+         return false;
+      /* Only ALU users qualify; any other instruction type rejects def */
+      nir_instr *use_instr = nir_src_parent_instr(use);
+      if (use_instr->type != nir_instr_type_alu)
+         return false;
+      /* Find which source slot of the ALU instruction this use occupies */
+      nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+      unsigned src_index = ~0;
+      for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
+         if (&use_alu->src[i].src == use) {
+            src_index = i;
+            break;
+         }
+      }
+      /* The use must match some slot; then check that slot's declared base type */
+      assert(src_index != ~0);
+      nir_alu_type src_type = nir_alu_type_get_base_type(
+         nir_op_infos[use_alu->op].input_types[src_index]);
+
+      if (src_type != nir_type_float)
+         return false;
+
+      /* fmin/fmax users are rejected: no float modifiers on G13 */
+      if (use_alu->op == nir_op_fmax || use_alu->op == nir_op_fmin)
+         return false;
+   }
+
+   return true;
+}
+
 static float
 instr_cost(nir_instr *instr, const void *data)
 {
@@ -37,12 +74,21 @@
       /* Texturing involes lots of memory bandwidth */
       return 20.0;
 
-   case nir_instr_type_alu:
-      /* We optimistically assume that moves get coalesced */
-      if (nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
-         return 0.0;
-      else
-         return 2.0;
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      switch (alu->op) {
+      case nir_op_fneg:
+      case nir_op_fabs:
+      case nir_op_f2f32:
+         /* Float source modifiers will be propagated */
+         return all_uses_float(&alu->def) ? 0.0 : 2.0;
+
+      default:
+         /* We optimistically assume that moves get coalesced */
+         return nir_op_is_vec_or_mov(alu->op) ? 0.0 : 2.0;
+      }
+   }
 
    default:
       return 1.0;