radeonsi: eliminate unused shader outputs for separate NGG geometry shaders
This just works because NGG geometry shaders use the same output export code as VS.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6634>
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 7d88c73..af1e3e2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1791,10 +1791,13 @@
uint64_t linked = outputs_written & inputs_read;
key->opt.kill_outputs = ~linked & outputs_written;
- key->opt.ngg_culling = sctx->ngg_culling;
- if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->mono.u.vs_export_prim_id = 1;
+ if (vs->info.stage != MESA_SHADER_GEOMETRY) {
+ key->opt.ngg_culling = sctx->ngg_culling;
+
+ if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
+ key->mono.u.vs_export_prim_id = 1;
+ }
/* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
if (sctx->chip_class >= GFX10 &&
@@ -1877,6 +1880,10 @@
key->as_ngg = stages_key.u.ngg;
+ /* Only NGG can eliminate GS outputs, because its output export code is shared with VS. */
+ if (stages_key.u.ngg)
+ si_shader_selector_key_hw_vs(sctx, sel, key);
+
/* Merged ES-GS can have unbalanced wave usage.
*
* ES threads are per-vertex, while GS threads are