radeonsi/ngg: try GS multi-cycling mode if default mode failed
If gsprim_lds_size is larger than target_lds_size then gfx10_ngg_calculate_subgroup_info
will fail.
This commit adds logic to retry with multi-cycling mode in this case, because
that mode uses less memory.
This fixes glsl-1.50-gs-max-output when using NGG.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5401>
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 62aa8b4..4c3176f 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1934,9 +1934,11 @@
max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
if (gs_type == PIPE_SHADER_GEOMETRY) {
+ bool force_multi_cycling = false;
unsigned max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices * gs_num_invocations;
- if (max_out_verts_per_gsprim <= 256) {
+retry_select_mode:
+ if (max_out_verts_per_gsprim <= 256 && !force_multi_cycling) {
if (max_out_verts_per_gsprim) {
max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
}
@@ -1951,6 +1953,13 @@
esvert_lds_size = es_sel->esgs_itemsize / 4;
gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
+
+ if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
+ if (gs_sel->tess_turns_off_ngg || es_sel->type != PIPE_SHADER_TESS_EVAL) {
+ force_multi_cycling = true;
+ goto retry_select_mode;
+ }
+ }
} else {
/* VS and TES. */
/* LDS size for passing data from ES to GS. */