aco/ngg: Clean up and reorganize NGG VS/TES code.

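Make the NGG VS/TES code easier to follow, give better names to
some functions, and replace the ngg_early_prim_export() helper with
the isel_context field ngg_nogs_early_prim_export, which is set
during instruction selection setup.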

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6964>
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8c809de..18cbfc2 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10686,12 +10686,6 @@
    return lanecount_to_mask(ctx, count);
 }
 
-bool ngg_early_prim_export(isel_context *ctx)
-{
-   /* TODO: Check edge flags, and if they are written, return false. (Needed for OpenGL, not for Vulkan.) */
-   return true;
-}
-
 Temp ngg_max_vertex_count(isel_context *ctx)
 {
    Builder bld(ctx->program, ctx->block);
@@ -10801,7 +10795,7 @@
         false /* compressed */, true/* done */, false /* valid mask */);
 }
 
-void ngg_emit_nogs_gsthreads(isel_context *ctx)
+void ngg_nogs_export_primitives(isel_context *ctx)
 {
    /* Emit the things that NGG GS threads need to do, for shaders that don't have SW GS.
     * These must always come before VS exports.
@@ -10860,28 +10854,9 @@
    end_divergent_if(ctx, &ic);
 }
 
-void ngg_emit_nogs_output(isel_context *ctx)
+void ngg_nogs_export_vertices(isel_context *ctx)
 {
-   /* Emits NGG GS output, for stages that don't have SW GS. */
-
-   if_context ic;
    Builder bld(ctx->program, ctx->block);
-   bool late_prim_export = !ngg_early_prim_export(ctx);
-
-   /* NGG streamout is currently disabled by default. */
-   assert(!ctx->args->shader_info->so.num_outputs);
-
-   if (late_prim_export) {
-      /* VS exports are output to registers in a predecessor block. Emit phis to get them into this block. */
-      create_export_phis(ctx);
-      /* Do what we need to do in the GS threads. */
-      ngg_emit_nogs_gsthreads(ctx);
-
-      /* What comes next should be executed on ES threads. */
-      Temp is_es_thread = merged_wave_info_to_mask(ctx, 0);
-      begin_divergent_if_then(ctx, &ic, is_es_thread);
-      bld.reset(ctx->block);
-   }
 
    /* Export VS outputs */
    ctx->block->kind |= block_kind_export_end;
@@ -10905,7 +10880,7 @@
          /* TES: Just use the patch ID as the primitive ID. */
          prim_id = get_arg(ctx, ctx->args->ac.tes_patch_id);
       } else {
-         unreachable("unsupported NGG shader stage.");
+         unreachable("unsupported NGG non-GS shader stage.");
       }
 
       ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_ID] |= 0x1;
@@ -10913,12 +10888,32 @@
 
       export_vs_varying(ctx, VARYING_SLOT_PRIMITIVE_ID, false, nullptr);
    }
+}
 
-   if (late_prim_export) {
-      begin_divergent_if_else(ctx, &ic);
-      end_divergent_if(ctx, &ic);
-      bld.reset(ctx->block);
-   }
+void ngg_nogs_prelude(isel_context *ctx)
+{
+   ngg_emit_sendmsg_gs_alloc_req(ctx);
+
+   if (ctx->ngg_nogs_early_prim_export)
+      ngg_nogs_export_primitives(ctx);
+}
+
+void ngg_nogs_late_export_finale(isel_context *ctx)
+{
+   assert(!ctx->ngg_nogs_early_prim_export);
+
+   /* VS exports are output to registers in a predecessor block. Emit phis to get them into this block. */
+   create_export_phis(ctx);
+   /* Export VS/TES primitives. */
+   ngg_nogs_export_primitives(ctx);
+
+   /* What comes next must be executed on ES threads. */
+   if_context ic;
+   Temp is_es_thread = merged_wave_info_to_mask(ctx, 0);
+   begin_divergent_if_then(ctx, &ic, is_es_thread);
+   ngg_nogs_export_vertices(ctx);
+   begin_divergent_if_else(ctx, &ic);
+   end_divergent_if(ctx, &ic);
 }
 
 } /* end namespace */
@@ -10950,12 +10945,8 @@
          split_arguments(&ctx, startpgm);
       }
 
-      if (ngg_no_gs) {
-         ngg_emit_sendmsg_gs_alloc_req(&ctx);
-
-         if (ngg_early_prim_export(&ctx))
-            ngg_emit_nogs_gsthreads(&ctx);
-      }
+      if (ngg_no_gs)
+         ngg_nogs_prelude(&ctx);
 
       /* In a merged VS+TCS HS, the VS implementation can be completely empty. */
       nir_function_impl *func = nir_shader_get_entrypoint(nir);
@@ -10994,8 +10985,8 @@
       if (ctx.stage & hw_vs) {
          create_vs_exports(&ctx);
          ctx.block->kind |= block_kind_export_end;
-      } else if (ngg_no_gs && ngg_early_prim_export(&ctx)) {
-         ngg_emit_nogs_output(&ctx);
+      } else if (ngg_no_gs && ctx.ngg_nogs_early_prim_export) {
+         ngg_nogs_export_vertices(&ctx);
       } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
          Builder bld(ctx.program, ctx.block);
          bld.barrier(aco_opcode::p_barrier,
@@ -11015,8 +11006,8 @@
          end_divergent_if(&ctx, &ic_merged_wave_info);
       }
 
-      if (ngg_no_gs && !ngg_early_prim_export(&ctx))
-         ngg_emit_nogs_output(&ctx);
+      if (ngg_no_gs && !ctx.ngg_nogs_early_prim_export)
+         ngg_nogs_late_export_finale(&ctx);
 
       if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
          /* Outputs of the previous stage are inputs to the next stage */
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 0fb9088..4e6a6b7 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -93,6 +93,7 @@
    Temp persp_centroid, linear_centroid;
 
    /* GS inputs */
+   bool ngg_nogs_early_prim_export = false;
    Temp gs_wave_id;
 
    /* VS output information */
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 3ed0dbc..020cebd 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -474,6 +474,13 @@
       radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
       setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
                            ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+
+      /* TODO: NGG streamout */
+      if (ctx->stage & hw_ngg_gs)
+         assert(!ctx->args->shader_info->so.num_outputs);
+
+      /* TODO: Check edge flags; if the shader writes them, this must be false. (Needed for OpenGL, not for Vulkan.) */
+      ctx->ngg_nogs_early_prim_export = true;
    } else if (ctx->stage == vertex_ls) {
       ctx->tcs_num_inputs = ctx->program->info->vs.num_linked_outputs;
    }
@@ -558,6 +565,13 @@
       radv_vs_output_info *outinfo = &ctx->program->info->tes.outinfo;
       setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
                            ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+
+      /* TODO: NGG streamout */
+      if (ctx->stage & hw_ngg_gs)
+         assert(!ctx->args->shader_info->so.num_outputs);
+
+      /* Tess eval shaders can't write edge flags, so this can always be true. */
+      ctx->ngg_nogs_early_prim_export = true;
    }
 }