aco/ngg: Clean up and reorganize NGG VS/TES code.
Make the NGG VS/TES code easier to follow, give better names to
some functions and make ngg_nogs_early_prim_export a variable.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6964>
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8c809de..18cbfc2 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10686,12 +10686,6 @@
return lanecount_to_mask(ctx, count);
}
-bool ngg_early_prim_export(isel_context *ctx)
-{
- /* TODO: Check edge flags, and if they are written, return false. (Needed for OpenGL, not for Vulkan.) */
- return true;
-}
-
Temp ngg_max_vertex_count(isel_context *ctx)
{
Builder bld(ctx->program, ctx->block);
@@ -10801,7 +10795,7 @@
false /* compressed */, true/* done */, false /* valid mask */);
}
-void ngg_emit_nogs_gsthreads(isel_context *ctx)
+void ngg_nogs_export_primitives(isel_context *ctx)
{
/* Emit the things that NGG GS threads need to do, for shaders that don't have SW GS.
* These must always come before VS exports.
@@ -10860,28 +10854,9 @@
end_divergent_if(ctx, &ic);
}
-void ngg_emit_nogs_output(isel_context *ctx)
+void ngg_nogs_export_vertices(isel_context *ctx)
{
- /* Emits NGG GS output, for stages that don't have SW GS. */
-
- if_context ic;
Builder bld(ctx->program, ctx->block);
- bool late_prim_export = !ngg_early_prim_export(ctx);
-
- /* NGG streamout is currently disabled by default. */
- assert(!ctx->args->shader_info->so.num_outputs);
-
- if (late_prim_export) {
- /* VS exports are output to registers in a predecessor block. Emit phis to get them into this block. */
- create_export_phis(ctx);
- /* Do what we need to do in the GS threads. */
- ngg_emit_nogs_gsthreads(ctx);
-
- /* What comes next should be executed on ES threads. */
- Temp is_es_thread = merged_wave_info_to_mask(ctx, 0);
- begin_divergent_if_then(ctx, &ic, is_es_thread);
- bld.reset(ctx->block);
- }
/* Export VS outputs */
ctx->block->kind |= block_kind_export_end;
@@ -10905,7 +10880,7 @@
/* TES: Just use the patch ID as the primitive ID. */
prim_id = get_arg(ctx, ctx->args->ac.tes_patch_id);
} else {
- unreachable("unsupported NGG shader stage.");
+ unreachable("unsupported NGG non-GS shader stage.");
}
ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_ID] |= 0x1;
@@ -10913,12 +10888,32 @@
export_vs_varying(ctx, VARYING_SLOT_PRIMITIVE_ID, false, nullptr);
}
+}
- if (late_prim_export) {
- begin_divergent_if_else(ctx, &ic);
- end_divergent_if(ctx, &ic);
- bld.reset(ctx->block);
- }
+void ngg_nogs_prelude(isel_context *ctx)
+{
+ ngg_emit_sendmsg_gs_alloc_req(ctx);
+
+ if (ctx->ngg_nogs_early_prim_export)
+ ngg_nogs_export_primitives(ctx);
+}
+
+void ngg_nogs_late_export_finale(isel_context *ctx)
+{
+ assert(!ctx->ngg_nogs_early_prim_export);
+
+ /* VS exports are output to registers in a predecessor block. Emit phis to get them into this block. */
+ create_export_phis(ctx);
+ /* Export VS/TES primitives. */
+ ngg_nogs_export_primitives(ctx);
+
+ /* What comes next must be executed on ES threads. */
+ if_context ic;
+ Temp is_es_thread = merged_wave_info_to_mask(ctx, 0);
+ begin_divergent_if_then(ctx, &ic, is_es_thread);
+ ngg_nogs_export_vertices(ctx);
+ begin_divergent_if_else(ctx, &ic);
+ end_divergent_if(ctx, &ic);
}
} /* end namespace */
@@ -10950,12 +10945,8 @@
split_arguments(&ctx, startpgm);
}
- if (ngg_no_gs) {
- ngg_emit_sendmsg_gs_alloc_req(&ctx);
-
- if (ngg_early_prim_export(&ctx))
- ngg_emit_nogs_gsthreads(&ctx);
- }
+ if (ngg_no_gs)
+ ngg_nogs_prelude(&ctx);
/* In a merged VS+TCS HS, the VS implementation can be completely empty. */
nir_function_impl *func = nir_shader_get_entrypoint(nir);
@@ -10994,8 +10985,8 @@
if (ctx.stage & hw_vs) {
create_vs_exports(&ctx);
ctx.block->kind |= block_kind_export_end;
- } else if (ngg_no_gs && ngg_early_prim_export(&ctx)) {
- ngg_emit_nogs_output(&ctx);
+ } else if (ngg_no_gs && ctx.ngg_nogs_early_prim_export) {
+ ngg_nogs_export_vertices(&ctx);
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
Builder bld(ctx.program, ctx.block);
bld.barrier(aco_opcode::p_barrier,
@@ -11015,8 +11006,8 @@
end_divergent_if(&ctx, &ic_merged_wave_info);
}
- if (ngg_no_gs && !ngg_early_prim_export(&ctx))
- ngg_emit_nogs_output(&ctx);
+ if (ngg_no_gs && !ctx.ngg_nogs_early_prim_export)
+ ngg_nogs_late_export_finale(&ctx);
if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
/* Outputs of the previous stage are inputs to the next stage */
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 0fb9088..4e6a6b7 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -93,6 +93,7 @@
Temp persp_centroid, linear_centroid;
/* GS inputs */
+ bool ngg_nogs_early_prim_export = false;
Temp gs_wave_id;
/* VS output information */
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 3ed0dbc..020cebd 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -474,6 +474,13 @@
radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+
+ /* TODO: NGG streamout */
+ if (ctx->stage & hw_ngg_gs)
+ assert(!ctx->args->shader_info->so.num_outputs);
+
+ /* TODO: check if the shader writes edge flags, and if so, disable early prim export (needed for OpenGL, not for Vulkan) */
+ ctx->ngg_nogs_early_prim_export = true;
} else if (ctx->stage == vertex_ls) {
ctx->tcs_num_inputs = ctx->program->info->vs.num_linked_outputs;
}
@@ -558,6 +565,13 @@
radv_vs_output_info *outinfo = &ctx->program->info->tes.outinfo;
setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+
+ /* TODO: NGG streamout */
+ if (ctx->stage & hw_ngg_gs)
+ assert(!ctx->args->shader_info->so.num_outputs);
+
+ /* Tess eval shaders can't write edge flags, so this can always be true. */
+ ctx->ngg_nogs_early_prim_export = true;
}
}