radeonsi: kill disabled clip distances and planes at per-channel granularity
Apps often enable only one clip plane when using gl_ClipVertex, which means
that only one scalar clip distance is actually needed.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6948>
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8e688cd..888a731 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1290,7 +1290,7 @@
stage == MESA_SHADER_VERTEX) &&
!key->as_es && !key->as_ls) {
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->opt.kill_outputs);
- fprintf(f, " opt.clip_disable = %u\n", key->opt.clip_disable);
+ fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->opt.kill_clip_distances);
if (stage != MESA_SHADER_GEOMETRY)
fprintf(f, " opt.ngg_culling = 0x%x\n", key->opt.ngg_culling);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index a8aba0b..4985ce6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -647,8 +647,8 @@
struct {
/* For HW VS (it can be VS, TES, GS) */
uint64_t kill_outputs; /* "get_unique_index" bits */
+ unsigned kill_clip_distances : 8;
unsigned kill_pointsize : 1;
- unsigned clip_disable : 1;
/* For NGG VS and TES. */
unsigned ngg_culling : 5; /* SI_NGG_CULL_* */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index 96313d1..d996ccc 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -372,20 +372,29 @@
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
+ unsigned clipdist_mask = ctx->shader->selector->clipdist_mask &
+ ~ctx->shader->key.opt.kill_clip_distances;
for (reg_index = 0; reg_index < 2; reg_index++) {
struct ac_export_args *args = &pos[2 + reg_index];
- args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMConstReal(ctx->ac.f32, 0.0f);
+ if (!(clipdist_mask & BITFIELD_RANGE(reg_index * 4, 4)))
+ continue;
+
+ args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMGetUndef(ctx->ac.f32);
/* Compute dot products of position and user clip plane vectors */
for (chan = 0; chan < 4; chan++) {
+ if (!(clipdist_mask & BITFIELD_BIT(reg_index * 4 + chan)))
+ continue;
+
for (const_chan = 0; const_chan < 4; const_chan++) {
LLVMValueRef addr =
LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0);
base_elt = si_buffer_load_const(ctx, const_resource, addr);
args->out[chan] =
- ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], args->out[chan]);
+ ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan],
+ const_chan == 0 ? ctx->ac.f32_0 : args->out[chan]);
}
}
@@ -541,7 +550,10 @@
struct ac_export_args pos_args[4] = {};
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
viewport_index_value = NULL;
- unsigned pos_idx;
+ unsigned pos_idx, index;
+ unsigned clipdist_mask = (shader->selector->clipdist_mask &
+ ~shader->key.opt.kill_clip_distances) |
+ shader->selector->culldist_mask;
int i;
si_vertex_color_clamping(ctx, outputs, noutput);
@@ -566,16 +578,14 @@
break;
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
- if (!shader->key.opt.clip_disable) {
- unsigned index = 2 + (outputs[i].semantic - VARYING_SLOT_CLIP_DIST0);
- si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index,
- &pos_args[index]);
+ index = outputs[i].semantic - VARYING_SLOT_CLIP_DIST0;
+ if (clipdist_mask & BITFIELD_RANGE(index * 4, 4)) {
+ si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + 2 + index,
+ &pos_args[2 + index]);
}
break;
case VARYING_SLOT_CLIP_VERTEX:
- if (!shader->key.opt.clip_disable) {
- si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values);
- }
+ si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values);
break;
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f13ca4f..36d05cd 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -737,14 +737,7 @@
unsigned clipdist_mask = vs_sel->clipdist_mask;
unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
unsigned culldist_mask = vs_sel->culldist_mask;
- unsigned total_mask;
-
- if (vs->key.opt.clip_disable) {
- assert(!info->base.cull_distance_array_size);
- clipdist_mask = 0;
- culldist_mask = 0;
- }
- total_mask = clipdist_mask | culldist_mask;
+ unsigned vs_out_mask = (clipdist_mask & ~vs->key.opt.kill_clip_distances) | culldist_mask;
/* Clip distances on points have no effect, so need to be implemented
* as cull distances. This applies for the clipvertex case as well.
@@ -756,8 +749,8 @@
culldist_mask |= clipdist_mask;
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
- unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
clipdist_mask | (culldist_mask << 8);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 967f6de..b5ce55a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1774,9 +1774,7 @@
{
struct si_shader_selector *ps = sctx->ps_shader.cso;
- key->opt.clip_disable = sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
- (vs->info.base.clip_distance_array_size || vs->info.writes_clipvertex) &&
- !vs->info.base.cull_distance_array_size;
+ key->opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
/* Find out if PS is disabled. */
bool ps_disabled = true;
@@ -2920,7 +2918,7 @@
old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant ||
!next_hw_vs_variant ||
- old_hw_vs_variant->key.opt.clip_disable != next_hw_vs_variant->key.opt.clip_disable))
+ old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances))
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
}
@@ -3862,7 +3860,7 @@
struct si_compiler_ctx_state compiler_state;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_shader *old_vs = si_get_vs_state(sctx);
- bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
+ unsigned old_kill_clip_distances = old_vs ? old_vs->key.opt.kill_clip_distances : 0;
struct si_shader *old_ps = sctx->ps_shader.current;
union si_vgt_stages_key key;
unsigned old_spi_shader_col_format =
@@ -3988,7 +3986,7 @@
si_update_vgt_shader_config(sctx, key);
- if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)
+ if (old_kill_clip_distances != si_get_vs_state(sctx)->key.opt.kill_clip_distances)
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
if (sctx->ps_shader.cso) {