tu: Implement clip/cull distances
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index a94e391..c1eb9e8 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -423,8 +423,8 @@
.shaderStorageImageArrayDynamicIndexing = true,
.shaderStorageImageReadWithoutFormat = true,
.shaderStorageImageWriteWithoutFormat = true,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
.shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index c1e6000..a855627 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -803,7 +803,11 @@
const struct reg_config *cfg = &reg_config[last_shader->type];
- struct ir3_shader_linkage linkage = { .primid_loc = 0xff };
+ struct ir3_shader_linkage linkage = {
+ .primid_loc = 0xff,
+ .clip0_loc = 0xff,
+ .clip1_loc = 0xff,
+ };
if (fs)
ir3_link_shaders(&linkage, last_shader, fs, true);
@@ -829,6 +833,10 @@
ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER);
const uint32_t view_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT);
+ const uint32_t clip0_regid =
+ ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
+ const uint32_t clip1_regid =
+ ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
uint32_t primitive_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
uint32_t flags_regid = gs ?
@@ -865,6 +873,19 @@
ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc);
}
+ uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
+
+ /* Handle the case where clip/cull distances aren't read by the FS */
+ uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
+ if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
+ clip0_loc = linkage.max_loc;
+ ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc);
+ }
+ if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
+ clip1_loc = linkage.max_loc;
+ ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc);
+ }
+
tu6_setup_streamout(cs, last_shader, &linkage);
/* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
@@ -902,17 +923,21 @@
A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
- tu_cs_emit(cs, 0xffff00);
+ tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
+ A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
+ A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
- tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
+ A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
- CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
+ CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
+ A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
@@ -2052,6 +2077,7 @@
key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
+ key.ucp_enables = MASK(last_shader->ir3_shader->nir->info.clip_distance_array_size);
pipeline->tess.patch_type = key.tessellation;
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index ad5c7c3..9b4b9f8 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -172,6 +172,8 @@
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+ NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+
NIR_PASS_V(nir, nir_lower_frexp);
ir3_optimize_loop(nir);