freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL

It turns out that this clears CP_LOAD_STATE6 packets, including
disabling any pending loads for SS6_INDIRECT/SS6_BINDLESS (these loads
don't actually happen until the draw itself, and I'm not sure if they
happen if the state is unused by the shader) and marking constants and
UBO descriptors loaded with SS6_DIRECT as invalid. It's used very
differently from HLSQ_UPDATE_CNTL on a4xx from whence the name came, and
unlike on a4xx it's not readable, so this probably doesn't line up with
HLSQ_UPDATE_CNTL on a4xx.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5877>
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c
index 72b0f06..21d7d1c 100644
--- a/src/freedreno/computerator/a6xx.c
+++ b/src/freedreno/computerator/a6xx.c
@@ -117,8 +117,15 @@
 	const struct ir3_info *i = &v->info;
 	enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);
+	OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1);
+	OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_HS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_DS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_GS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_FS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_IBO |
+                   A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO);
 
 	unsigned constlen = align(v->constlen, 4);
 	OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml
index 56e1746..f821832 100644
--- a/src/freedreno/registers/a6xx.xml
+++ b/src/freedreno/registers/a6xx.xml
@@ -3408,8 +3408,31 @@
 		<bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/>
 	</reg32>
 
-	<!-- probably: -->
-	<reg32 offset="0xbb08" name="HLSQ_UPDATE_CNTL"/>
+	<reg32 offset="0xbb08" name="HLSQ_INVALIDATE_CMD">
+		<doc>
+			This register clears pending loads queued up by
+			CP_LOAD_STATE6. Each bit resets a particular kind(s) of
+			CP_LOAD_STATE6.
+		</doc>
+
+		<!-- per-stage state: shader, non-bindless UBO, textures, and samplers -->
+		<bitfield name="VS_STATE" pos="0" type="boolean"/>
+		<bitfield name="HS_STATE" pos="1" type="boolean"/>
+		<bitfield name="DS_STATE" pos="2" type="boolean"/>
+		<bitfield name="GS_STATE" pos="3" type="boolean"/>
+		<bitfield name="FS_STATE" pos="4" type="boolean"/>
+		<bitfield name="CS_STATE" pos="5" type="boolean"/>
+
+		<bitfield name="CS_IBO" pos="6" type="boolean"/>
+		<bitfield name="GFX_IBO" pos="7" type="boolean"/>
+
+		<bitfield name="CS_SHARED_CONST" pos="19" type="boolean"/>
+		<bitfield name="GFX_SHARED_CONST" pos="8" type="boolean"/>
+
+		<!-- SS6_BINDLESS: one bit per bindless base -->
+		<bitfield name="CS_BINDLESS" low="9" high="13" type="hex"/>
+		<bitfield name="GFX_BINDLESS" low="14" high="18" type="hex"/>
+	</reg32>
 
 	<reg32 offset="0xbb10" name="HLSQ_FS_CNTL" type="a6xx_hlsq_xs_cntl"/>
 
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 29b9525..2be3e38 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -428,7 +428,18 @@
       .const_state = &dummy_const_state,
    };
 
-   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
    tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 765732a..d1145bf 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -719,7 +719,19 @@
 
    tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .cs_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu_cs_emit_regs(cs,
                    A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
@@ -1684,7 +1696,7 @@
    }
    assert(dyn_idx == dynamicOffsetCount);
 
-   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value;
+   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
    uint64_t addr[MAX_SETS + 1] = {};
    struct tu_cs cs;
 
@@ -1709,7 +1721,7 @@
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
       sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
-      hlsq_update_value = 0x7c000;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
    } else {
@@ -1717,7 +1729,7 @@
 
       sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
-      hlsq_update_value = 0x3e00;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
    }
@@ -1728,7 +1740,7 @@
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
    tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10);
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
-   tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value));
+   tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
 
    struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 88cdca7..84cb9c4 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -468,8 +468,15 @@
                    const struct ir3_shader_variant *v,
                    uint32_t binary_iova)
 {
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
    tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova);
 
@@ -1355,8 +1362,15 @@
 
    STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff); /* XXX */
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
   /* Don't use the binning pass variant when GS is present because we don't
    * support compiling correct binning pass variants with GS.
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 4385576..75d4b96 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -34,6 +34,7 @@
 #include "fd6_const.h"
 #include "fd6_context.h"
 #include "fd6_emit.h"
+#include "fd6_pack.h"
 
 struct fd6_compute_stateobj {
 	struct ir3_shader *shader;
@@ -78,8 +79,16 @@
 	const struct ir3_info *i = &v->info;
 	enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+		));
 
 	OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
 	OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 4fa32b5..ab8fdea 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -356,8 +356,19 @@
 	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
 	OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass);
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0x7ffff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+			.gfx_shared_const = true,
+			.gfx_bindless = 0x1f,
+			.cs_bindless = 0x1f
+		));
 
 	emit_marker6(ring, 7);
 	OUT_PKT7(ring, CP_SET_MARKER, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index f20666c..4740f60 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -1130,8 +1130,20 @@
 
 	fd6_cache_inv(batch, ring);
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xfffff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+			.gfx_shared_const = true,
+			.cs_shared_const = true,
+			.gfx_bindless = 0x1f,
+			.cs_bindless = 0x1f
+		));
 
 	OUT_WFI5(ring);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 72a47c1..4ee227b 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -39,6 +39,7 @@
 #include "fd6_emit.h"
 #include "fd6_texture.h"
 #include "fd6_format.h"
+#include "fd6_pack.h"
 
 void
 fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
@@ -225,8 +226,16 @@
 static void
 setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state)
 {
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);        /* XXX */
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+		));
 
 	debug_assert(state->vs->constlen >= state->bs->constlen);