v3dv/pipeline: add basic ref counting support for variants

As soon as we start caching variants in pipeline caches, the same
variant can be used by different pipelines and pipeline caches, so its
lifetime needs to be reference counted.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 89563d4..fb0747f 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -2740,6 +2740,9 @@
    assert(variant);
    assert(vk_result == VK_SUCCESS);
 
+   if (p_stage->current_variant != variant) {
+      v3dv_shader_variant_unref(cmd_buffer->device, p_stage->current_variant);
+   }
    p_stage->current_variant = variant;
 }
 
@@ -2768,6 +2771,9 @@
    assert(variant);
    assert(vk_result == VK_SUCCESS);
 
+   if (p_stage->current_variant != variant) {
+      v3dv_shader_variant_unref(cmd_buffer->device, p_stage->current_variant);
+   }
    p_stage->current_variant = variant;
 
    /* Now the vs_bin */
@@ -2787,6 +2793,9 @@
    assert(variant);
    assert(vk_result == VK_SUCCESS);
 
+   if (p_stage->current_variant != variant) {
+      v3dv_shader_variant_unref(cmd_buffer->device, p_stage->current_variant);
+   }
    p_stage->current_variant = variant;
 }
 
@@ -2814,6 +2823,9 @@
    assert(variant);
    assert(result == VK_SUCCESS);
 
+   if (p_stage->current_variant != variant) {
+      v3dv_shader_variant_unref(cmd_buffer->device, p_stage->current_variant);
+   }
    p_stage->current_variant = variant;
 }
 
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index a509eed..ac71590 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -41,7 +41,6 @@
 
 #include "drm-uapi/v3d_drm.h"
 #include "format/u_format.h"
-#include "u_atomic.h"
 #include "vk_util.h"
 
 #include "util/build_id.h"
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 9ce41e2..9e77656 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -89,6 +89,16 @@
    vk_free2(&device->alloc, pAllocator, module);
 }
 
+void
+v3dv_shader_variant_destroy(struct v3dv_device *device,
+                            struct v3dv_shader_variant *variant)
+{ /* Frees the variant's assembly BO (if any), its prog_data and itself. */
+   if (variant->assembly_bo) /* only free the assembly BO if one is set */
+      v3dv_bo_free(device, variant->assembly_bo);
+   ralloc_free(variant->prog_data.base);
+   vk_free(&device->alloc, variant); /* NOTE(review): must match alloc site */
+}
+
 static void
 destroy_pipeline_stage(struct v3dv_device *device,
                        struct v3dv_pipeline_stage *p_stage,
@@ -99,18 +109,13 @@
 
    hash_table_foreach(p_stage->cache, entry) {
       struct v3dv_shader_variant *variant = entry->data;
-
-      if (variant->assembly_bo) {
-         v3dv_bo_free(device, variant->assembly_bo);
-         ralloc_free(variant->prog_data.base);
-         vk_free2(&device->alloc, pAllocator, variant);
-      }
+      if (variant)
+         v3dv_shader_variant_unref(device, variant);
    }
 
    ralloc_free(p_stage->nir);
-
+   v3dv_shader_variant_unref(device, p_stage->current_variant);
    _mesa_hash_table_destroy(p_stage->cache, NULL);
-
    vk_free2(&device->alloc, pAllocator, p_stage);
 }
 
@@ -1348,6 +1353,8 @@
  *
  * If the method returns NULL it means that it was not able to allocate the
  * resources for the variant. out_vk_result would return which OOM applies.
+ *
+ * Returns a new reference of the shader_variant to the caller.
  */
 struct v3dv_shader_variant*
 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
@@ -1361,6 +1368,7 @@
 
    if (entry) {
       *out_vk_result = VK_SUCCESS;
+      v3dv_shader_variant_ref(entry->data);
       return entry->data;
    }
 
@@ -1374,6 +1382,7 @@
       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
       return NULL;
    }
+   variant->ref_cnt = 1;
 
    struct v3dv_physical_device *physical_device =
       &pipeline->device->instance->physicalDevice;
@@ -1409,7 +1418,7 @@
    } else {
       if (!upload_assembly(p_stage, variant, qpu_insts, qpu_insts_size)) {
          free(qpu_insts);
-         vk_free2(&device->alloc, pAllocator, variant);
+         v3dv_shader_variant_unref(device, variant);
 
          *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
          return NULL;
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 143d62c..a58063e 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -58,6 +58,7 @@
 #include "util/set.h"
 #include "util/hash_table.h"
 #include "util/xmlconfig.h"
+#include "u_atomic.h"
 
 #include "v3dv_entrypoints.h"
 #include "v3dv_extensions.h"
@@ -1218,6 +1219,8 @@
 }
 
 struct v3dv_shader_variant {
+   uint32_t ref_cnt;
+
    union {
       struct v3d_prog_data *base;
       struct v3d_vs_prog_data *vs;
@@ -1714,6 +1717,26 @@
                         const VkAllocationCallbacks *pAllocator,
                         VkResult *out_vk_result);
 
+void /* called by v3dv_shader_variant_unref() when the count hits zero */
+v3dv_shader_variant_destroy(struct v3dv_device *device,
+                            struct v3dv_shader_variant *variant);
+
+static inline void
+v3dv_shader_variant_ref(struct v3dv_shader_variant *variant)
+{ /* Atomically increments variant->ref_cnt by one. */
+   assert(variant && variant->ref_cnt >= 1); /* must already be referenced */
+   p_atomic_inc(&variant->ref_cnt);
+}
+
+static inline void
+v3dv_shader_variant_unref(struct v3dv_device *device,
+                          struct v3dv_shader_variant *variant)
+{ /* Drops one reference; destroys variant once the count reaches zero. */
+   assert(variant && variant->ref_cnt >= 1); /* NULL is not accepted */
+   if (p_atomic_dec_zero(&variant->ref_cnt))
+      v3dv_shader_variant_destroy(device, variant);
+}
+
 struct v3dv_descriptor *
 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                    struct v3dv_descriptor_map *map,
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 9e3b784..a5a7164 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -26,8 +26,6 @@
 
 #include "broadcom/clif/clif_dump.h"
 
-#include "u_atomic.h"
-
 #include <errno.h>
 #include <time.h>