v3dv/pipeline: when looking for a variant, check the current variant first

So far, when looking for a variant fulfilling a specific v3d key, we
checked the caches and, if that failed, compiled a new variant and
updated the current variant.

But we could first check whether the current variant already fulfills
that key. This was not really a problem so far, as searching the caches
was fast, but now that the shader caches can be disabled entirely
through V3DV_ENABLE_PIPELINE_CACHE, it is far better to check the
current variant first.

Without this, vkQuake3 at 720p drops to 1fps when the cache is disabled.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 9fb31c5..bccc3d8 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -1372,6 +1372,8 @@
                            gl_shader_stage stage,
                            bool is_coord,
                            const unsigned char *variant_sha1,
+                           const struct v3d_key *key,
+                           uint32_t key_size,
                            struct v3d_prog_data *prog_data,
                            uint32_t prog_data_size,
                            const uint64_t *qpu_insts,
@@ -1390,6 +1392,8 @@
    variant->ref_cnt = 1;
    variant->stage = stage;
    variant->is_coord = is_coord;
+   memcpy(&variant->key, key, key_size);
+   variant->v3d_key_size = key_size;
    memcpy(variant->variant_sha1, variant_sha1, sizeof(variant->variant_sha1));
    variant->prog_data_size = prog_data_size;
    variant->prog_data.base = prog_data;
@@ -1428,17 +1432,27 @@
                         const VkAllocationCallbacks *pAllocator,
                         VkResult *out_vk_result)
 {
-   /* First we search on the pipeline cache if provided by the user, or the
-    * default one
+   /* First we check if the current pipeline variant is such variant. For this
+    * we can just use the v3d_key
     */
+
+   if (p_stage->current_variant &&
+       memcmp(key, &p_stage->current_variant->key, key_size) == 0) {
+      *out_vk_result = VK_SUCCESS;
+      return p_stage->current_variant;
+   }
+
+   /* We search on the pipeline cache if provided by the user, or the default
+    * one
+    */
+   unsigned char variant_sha1[20];
+   pipeline_hash_variant(p_stage, key, key_size, variant_sha1);
+
    struct v3dv_pipeline *pipeline = p_stage->pipeline;
    struct v3dv_device *device = pipeline->device;
    if (cache == NULL && device->instance->pipeline_cache_enabled)
        cache = &device->default_pipeline_cache;
 
-   unsigned char variant_sha1[20];
-   pipeline_hash_variant(p_stage, key, key_size, variant_sha1);
-
    struct v3dv_shader_variant *variant =
       v3dv_pipeline_cache_search_for_variant(pipeline,
                                              cache,
@@ -1488,6 +1502,7 @@
 
    variant = v3dv_shader_variant_create(device, p_stage->stage, p_stage->is_coord,
                                         variant_sha1,
+                                        key, key_size,
                                         prog_data, v3d_prog_data_size(p_stage->stage),
                                         qpu_insts, qpu_insts_size,
                                         out_vk_result);
diff --git a/src/broadcom/vulkan/v3dv_pipeline_cache.c b/src/broadcom/vulkan/v3dv_pipeline_cache.c
index d3a58c2..de7da8e 100644
--- a/src/broadcom/vulkan/v3dv_pipeline_cache.c
+++ b/src/broadcom/vulkan/v3dv_pipeline_cache.c
@@ -310,6 +310,9 @@
    gl_shader_stage stage = blob_read_uint32(blob);
    bool is_coord = blob_read_uint8(blob);
 
+   uint32_t v3d_key_size = blob_read_uint32(blob);
+   const struct v3d_key *v3d_key = blob_read_bytes(blob, v3d_key_size);
+
    const unsigned char *variant_sha1 = blob_read_bytes(blob, 20);
 
    uint32_t prog_data_size = blob_read_uint32(blob);
@@ -352,6 +355,7 @@
 
    return v3dv_shader_variant_create(device, stage, is_coord,
                                      variant_sha1,
+                                     v3d_key, v3d_key_size,
                                      new_prog_data, prog_data_size,
                                      qpu_insts, qpu_insts_size,
                                      &result);
@@ -578,6 +582,9 @@
    blob_write_uint32(blob, variant->stage);
    blob_write_uint8(blob, variant->is_coord);
 
+   blob_write_uint32(blob, variant->v3d_key_size);
+   blob_write_bytes(blob, &variant->key, variant->v3d_key_size);
+
    blob_write_bytes(blob, variant->variant_sha1, sizeof(variant->variant_sha1));
 
    blob_write_uint32(blob, variant->prog_data_size);
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index d418c1c..78c9594 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -1229,6 +1229,16 @@
    gl_shader_stage stage;
    bool is_coord;
 
+   /* v3d_key used to compile the variant. Sometimes we can just skip the
+    * pipeline caches, and look using this.
+    */
+   union {
+      struct v3d_key base;
+      struct v3d_vs_key vs;
+      struct v3d_fs_key fs;
+   } key;
+   uint32_t v3d_key_size;
+
    /* key for the pipeline cache, it is p_stage shader_sha1 + v3d compiler
     * sha1
     */
@@ -1737,6 +1747,8 @@
                            gl_shader_stage stage,
                            bool is_coord,
                            const unsigned char *variant_sha1,
+                           const struct v3d_key *key,
+                           uint32_t key_size,
                            struct v3d_prog_data *prog_data,
                            uint32_t prog_data_size,
                            const uint64_t *qpu_insts,