v3dv: don't cache subpass color clear pipelines Subpass color clear pipelines are those used to emit partial attachment clears as draw calls inside the render pass currently bound by the application in the command buffer, leading to a huge performance improvement compared to the case where we emit them in their own render pass. Unfortunately, because the pipeline references the render pass object in which it is used and the render pass object is owned by the application (and can be destroyed at any point), we can't cache these pipelines (unless we implement a refcounting mechanism or other similar strategy). Performance impact looks negligible based on experiments with vkQuake3, probably because the underlying pipeline cache is preventing the redundant shader recompiles. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

commit: db0bb36ace1315839181bea6d01b8f89a0f2a408 [log] [tgz]
author: Iago Toral Quiroga <itoral@igalia.com> Tue Aug 25 14:25:45 2020 +0200
committer: Marge Bot <eric+marge@anholt.net> Tue Oct 13 21:21:33 2020 +0000
tree: 03550ae6294dfae56d365442228965be4561d991
parent: 07828c0456fd446c9664cd04d70acb2bfaafce22 [diff]
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 00a1747..fdad296 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c

@@ -1249,6 +1249,17 @@
    init_meta_blit_resources(device);
 }
 
+void
+v3dv_meta_color_clear_pipeline_destroy(VkDevice _device,
+                                       struct v3dv_meta_color_clear_pipeline *p,
+                                       VkAllocationCallbacks *alloc)
+{
+   v3dv_DestroyPipeline(_device, p->pipeline, alloc);
+   if (p->cached)
+      v3dv_DestroyRenderPass(_device, p->pass, alloc);
+   vk_free(alloc, p);
+}
+
 static void
 destroy_device_meta(struct v3dv_device *device)
 {
@@ -1258,10 +1269,8 @@
 
    hash_table_foreach(device->meta.color_clear.cache, entry) {
       struct v3dv_meta_color_clear_pipeline *item = entry->data;
-      v3dv_DestroyPipeline(_device, item->pipeline, &device->alloc);
-      if (item->free_render_pass)
-         v3dv_DestroyRenderPass(_device, item->pass, &device->alloc);
-      vk_free(&device->alloc, item);
+      v3dv_meta_color_clear_pipeline_destroy(v3dv_device_to_handle(device),
+                                             item, &device->alloc);
    }
    _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
 

diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c
index 26e7e8f..bb01b6d 100644
--- a/src/broadcom/vulkan/v3dv_meta_clear.c
+++ b/src/broadcom/vulkan/v3dv_meta_clear.c

@@ -529,15 +529,33 @@
    if (result != VK_SUCCESS)
       return result;
 
-   const uint64_t key =
-      get_color_clear_pipeline_cache_key(rt_idx, format, samples, components);
-   mtx_lock(&device->meta.mtx);
-   struct hash_entry *entry =
-      _mesa_hash_table_search(device->meta.color_clear.cache, &key);
-   if (entry) {
-      mtx_unlock(&device->meta.mtx);
-      *pipeline = entry->data;
-      return VK_SUCCESS;
+   /* If pass != NULL it means that we are emitting the clear as a draw call
+    * in the current pass bound by the application. In that case, we can't
+    * cache the pipeline, since it will be referencing that pass and the
+    * application could be destroying it at any point. Hopefully, the perf
+    * impact is not too big since we still have the device pipeline cache
+    * around and we won't end up re-compiling the clear shader.
+    *
+    * FIXME: alternatively, we could refcount (or maybe clone) the render pass
+    * provided by the application and include it in the pipeline key setup
+    * to make caching safe in this scenario, however, based on tests with
+    * vkQuake3, the fact that we are not caching here doesn't seem to have
+    * any significant impact in performance, so it might not be worth it.
+    */
+   const bool can_cache_pipeline = (pass == NULL);
+
+   uint64_t key;
+   if (can_cache_pipeline) {
+      key =
+         get_color_clear_pipeline_cache_key(rt_idx, format, samples, components);
+      mtx_lock(&device->meta.mtx);
+      struct hash_entry *entry =
+         _mesa_hash_table_search(device->meta.color_clear.cache, &key);
+      if (entry) {
+         mtx_unlock(&device->meta.mtx);
+         *pipeline = entry->data;
+         return VK_SUCCESS;
+      }
    }
 
    *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
@@ -557,7 +575,6 @@
       if (result != VK_SUCCESS)
          goto fail;
 
-      (*pipeline)->free_render_pass = true;
       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
    } else {
       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
@@ -575,19 +592,24 @@
    if (result != VK_SUCCESS)
       goto fail;
 
-   (*pipeline)->key = key;
-   _mesa_hash_table_insert(device->meta.color_clear.cache,
-                           &(*pipeline)->key, *pipeline);
+   if (can_cache_pipeline) {
+      (*pipeline)->key = key;
+      (*pipeline)->cached = true;
+      _mesa_hash_table_insert(device->meta.color_clear.cache,
+                              &(*pipeline)->key, *pipeline);
 
-   mtx_unlock(&device->meta.mtx);
+      mtx_unlock(&device->meta.mtx);
+   }
+
    return VK_SUCCESS;
 
 fail:
-   mtx_unlock(&device->meta.mtx);
+   if (can_cache_pipeline)
+      mtx_unlock(&device->meta.mtx);
 
    VkDevice _device = v3dv_device_to_handle(device);
    if (*pipeline) {
-      if ((*pipeline)->free_render_pass)
+      if ((*pipeline)->cached)
          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc);
       if ((*pipeline)->pipeline)
          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
@@ -740,6 +762,12 @@
    }
    assert(pipeline && pipeline->pipeline && pipeline->pass);
 
+   /* Since we are not emitting the draw call in the current subpass we should
+    * be caching the clear pipeline and we don't have to take care of destorying
+    * it below.
+    */
+   assert(pipeline->cached);
+
    /* Store command buffer state for the current subpass before we interrupt
     * it to emit the color clear pass and then finish the job for the
     * interrupted subpass.
@@ -1002,6 +1030,15 @@
       v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
    }
 
+   /* Subpass pipelines can't be cached because they include a reference to the
+    * render pass currently bound by the application, which means that we need
+    * to destroy them manually here.
+    */
+   assert(!pipeline->cached);
+   v3dv_cmd_buffer_add_private_obj(
+      cmd_buffer, (uintptr_t)pipeline,
+      (v3dv_cmd_buffer_private_obj_destroy_cb) v3dv_meta_color_clear_pipeline_destroy);
+
    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
 }
 

diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index aea3789..218c1ad 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h

@@ -233,7 +233,7 @@
 struct v3dv_meta_color_clear_pipeline {
    VkPipeline pipeline;
    VkRenderPass pass;
-   bool free_render_pass;
+   bool cached;
    uint64_t key;
 };
 
@@ -1859,6 +1859,11 @@
 void v3dv_shader_module_internal_init(struct v3dv_shader_module *module,
                                       nir_shader *nir);
 
+void
+v3dv_meta_color_clear_pipeline_destroy(VkDevice _device,
+                                       struct v3dv_meta_color_clear_pipeline *p,
+                                       VkAllocationCallbacks *alloc);
+
 #define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType)   \
                                                         \
    static inline struct __v3dv_type *                    \
commit	db0bb36ace1315839181bea6d01b8f89a0f2a408	[log] [tgz]
author	Iago Toral Quiroga <itoral@igalia.com>	Tue Aug 25 14:25:45 2020 +0200
committer	Marge Bot <eric+marge@anholt.net>	Tue Oct 13 21:21:33 2020 +0000
tree	03550ae6294dfae56d365442228965be4561d991
parent	07828c0456fd446c9664cd04d70acb2bfaafce22 [diff]