v3dv: vkCmdCopyBufferToImage for depth/stencil formats
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c
index 9d53854..2fd1ffb 100644
--- a/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -91,16 +91,18 @@
/* This chooses a tile buffer format that is appropriate for the copy operation.
* Typically, this is the image render target type, however, if we are copying
- * depth/stencil to a buffer the hardware can't do raster stores, so we need to
- * load and store to/from a tile color buffer using a compatible color format.
+ * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
+ * we need to load and store to/from a tile color buffer using a compatible
+ * color format.
*/
static uint32_t
choose_tlb_format(struct v3dv_image *image,
VkImageAspectFlags aspect,
bool for_store,
- bool is_copy_to_buffer)
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer)
{
- if (is_copy_to_buffer) {
+ if (is_copy_to_buffer || is_copy_from_buffer) {
switch (image->vk_format) {
case VK_FORMAT_D16_UNORM:
return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
@@ -115,13 +117,30 @@
* store outputs. For the load input we still want RGBA8UI since the
* source image contains 4 channels (including the 3 channels
* containing the 24-bit depth value).
+ *
+ * When loading the stencil aspect of a combined depth/stencil image
+ * from a buffer, we read packed 8-bit stencil values from the buffer
+ * that we need to put into the LSB of the 32-bit format (the R
+ * channel), so we use R8UI. For the store, if we used R8UI then we
+ * would write 8-bit stencil values consecutively over depth channels,
+ * so we need to use RGBA8UI. This will write each stencil value in
+ * its correct position, but will overwrite depth values (channels G
+ * B,A) with undefined values. To fix this, we will have to restore
+ * the depth aspect from the Z tile buffer, which we should pre-load
+ * from the image before the store).
*/
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
} else {
assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
- return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
- V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ if (is_copy_to_buffer) {
+ return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
+ V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ } else {
+ assert(is_copy_from_buffer);
+ return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
+ V3D_OUTPUT_IMAGE_FORMAT_R8UI;
+ }
}
default: /* Color formats */
return image->format->rt_type;
@@ -379,15 +398,16 @@
VkImageAspectFlags aspect,
uint32_t layer,
uint32_t mip_level,
- bool is_copy_to_buffer)
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer)
{
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
- /* For image to buffer copies we always load to and store from RT0, even
- * for depth/stencil aspects, because the hardware can't do raster stores
- * from the depth/stencil tile buffers.
+ /* For image to/from buffer copies we always load to and store from RT0,
+ * even for depth/stencil aspects, because the hardware can't do raster
+ * stores or loads from/to the depth/stencil tile buffers.
*/
- bool load_to_color_tlb = is_copy_to_buffer ||
+ bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
@@ -397,8 +417,9 @@
load.address = v3dv_cl_address(image->mem->bo, layer_offset);
- load.input_image_format =
- choose_tlb_format(image, aspect, false, is_copy_to_buffer);
+ load.input_image_format = choose_tlb_format(image, aspect, false,
+ is_copy_to_buffer,
+ is_copy_from_buffer);
load.memory_format = slice->tiling;
/* When copying depth/stencil images to a buffer, for D24 formats Vulkan
@@ -440,8 +461,6 @@
else
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
}
-
- cl_emit(cl, END_OF_LOADS, end);
}
static void
@@ -450,11 +469,12 @@
VkImageAspectFlags aspect,
uint32_t layer,
uint32_t mip_level,
- bool is_copy_to_buffer)
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer)
{
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
- bool store_from_color_tlb = is_copy_to_buffer ||
+ bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
@@ -465,8 +485,19 @@
store.address = v3dv_cl_address(image->mem->bo, layer_offset);
store.clear_buffer_being_stored = false;
- store.output_image_format =
- choose_tlb_format(image, aspect, true, is_copy_to_buffer);
+ /* See rationale in emit_image_load() */
+ if (is_copy_from_buffer) {
+ if (image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
+ (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ (aspect & VK_IMAGE_ASPECT_DEPTH_BIT))) {
+ store.r_b_swap = true;
+ store.channel_reverse = true;
+ }
+ }
+
+ store.output_image_format = choose_tlb_format(image, aspect, true,
+ is_copy_to_buffer,
+ is_copy_from_buffer);
store.memory_format = slice->tiling;
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
@@ -502,7 +533,9 @@
/* Load image to TLB */
emit_image_load(cl, image, imgrsc->aspectMask,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- true);
+ true, false);
+
+ cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
@@ -528,7 +561,8 @@
uint32_t buffer_offset =
region->bufferOffset + height * buffer_stride * layer;
- uint32_t format = choose_tlb_format(image, imgrsc->aspectMask, true, true);
+ uint32_t format = choose_tlb_format(image, imgrsc->aspectMask,
+ true, true, false);
bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
@@ -667,7 +701,9 @@
emit_image_load(cl, src, srcrsc->aspectMask,
srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
- false);
+ false, false);
+
+ cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
@@ -675,7 +711,8 @@
assert(layer < dstrsc->layerCount);
emit_image_store(cl, dst, dstrsc->aspectMask,
- dstrsc->baseArrayLayer + layer, dstrsc->mipLevel, false);
+ dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
+ false, false);
cl_emit(cl, END_OF_TILE_MARKER, end);
@@ -813,7 +850,7 @@
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
- emit_image_store(cl, image, aspects, layer, level, false);
+ emit_image_store(cl, image, aspects, layer, level, false, false);
cl_emit(cl, END_OF_TILE_MARKER, end);
@@ -1316,22 +1353,64 @@
else
height = region->bufferImageHeight;
- uint32_t buffer_stride = width * image->cpp;
+ uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
+ 1 : image->cpp;
+ uint32_t buffer_stride = width * cpp;
uint32_t buffer_offset =
region->bufferOffset + height * buffer_stride * layer;
- uint32_t format = choose_tlb_format(image, imgrsc->aspectMask, true, false);
+ uint32_t format = choose_tlb_format(image, imgrsc->aspectMask,
+ false, false, true);
emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
buffer_offset, buffer_stride, format);
+ /* Because we can't do raster loads/stores of Z/S formats we need to
+ * use a color tile buffer with a compatible RGBA color format instead.
+ * However, when we are uploading a single aspect to a combined
+ * depth/stencil image we have the problem that our tile buffer stores don't
+ * allow us to mask out the other aspect, so we always write all four RGBA
+ * channels to the image and we end up overwriting that other aspect with
+ * undefined values. To work around that, we first load the aspect we are
+ * not copying from the image memory into a proper Z/S tile buffer. Then we
+ * do our store from the color buffer for the aspect we are copying, and
+ * after that, we do another store from the Z/S tile buffer to restore the
+ * other aspect to its original value.
+ */
+ if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ emit_image_load(cl, image, VK_IMAGE_ASPECT_STENCIL_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ } else {
+ assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
+ emit_image_load(cl, image, VK_IMAGE_ASPECT_DEPTH_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ }
+ }
+
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
- /* Store TLB to image */
+ /* Store TLB to image */
emit_image_store(cl, image, imgrsc->aspectMask,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, false);
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, true);
+
+ if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ emit_image_store(cl, image, VK_IMAGE_ASPECT_STENCIL_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ } else {
+ assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
+ emit_image_store(cl, image, VK_IMAGE_ASPECT_DEPTH_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ }
+ }
cl_emit(cl, END_OF_TILE_MARKER, end);