/*
* Copyright © 2019 Raspberry Pi
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3dv_private.h"
#include "compiler/nir/nir_builder.h"
#include "broadcom/cle/v3dx_pack.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
static uint32_t
meta_blit_key_hash(const void *key)
{
return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}
static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}
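/* We keep 3 blit pipeline caches, presumably one per image dimensionality
 * (1D, 2D and 3D); the indexing happens where the caches are looked up.
 */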
void
v3dv_meta_blit_init(struct v3dv_device *device)
{
for (uint32_t i = 0; i < 3; i++) {
device->meta.blit.cache[i] =
_mesa_hash_table_create(NULL,
meta_blit_key_hash,
meta_blit_key_compare);
}
}
void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
VkDevice _device = v3dv_device_to_handle(device);
for (uint32_t i = 0; i < 3; i++) {
hash_table_foreach(device->meta.blit.cache[i], entry) {
struct v3dv_meta_blit_pipeline *item = entry->data;
v3dv_DestroyPipeline(_device, item->pipeline, &device->alloc);
v3dv_DestroyRenderPass(_device, item->pass, &device->alloc);
v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->alloc);
vk_free(&device->alloc, item);
}
_mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
}
if (device->meta.blit.playout) {
v3dv_DestroyPipelineLayout(_device, device->meta.blit.playout,
&device->alloc);
}
if (device->meta.blit.dslayout) {
v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.dslayout,
&device->alloc);
}
}
static inline bool
can_use_tlb(struct v3dv_image *image,
const VkOffset3D *offset,
VkFormat *compat_format);
/**
 * Copy operations implemented in this file don't operate on a framebuffer
 * object provided by the user; however, since most of them use the TLB,
 * we still need some representation of the framebuffer. For the most
 * part, the job's frame tiling information is enough, but we also need
 * additional information such as the internal type of our single render
 * target, so we use this auxiliary struct to pass that information around.
*/
struct framebuffer_data {
/* The internal type of the single render target */
uint32_t internal_type;
/* Supertile coverage */
uint32_t min_x_supertile;
uint32_t min_y_supertile;
uint32_t max_x_supertile;
uint32_t max_y_supertile;
/* Format info */
VkFormat vk_format;
const struct v3dv_format *format;
uint8_t internal_depth_type;
};
static void
setup_framebuffer_data(struct framebuffer_data *fb,
VkFormat vk_format,
uint32_t internal_type,
const struct v3dv_frame_tiling *tiling)
{
fb->internal_type = internal_type;
/* Supertile coverage always starts at 0,0 */
uint32_t supertile_w_in_pixels =
tiling->tile_width * tiling->supertile_width;
uint32_t supertile_h_in_pixels =
tiling->tile_height * tiling->supertile_height;
fb->min_x_supertile = 0;
fb->min_y_supertile = 0;
fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
fb->vk_format = vk_format;
fb->format = v3dv_get_format(vk_format);
fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
if (vk_format_is_depth_or_stencil(vk_format))
fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format);
}
/* This chooses a tile buffer format that is appropriate for the copy operation.
 * Typically, this is the image render target type; however, if we are copying
* depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
* we need to load and store to/from a tile color buffer using a compatible
* color format.
*/
static uint32_t
choose_tlb_format(struct framebuffer_data *framebuffer,
VkImageAspectFlags aspect,
bool for_store,
bool is_copy_to_buffer,
bool is_copy_from_buffer)
{
if (is_copy_to_buffer || is_copy_from_buffer) {
switch (framebuffer->vk_format) {
case VK_FORMAT_D16_UNORM:
return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
case VK_FORMAT_D32_SFLOAT:
return V3D_OUTPUT_IMAGE_FORMAT_R32F;
case VK_FORMAT_X8_D24_UNORM_PACK32:
return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
case VK_FORMAT_D24_UNORM_S8_UINT:
/* When storing the stencil aspect of a combined depth/stencil image
* to a buffer, the Vulkan spec states that the output buffer must
* have packed stencil values, so we choose an R8UI format for our
* store outputs. For the load input we still want RGBA8UI since the
* source image contains 4 channels (including the 3 channels
* containing the 24-bit depth value).
*
* When loading the stencil aspect of a combined depth/stencil image
* from a buffer, we read packed 8-bit stencil values from the buffer
* that we need to put into the LSB of the 32-bit format (the R
* channel), so we use R8UI. For the store, if we used R8UI then we
* would write 8-bit stencil values consecutively over depth channels,
* so we need to use RGBA8UI. This will write each stencil value in
 * its correct position, but will overwrite depth values (channels G,
 * B, A) with undefined values. To fix this, we would have to restore
 * the depth aspect from the Z tile buffer, which we should pre-load
 * from the image before the store.
*/
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
} else {
assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
if (is_copy_to_buffer) {
return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
} else {
assert(is_copy_from_buffer);
return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
V3D_OUTPUT_IMAGE_FORMAT_R8UI;
}
}
default: /* Color formats */
return framebuffer->format->rt_type;
}
} else {
return framebuffer->format->rt_type;
}
}
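/* A format needs an R/B swap if its first component is sourced from the
 * third (Z) channel, i.e. BGRA-style channel orderings.
 */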
static inline bool
format_needs_rb_swap(VkFormat format)
{
const uint8_t *swizzle = v3dv_get_format_swizzle(format);
return swizzle[0] == PIPE_SWIZZLE_Z;
}
static void
get_internal_type_bpp_for_image_aspects(VkFormat vk_format,
VkImageAspectFlags aspect_mask,
uint32_t *internal_type,
uint32_t *internal_bpp)
{
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT;
   /* We can't store depth/stencil pixel formats to a raster format, so
    * instead we load our depth/stencil aspects to a compatible color
* format.
*/
/* FIXME: pre-compute this at image creation time? */
if (aspect_mask & ds_aspects) {
switch (vk_format) {
case VK_FORMAT_D16_UNORM:
*internal_type = V3D_INTERNAL_TYPE_16UI;
*internal_bpp = V3D_INTERNAL_BPP_64;
break;
case VK_FORMAT_D32_SFLOAT:
*internal_type = V3D_INTERNAL_TYPE_32F;
*internal_bpp = V3D_INTERNAL_BPP_128;
break;
case VK_FORMAT_X8_D24_UNORM_PACK32:
case VK_FORMAT_D24_UNORM_S8_UINT:
/* Use RGBA8 format so we can relocate the X/S bits in the appropriate
* place to match Vulkan expectations. See the comment on the tile
* load command for more details.
*/
*internal_type = V3D_INTERNAL_TYPE_8UI;
*internal_bpp = V3D_INTERNAL_BPP_32;
break;
default:
assert(!"unsupported format");
break;
}
} else {
const struct v3dv_format *format = v3dv_get_format(vk_format);
v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
internal_type,
internal_bpp);
}
}
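/* Clear parameters for emit_rcl_prologue(): the clear value to program, plus
 * the image and mip level being cleared so we can compute the UIF padding
 * required for the clear.
 */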
struct rcl_clear_info {
const union v3dv_clear_value *clear_value;
struct v3dv_image *image;
VkImageAspectFlags aspects;
uint32_t layer;
uint32_t level;
};
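/* Emits the render control list prologue shared by the jobs in this file:
 * common rendering mode config, optional TLB clear colors, the single render
 * target config, Z/S clear values and the initial tile list block size.
 * Returns NULL if we ran out of memory while recording the RCL.
 */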
static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
struct framebuffer_data *fb,
const struct rcl_clear_info *clear_info)
{
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
struct v3dv_cl *rcl = &job->rcl;
v3dv_cl_ensure_space_with_branch(rcl, 200 +
tiling->layers * 256 *
cl_packet_length(SUPERTILE_COORDINATES));
if (job->cmd_buffer->state.oom)
return NULL;
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
config.early_z_disable = true;
config.image_width_pixels = tiling->width;
config.image_height_pixels = tiling->height;
config.number_of_render_targets = 1;
config.multisample_mode_4x = tiling->msaa;
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
config.internal_depth_type = fb->internal_depth_type;
}
if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
uint32_t clear_pad = 0;
if (clear_info->image) {
const struct v3dv_image *image = clear_info->image;
const struct v3d_resource_slice *slice =
&image->slices[clear_info->level];
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
int uif_block_height = v3d_utile_height(image->cpp) * 2;
uint32_t implicit_padded_height =
align(tiling->height, uif_block_height) / uif_block_height;
if (slice->padded_height_of_output_image_in_uif_blocks -
implicit_padded_height >= 15) {
clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
}
}
}
const uint32_t *color = &clear_info->clear_value->color[0];
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
clear.clear_color_low_32_bits = color[0];
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
clear.render_target_number = 0;
};
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
clear.clear_color_mid_low_32_bits =
((color[1] >> 24) | (color[2] << 8));
clear.clear_color_mid_high_24_bits =
((color[2] >> 24) | ((color[3] & 0xffff) << 8));
clear.render_target_number = 0;
};
}
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
clear.uif_padded_height_in_uif_blocks = clear_pad;
clear.clear_color_high_16_bits = color[3] >> 16;
clear.render_target_number = 0;
};
}
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
rt.render_target_0_internal_bpp = tiling->internal_bpp;
rt.render_target_0_internal_type = fb->internal_type;
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
};
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
}
return rcl;
}
static void
emit_frame_setup(struct v3dv_job *job,
uint32_t layer,
const union v3dv_clear_value *clear_value)
{
v3dv_return_if_oom(NULL, job);
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
struct v3dv_cl *rcl = &job->rcl;
const uint32_t tile_alloc_offset =
64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
config.number_of_bin_tile_lists = 1;
config.total_frame_width_in_tiles = tiling->draw_tiles_x;
config.total_frame_height_in_tiles = tiling->draw_tiles_y;
config.supertile_width_in_tiles = tiling->supertile_width;
config.supertile_height_in_tiles = tiling->supertile_height;
config.total_frame_width_in_supertiles =
tiling->frame_width_in_supertiles;
config.total_frame_height_in_supertiles =
tiling->frame_height_in_supertiles;
}
/* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
* it here.
*/
for (int i = 0; i < 2; i++) {
cl_emit(rcl, TILE_COORDINATES, coords);
cl_emit(rcl, END_OF_LOADS, end);
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = NONE;
}
if (clear_value && i == 0) {
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
clear.clear_z_stencil_buffer = true;
clear.clear_all_render_targets = true;
}
}
cl_emit(rcl, END_OF_TILE_MARKER, end);
}
cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}
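/* Emits supertile coordinates covering the framebuffer's full supertile
 * grid, triggering rendering of the generic tile list for each supertile.
 */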
static void
emit_supertile_coordinates(struct v3dv_job *job,
struct framebuffer_data *framebuffer)
{
v3dv_return_if_oom(NULL, job);
struct v3dv_cl *rcl = &job->rcl;
const uint32_t min_y = framebuffer->min_y_supertile;
const uint32_t max_y = framebuffer->max_y_supertile;
const uint32_t min_x = framebuffer->min_x_supertile;
const uint32_t max_x = framebuffer->max_x_supertile;
for (int y = min_y; y <= max_y; y++) {
for (int x = min_x; x <= max_x; x++) {
cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
coords.column_number_in_supertiles = x;
coords.row_number_in_supertiles = y;
}
}
}
}
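/* Emits a TLB load from a linearly laid out (raster) buffer. */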
static void
emit_linear_load(struct v3dv_cl *cl,
uint32_t buffer,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t stride,
uint32_t format)
{
cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
load.buffer_to_load = buffer;
load.address = v3dv_cl_address(bo, offset);
load.input_image_format = format;
load.memory_format = VC5_TILING_RASTER;
load.height_in_ub_or_stride = stride;
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
}
}
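/* Emits a TLB store from render target 0 to a linearly laid out (raster)
 * buffer. For multisampled data we store all samples, otherwise just
 * sample 0.
 */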
static void
emit_linear_store(struct v3dv_cl *cl,
uint32_t buffer,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t stride,
bool msaa,
uint32_t format)
{
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = RENDER_TARGET_0;
store.address = v3dv_cl_address(bo, offset);
store.clear_buffer_being_stored = false;
store.output_image_format = format;
store.memory_format = VC5_TILING_RASTER;
store.height_in_ub_or_stride = stride;
store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
V3D_DECIMATE_MODE_SAMPLE_0;
}
}
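/* Emits a TLB load of an image layer/mip level, selecting the tile buffer
 * (color vs. Z/S) and the input format based on the aspect being copied and
 * on whether the copy involves a buffer.
 */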
static void
emit_image_load(struct v3dv_cl *cl,
struct framebuffer_data *framebuffer,
struct v3dv_image *image,
VkImageAspectFlags aspect,
uint32_t layer,
uint32_t mip_level,
bool is_copy_to_buffer,
bool is_copy_from_buffer)
{
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
/* For image to/from buffer copies we always load to and store from RT0,
* even for depth/stencil aspects, because the hardware can't do raster
* stores or loads from/to the depth/stencil tile buffers.
*/
bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
load.buffer_to_load = load_to_color_tlb ?
RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
load.address = v3dv_cl_address(image->mem->bo, layer_offset);
load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
is_copy_to_buffer,
is_copy_from_buffer);
load.memory_format = slice->tiling;
/* When copying depth/stencil images to a buffer, for D24 formats Vulkan
* expects the depth value in the LSB bits of each 32-bit pixel.
* Unfortunately, the hardware seems to put the S8/X8 bits there and the
* depth bits on the MSB. To work around that we can reverse the channel
* order and then swap the R/B channels to get what we want.
*
* NOTE: reversing and swapping only gets us the behavior we want if the
* operations happen in that exact order, which seems to be the case when
* done on the tile buffer load operations. On the store, it seems the
* order is not the same. The order on the store is probably reversed so
* that reversing and swapping on both the load and the store preserves
* the original order of the channels in memory.
*
* Notice that we only need to do this when copying to a buffer, where
* depth and stencil aspects are copied as separate regions and
* the spec expects them to be tightly packed.
*/
bool needs_rb_swap = false;
bool needs_chan_reverse = false;
if (is_copy_to_buffer &&
(framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
(framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
(aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
needs_rb_swap = true;
needs_chan_reverse = true;
} else if (!is_copy_from_buffer && !is_copy_to_buffer &&
(aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
/* This is not a raw data copy (i.e. we are clearing the image),
* so we need to make sure we respect the format swizzle.
*/
needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
}
load.r_b_swap = needs_rb_swap;
load.channel_reverse = needs_chan_reverse;
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
load.height_in_ub_or_stride =
slice->padded_height_of_output_image_in_uif_blocks;
} else if (slice->tiling == VC5_TILING_RASTER) {
load.height_in_ub_or_stride = slice->stride;
}
if (image->samples > VK_SAMPLE_COUNT_1_BIT)
load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
else
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
}
}
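/* Emits a TLB store to an image layer/mip level. This mirrors
 * emit_image_load(): buffer copies always go through render target 0 and
 * the same R/B swap and channel reverse rules apply.
 */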
static void
emit_image_store(struct v3dv_cl *cl,
struct framebuffer_data *framebuffer,
struct v3dv_image *image,
VkImageAspectFlags aspect,
uint32_t layer,
uint32_t mip_level,
bool is_copy_to_buffer,
bool is_copy_from_buffer,
bool is_multisample_resolve)
{
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = store_from_color_tlb ?
RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
store.address = v3dv_cl_address(image->mem->bo, layer_offset);
store.clear_buffer_being_stored = false;
/* See rationale in emit_image_load() */
bool needs_rb_swap = false;
bool needs_chan_reverse = false;
if (is_copy_from_buffer &&
(framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
(framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
(aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
needs_rb_swap = true;
needs_chan_reverse = true;
} else if (!is_copy_from_buffer && !is_copy_to_buffer &&
(aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
}
store.r_b_swap = needs_rb_swap;
store.channel_reverse = needs_chan_reverse;
store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
is_copy_to_buffer,
is_copy_from_buffer);
store.memory_format = slice->tiling;
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
store.height_in_ub_or_stride =
slice->padded_height_of_output_image_in_uif_blocks;
} else if (slice->tiling == VC5_TILING_RASTER) {
store.height_in_ub_or_stride = slice->stride;
}
if (image->samples > VK_SAMPLE_COUNT_1_BIT)
store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
else if (is_multisample_resolve)
store.decimate_mode = V3D_DECIMATE_MODE_4X;
else
store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
}
}
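/* Emits the generic tile list for copying one image layer to a buffer: we
 * load the layer into the TLB and store it out to the buffer in raster
 * order, at the offset that corresponds to the current layer.
 */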
static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
uint32_t layer,
const VkBufferImageCopy *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
layer < image->extent.depth);
/* Load image to TLB */
emit_image_load(cl, framebuffer, image, imgrsc->aspectMask,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
true, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
/* Store TLB to buffer */
uint32_t width, height;
if (region->bufferRowLength == 0)
width = region->imageExtent.width;
else
width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
height = region->imageExtent.height;
else
height = region->bufferImageHeight;
/* Handle copy from compressed format */
width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
/* If we are storing stencil from a combined depth/stencil format the
* Vulkan spec states that the output buffer must have packed stencil
* values, where each stencil value is 1 byte.
*/
uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : image->cpp;
uint32_t buffer_stride = width * cpp;
uint32_t buffer_offset =
buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
true, true, false);
bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
buffer_offset, buffer_stride, msaa, format);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
struct framebuffer_data *framebuffer,
uint32_t layer,
const VkBufferImageCopy *region)
{
emit_frame_setup(job, layer, NULL);
emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
image, layer, region);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
struct framebuffer_data *framebuffer,
const VkBufferImageCopy *region)
{
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
v3dv_return_if_oom(NULL, job);
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
cl_emit(rcl, END_OF_RENDERING, end);
}
/* Implements a copy using the TLB.
*
* This only works if we are copying from offset (0,0), since a TLB store for
* tile (x,y) will be written at the same tile offset into the destination.
* When this requirement is not met, we need to use a blit instead.
*
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
const VkBufferImageCopy *region)
{
VkFormat fb_format;
if (!can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
get_internal_type_bpp_for_image_aspects(fb_format,
region->imageSubresource.aspectMask,
&internal_type, &internal_bpp);
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy from compressed format using a compatible format */
const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
VkFormat dst_format,
struct v3dv_image *src,
VkFormat src_format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
const VkImageBlit *region,
VkFilter filter,
bool dst_is_padded_image);
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
const VkBufferImageCopy *region)
{
bool handled = false;
/* Generally, the bpp of the data in the buffer matches that of the
* source image. The exception is the case where we are copying
* stencil (8bpp) to a combined d24s8 image (32bpp).
*/
uint32_t buffer_bpp = image->cpp;
VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
/* Because we are going to implement the copy as a blit, we need to create
* a linear image from the destination buffer and we also want our blit
* source and destination formats to be the same (to avoid any format
* conversions), so we choose a canonical format that matches the
* source image bpp.
*
* The exception to the above is copying from combined depth/stencil images
* because we are copying only one aspect of the image, so we need to setup
* our formats, color write mask and source swizzle mask to match that.
*/
VkFormat dst_format;
VkFormat src_format;
VkColorComponentFlags cmask = 0; /* All components */
VkComponentMapping cswizzle = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
switch (buffer_bpp) {
case 16:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R32G32B32A32_UINT;
src_format = dst_format;
break;
case 8:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R16G16B16A16_UINT;
src_format = dst_format;
break;
case 4:
switch (copy_aspect) {
case VK_IMAGE_ASPECT_COLOR_BIT:
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
src_format = VK_FORMAT_R32_UINT;
dst_format = VK_FORMAT_R32_UINT;
} else {
/* We want to write depth in the buffer in the first 24-bits,
* however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
* want to write bits 24-31 in the destination.
*/
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
cmask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT;
cswizzle.r = VK_COMPONENT_SWIZZLE_G;
cswizzle.g = VK_COMPONENT_SWIZZLE_B;
cswizzle.b = VK_COMPONENT_SWIZZLE_A;
cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
}
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
/* Copying from S8D24. We want to write 8-bit stencil values only,
* so adjust the buffer bpp for that. Since the hardware stores stencil
       * in the LSB, we can just do an RGBA8UI to R8UI blit.
*/
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8_UINT;
buffer_bpp = 1;
break;
default:
unreachable("unsupported aspect");
return handled;
      }
break;
case 2:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
dst_format = VK_FORMAT_R16_UINT;
src_format = dst_format;
break;
case 1:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R8_UINT;
src_format = dst_format;
break;
default:
unreachable("unsupported bit-size");
return handled;
   }
/* The hardware doesn't support linear depth/stencil stores, so we
* implement copies of depth/stencil aspect as color copies using a
* compatible color format.
*/
assert(vk_format_is_color(src_format));
assert(vk_format_is_color(dst_format));
copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
/* We should be able to handle the blit if we got this far */
handled = true;
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
if (region->bufferRowLength == 0)
buf_width = region->imageExtent.width;
else
buf_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buf_height = region->imageExtent.height;
else
buf_height = region->bufferImageHeight;
/* Compute layers to copy */
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
/* Copy requested layers */
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
for (uint32_t i = 0; i < num_layers; i++) {
/* Create the destination blit image from the destination buffer */
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = dst_format,
.extent = { buf_width, buf_height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkImage buffer_image;
VkResult result =
v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)buffer_image,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
/* Bind the buffer memory to the image */
VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
i * buf_width * buf_height * buffer_bpp;
result = v3dv_BindImageMemory(_device, buffer_image,
v3dv_device_memory_to_handle(buffer->mem),
buffer_offset);
if (result != VK_SUCCESS)
return handled;
/* Blit-copy the requested image extent.
*
* Since we are copying, the blit must use the same format on the
* destination and source images to avoid format conversions. The
* only exception is copying stencil, which we upload to a R8UI source
* image, but that we need to blit to a S8D24 destination (the only
* stencil format we support).
*/
const VkImageBlit blit_region = {
.srcSubresource = {
.aspectMask = copy_aspect,
.mipLevel = region->imageSubresource.mipLevel,
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
.layerCount = 1,
},
.srcOffsets = {
{
region->imageOffset.x,
region->imageOffset.y,
region->imageOffset.z + i,
},
{
region->imageOffset.x + region->imageExtent.width,
region->imageOffset.y + region->imageExtent.height,
region->imageOffset.z + i + 1,
},
},
.dstSubresource = {
.aspectMask = copy_aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.dstOffsets = {
{ 0, 0, 0 },
{ region->imageExtent.width, region->imageExtent.height, 1 },
},
};
handled = blit_shader(cmd_buffer,
v3dv_image_from_handle(buffer_image), dst_format,
image, src_format,
cmask, &cswizzle,
&blit_region, VK_FILTER_NEAREST, false);
if (!handled) {
/* This is unexpected, we should have a supported blit spec */
unreachable("Unable to blit buffer to destination image");
return false;
}
}
assert(handled);
return true;
}
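/* Returns a renderable format with the same bpp as the given format, for use
 * in TLB copies of images whose own format the TLB can't render to, or
 * VK_FORMAT_UNDEFINED if there is no compatible choice.
 */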
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
switch (format) {
case VK_FORMAT_R8G8B8A8_SNORM:
return VK_FORMAT_R8G8B8A8_UINT;
case VK_FORMAT_R8G8_SNORM:
return VK_FORMAT_R8G8_UINT;
case VK_FORMAT_R8_SNORM:
return VK_FORMAT_R8_UINT;
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
return VK_FORMAT_A8B8G8R8_UINT_PACK32;
case VK_FORMAT_R16_UNORM:
case VK_FORMAT_R16_SNORM:
return VK_FORMAT_R16_UINT;
case VK_FORMAT_R16G16_UNORM:
case VK_FORMAT_R16G16_SNORM:
return VK_FORMAT_R16G16_UINT;
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16G16B16A16_SNORM:
return VK_FORMAT_R16G16B16A16_UINT;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return VK_FORMAT_R32_SFLOAT;
/* We can't render to compressed formats using the TLB so instead we use
* a compatible format with the same bpp as the compressed format. Because
* the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
* case of ETC), when we implement copies with the compatible format we
* will have to divide offsets and dimensions on the compressed image by
* the compressed block size.
*/
case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
return VK_FORMAT_R32G32B32A32_UINT;
case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case VK_FORMAT_EAC_R11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
return VK_FORMAT_R16G16B16A16_UINT;
default:
return VK_FORMAT_UNDEFINED;
}
}
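/* The TLB path requires the copy to start at offset (0,0) and a format the
 * TLB can render to, possibly via a compatible format; on success, the
 * format to use for the framebuffer is returned in compat_format.
 */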
static inline bool
can_use_tlb(struct v3dv_image *image,
const VkOffset3D *offset,
VkFormat *compat_format)
{
if (offset->x != 0 || offset->y != 0)
return false;
if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
if (compat_format)
*compat_format = image->vk_format;
return true;
}
/* If the image format is not TLB-supported, then check if we can use
* a compatible format instead.
*/
if (compat_format) {
*compat_format = get_compatible_tlb_format(image->vk_format);
if (*compat_format != VK_FORMAT_UNDEFINED)
return true;
}
return false;
}
void
v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, srcImage);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer);
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
for (uint32_t i = 0; i < regionCount; i++) {
if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i]))
continue;
if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i]))
continue;
unreachable("Unsupported image to buffer copy.");
}
}
static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *dst,
struct v3dv_image *src,
uint32_t layer,
const VkImageCopy *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *srcrsc = &region->srcSubresource;
assert((src->type != VK_IMAGE_TYPE_3D && layer < srcrsc->layerCount) ||
layer < src->extent.depth);
emit_image_load(cl, framebuffer, src, srcrsc->aspectMask,
srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
false, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
const VkImageSubresourceLayers *dstrsc = &region->dstSubresource;
assert((dst->type != VK_IMAGE_TYPE_3D && layer < dstrsc->layerCount) ||
layer < dst->extent.depth);
emit_image_store(cl, framebuffer, dst, dstrsc->aspectMask,
dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
false, false, false);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_copy_image_layer(struct v3dv_job *job,
struct v3dv_image *dst,
struct v3dv_image *src,
struct framebuffer_data *framebuffer,
uint32_t layer,
const VkImageCopy *region)
{
emit_frame_setup(job, layer, NULL);
emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_copy_image_rcl(struct v3dv_job *job,
struct v3dv_image *dst,
struct v3dv_image *src,
struct framebuffer_data *framebuffer,
const VkImageCopy *region)
{
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
v3dv_return_if_oom(NULL, job);
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
cl_emit(rcl, END_OF_RENDERING, end);
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageCopy *region)
{
VkFormat fb_format;
if (!can_use_tlb(src, &region->srcOffset, &fb_format) ||
!can_use_tlb(dst, &region->dstOffset, &fb_format)) {
return false;
}
/* From the Vulkan spec, VkImageCopy valid usage:
*
* "If neither the calling command’s srcImage nor the calling command’s
* dstImage has a multi-planar image format then the aspectMask member
* of srcSubresource and dstSubresource must match."
*/
assert(region->dstSubresource.aspectMask ==
region->srcSubresource.aspectMask);
uint32_t internal_type, internal_bpp;
get_internal_type_bpp_for_image_aspects(fb_format,
region->dstSubresource.aspectMask,
&internal_type, &internal_bpp);
/* From the Vulkan spec, VkImageCopy valid usage:
*
* "The layerCount member of srcSubresource and dstSubresource must match"
*/
assert(region->srcSubresource.layerCount ==
region->dstSubresource.layerCount);
uint32_t num_layers;
if (dst->type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
else
num_layers = region->extent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy to compressed image using compatible format */
const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,
src->samples > VK_SAMPLE_COUNT_1_BIT);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_copy_image_rcl(job, dst, src, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
/**
* Takes the image provided as argument and creates a new image that has
* the same specification and aliases the same memory storage, except that:
*
* - It has the uncompressed format passed in.
* - Its original width/height are scaled by the factors passed in.
*
* This is useful to implement copies from compressed images using the blit
* path. The idea is that we create uncompressed "image views" of both the
* source and destination images using the uncompressed format and then we
* define the copy blit in terms of that format.
*/
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *src,
float width_scale,
float height_scale,
VkFormat format)
{
assert(!vk_format_is_compressed(format));
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
VkImageCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = src->type,
.format = format,
.extent = {
.width = src->extent.width * width_scale,
.height = src->extent.height * height_scale,
.depth = src->extent.depth,
},
.mipLevels = src->levels,
.arrayLayers = src->array_size,
.samples = src->samples,
.tiling = src->tiling,
.usage = src->usage,
};
VkImage _image;
VkResult result =
v3dv_CreateImage(_device, &info, &cmd_buffer->device->alloc, &_image);
if (result != VK_SUCCESS) {
v3dv_flag_oom(cmd_buffer, NULL);
return NULL;
}
struct v3dv_image *image = v3dv_image_from_handle(_image);
image->mem = src->mem;
image->mem_offset = src->mem_offset;
return image;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageCopy *region)
{
const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
const float block_scale_w = (float)src_block_w / (float)dst_block_w;
const float block_scale_h = (float)src_block_h / (float)dst_block_h;
/* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we are
    * going to blit, we need to make sure that the selected format can be
* both rendered to and textured from.
*/
VkFormat format;
float src_scale_w = 1.0f;
float src_scale_h = 1.0f;
float dst_scale_w = block_scale_w;
float dst_scale_h = block_scale_h;
if (vk_format_is_compressed(src->vk_format)) {
/* If we are copying from a compressed format we should be aware that we
* are going to texture from the source image, and the texture setup
* knows the actual size of the image, so we need to choose a format
* that has a per-texel (not per-block) bpp that is compatible for that
* image size. For example, for a source image with size Bw*WxBh*H
* and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
* each of the Bw*WxBh*H texels in the compressed source image is 8-bit
* (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
* so we could specify a blit with size Bw*WxBh*H and a format with
* a bpp of 8-bit per texel (R8_UINT).
*
* Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
* which is 64-bit per texel, then we would need a 4-bit format, which
* we don't have, so instead we still choose an 8-bit format, but we
* apply a divisor to the row dimensions of the blit, since we are
* copying two texels per item.
*
* Generally, we can choose any format so long as we compute appropriate
* divisors for the width and height depending on the source image's
* bpp.
*/
assert(src->cpp == dst->cpp);
uint32_t divisor_w, divisor_h;
format = VK_FORMAT_R32G32_UINT;
switch (src->cpp) {
case 16:
format = VK_FORMAT_R32G32B32A32_UINT;
divisor_w = 4;
divisor_h = 4;
break;
case 8:
format = VK_FORMAT_R16G16B16A16_UINT;
divisor_w = 4;
divisor_h = 4;
break;
default:
unreachable("Unsupported compressed format");
}
/* Create image views of the src/dst images that we can interpret in
* terms of the canonical format.
*/
src_scale_w /= divisor_w;
src_scale_h /= divisor_h;
dst_scale_w /= divisor_w;
dst_scale_h /= divisor_h;
src = create_image_alias(cmd_buffer, src,
src_scale_w, src_scale_h, format);
dst = create_image_alias(cmd_buffer, dst,
dst_scale_w, dst_scale_h, format);
} else {
format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
src->vk_format : get_compatible_tlb_format(src->vk_format);
if (format == VK_FORMAT_UNDEFINED)
return false;
const struct v3dv_format *f = v3dv_get_format(format);
if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
return false;
}
/* Given an uncompressed image with size WxH, if we copy it to a compressed
* image, it will result in an image with size W*bWxH*bH, where bW and bH
* are the compressed format's block width and height. This means that
* copies between compressed and uncompressed images involve different
* image sizes, and therefore, we need to take that into account when
* setting up the source and destination blit regions below, so they are
* consistent from the point of view of the single compatible format
* selected for the copy.
*
* We should take into account that the dimensions of the region provided
* to the copy command are specified in terms of the source image. With that
* in mind, below we adjust the blit destination region to be consistent with
* the source region for the compatible format, so basically, we apply
* the block scale factor to the destination offset provided by the copy
* command (because it is specified in terms of the destination image, not
* the source), and then we just add the region copy dimensions to that
* (since the region dimensions are already specified in terms of the source
* image).
*/
const VkOffset3D src_start = {
region->srcOffset.x * src_scale_w,
region->srcOffset.y * src_scale_h,
region->srcOffset.z,
};
const VkOffset3D src_end = {
src_start.x + region->extent.width * src_scale_w,
src_start.y + region->extent.height * src_scale_h,
src_start.z + region->extent.depth,
};
const VkOffset3D dst_start = {
region->dstOffset.x * dst_scale_w,
region->dstOffset.y * dst_scale_h,
region->dstOffset.z,
};
const VkOffset3D dst_end = {
dst_start.x + region->extent.width * src_scale_w,
dst_start.y + region->extent.height * src_scale_h,
dst_start.z + region->extent.depth,
};
const VkImageBlit blit_region = {
.srcSubresource = region->srcSubresource,
.srcOffsets = { src_start, src_end },
.dstSubresource = region->dstSubresource,
.dstOffsets = { dst_start, dst_end },
};
bool handled = blit_shader(cmd_buffer,
dst, format,
src, format,
0, NULL,
&blit_region, VK_FILTER_NEAREST, true);
/* We should have selected formats that we can blit */
assert(handled);
return handled;
}
void
v3dv_CmdCopyImage(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageCopy *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
assert(src->samples == dst->samples);
for (uint32_t i = 0; i < regionCount; i++) {
if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
continue;
if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i]))
continue;
unreachable("Image copy not supported");
}
}
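/* Emits the generic tile list for a TLB image clear. There is nothing to
 * load: the clear value is programmed in the RCL prologue and applied by
 * emit_frame_setup(), so we only emit the store of the cleared tiles to
 * the image.
 */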
static void
emit_clear_image_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *image,
VkImageAspectFlags aspects,
uint32_t layer,
uint32_t level)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
emit_image_store(cl, framebuffer, image, aspects, layer, level,
false, false, false);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_clear_image(struct v3dv_job *job,
struct v3dv_image *image,
struct framebuffer_data *framebuffer,
VkImageAspectFlags aspects,
uint32_t layer,
uint32_t level)
{
emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_clear_image_rcl(struct v3dv_job *job,
struct v3dv_image *image,
struct framebuffer_data *framebuffer,
const union v3dv_clear_value *clear_value,
VkImageAspectFlags aspects,
uint32_t layer,
uint32_t level)
{
const struct rcl_clear_info clear_info = {
.clear_value = clear_value,
.image = image,
.aspects = aspects,
.layer = layer,
.level = level,
};
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
v3dv_return_if_oom(NULL, job);
emit_frame_setup(job, 0, clear_value);
emit_clear_image(job, image, framebuffer, aspects, layer, level);
cl_emit(rcl, END_OF_RENDERING, end);
}
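/* Packs a clear color into the hardware clear value layout for the chosen
 * internal type/bpp, taking care to honor the original image format when we
 * are clearing through a compatible framebuffer format.
 */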
static void
get_hw_clear_color(const VkClearColorValue *color,
VkFormat fb_format,
VkFormat image_format,
uint32_t internal_type,
uint32_t internal_bpp,
uint32_t *hw_color)
{
const uint32_t internal_size = 4 << internal_bpp;
/* If the image format doesn't match the framebuffer format, then we are
    * trying to clear an unsupported TLB format using a compatible
* format for the framebuffer. In this case, we want to make sure that
* we pack the clear value according to the original format semantics,
* not the compatible format.
*/
if (fb_format == image_format) {
v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color);
} else {
union util_color uc;
enum pipe_format pipe_image_format =
vk_format_to_pipe_format(image_format);
util_pack_color(color->float32, pipe_image_format, &uc);
memcpy(hw_color, uc.ui, internal_size);
}
}
/* Returns true if the implementation is able to handle the case, false
* otherwise.
*/
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
const VkClearValue *clear_value,
const VkImageSubresourceRange *range)
{
const VkOffset3D origin = { 0, 0, 0 };
VkFormat fb_format;
if (!can_use_tlb(image, &origin, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
&internal_type, &internal_bpp);
union v3dv_clear_value hw_clear_value = { 0 };
if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
internal_type, internal_bpp, &hw_clear_value.color[0]);
} else {
assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
(range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
hw_clear_value.z = clear_value->depthStencil.depth;
hw_clear_value.s = clear_value->depthStencil.stencil;
}
uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
image->levels - range->baseMipLevel :
range->levelCount;
uint32_t min_level = range->baseMipLevel;
uint32_t max_level = range->baseMipLevel + level_count;
/* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
* Instead, we need to consider the full depth dimension of the image, which
* goes from 0 up to the level's depth extent.
*/
uint32_t min_layer;
uint32_t max_layer;
if (image->type != VK_IMAGE_TYPE_3D) {
uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
image->array_size - range->baseArrayLayer :
range->layerCount;
min_layer = range->baseArrayLayer;
max_layer = range->baseArrayLayer + layer_count;
} else {
min_layer = 0;
max_layer = 0;
}
for (uint32_t level = min_level; level < max_level; level++) {
if (image->type == VK_IMAGE_TYPE_3D)
max_layer = u_minify(image->extent.depth, level);
for (uint32_t layer = min_layer; layer < max_layer; layer++) {
uint32_t width = u_minify(image->extent.width, level);
uint32_t height = u_minify(image->extent.height, level);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
      /* We start a new job for each layer, so the frame "depth" is 1 */
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
image->samples > VK_SAMPLE_COUNT_1_BIT);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
/* If this triggers it is an application bug: the spec requires
* that any aspects to clear are present in the image.
*/
assert(range->aspectMask & image->aspects);
emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value,
range->aspectMask, layer, level);
v3dv_cmd_buffer_finish_job(cmd_buffer);
}
}
return true;
}
void
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
VkImage _image,
VkImageLayout imageLayout,
const VkClearColorValue *pColor,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, _image);
const VkClearValue clear_value = {
.color = *pColor,
};
for (uint32_t i = 0; i < rangeCount; i++) {
if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
continue;
unreachable("Unsupported color clear.");
}
}
void
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
VkImage _image,
VkImageLayout imageLayout,
const VkClearDepthStencilValue *pDepthStencil,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, _image);
const VkClearValue clear_value = {
.depthStencil = *pDepthStencil,
};
for (uint32_t i = 0; i < rangeCount; i++) {
if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
continue;
unreachable("Unsupported depth/stencil clear.");
}
}
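/* Emits the generic tile list for a raster buffer-to-buffer copy: we load
 * one tile's worth of data from the source buffer into the TLB and store it
 * to the destination buffer.
 */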
static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_bo *dst,
struct v3dv_bo *src,
uint32_t dst_offset,
uint32_t src_offset,
uint32_t stride,
uint32_t format)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
emit_linear_store(cl, RENDER_TARGET_0,
dst, dst_offset, stride, false, format);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_copy_buffer(struct v3dv_job *job,
struct v3dv_bo *dst,
struct v3dv_bo *src,
uint32_t dst_offset,
uint32_t src_offset,
struct framebuffer_data *framebuffer,
uint32_t format)
{
const uint32_t stride = job->frame_tiling.width * 4;
emit_copy_buffer_per_tile_list(job, dst, src,
dst_offset, src_offset,
stride, format);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_copy_buffer_rcl(struct v3dv_job *job,
struct v3dv_bo *dst,
struct v3dv_bo *src,
uint32_t dst_offset,
uint32_t src_offset,
struct framebuffer_data *framebuffer,
uint32_t format)
{
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
v3dv_return_if_oom(NULL, job);
emit_frame_setup(job, 0, NULL);
emit_copy_buffer(job, dst, src, dst_offset, src_offset, framebuffer, format);
cl_emit(rcl, END_OF_RENDERING, end);
}
/* Figure out a TLB size configuration for a number of pixels to process.
 * Beware that we can't "render" more than 4096x4096 pixels in a single job;
 * if the pixel count is larger than this, the caller might need to split
* the job and call this function multiple times.
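 *
 * For example, 3000 pixels yields a 375x8 framebuffer: starting from 3000x1
 * we halve the width and double the height while the width is even and more
 * than twice the height.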
*/
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
uint32_t *width,
uint32_t *height)
{
assert(num_pixels > 0);
const uint32_t max_dim_pixels = 4096;
const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
uint32_t w, h;
if (num_pixels > max_pixels) {
w = max_dim_pixels;
h = max_dim_pixels;
} else {
w = num_pixels;
h = 1;
while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
w >>= 1;
h <<= 1;
}
}
assert(w <= max_dim_pixels && h <= max_dim_pixels);
assert(w * h <= num_pixels);
assert(w > 0 && h > 0);
*width = w;
*height = h;
}
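/* Implements a buffer copy as a series of TLB "render" jobs that load from
 * the source buffer and store to the destination. Returns the last job
 * emitted, or NULL if we failed to start a job due to an OOM condition.
 */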
static struct v3dv_job *
copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_bo *dst,
uint32_t dst_offset,
struct v3dv_bo *src,
uint32_t src_offset,
const VkBufferCopy *region)
{
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
/* Select appropriate pixel format for the copy operation based on the
* size to copy and the alignment of the source and destination offsets.
*/
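   /* For example, a 6-byte copy between 2-byte aligned (but not 4-byte
    * aligned) offsets ends up with item_size = 2 and uses the RG8UI format
    * selected below.
    */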
src_offset += region->srcOffset;
dst_offset += region->dstOffset;
uint32_t item_size = 4;
while (item_size > 1 &&
(src_offset % item_size != 0 || dst_offset % item_size != 0)) {
item_size /= 2;
}
while (item_size > 1 && region->size % item_size != 0)
item_size /= 2;
assert(region->size % item_size == 0);
uint32_t num_items = region->size / item_size;
assert(num_items > 0);
uint32_t format;
VkFormat vk_format;
switch (item_size) {
case 4:
format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
vk_format = VK_FORMAT_R8G8B8A8_UINT;
break;
case 2:
format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
vk_format = VK_FORMAT_R8G8_UINT;
break;
default:
format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
vk_format = VK_FORMAT_R8_UINT;
break;
}
struct v3dv_job *job = NULL;
while (num_items > 0) {
job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return NULL;
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, vk_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset,
&framebuffer, format);
v3dv_cmd_buffer_finish_job(cmd_buffer);
const uint32_t items_copied = width * height;
const uint32_t bytes_copied = items_copied * item_size;
num_items -= items_copied;
src_offset += bytes_copied;
dst_offset += bytes_copied;
}
return job;
}
void
v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer dstBuffer,
uint32_t regionCount,
const VkBufferCopy *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
for (uint32_t i = 0; i < regionCount; i++) {
copy_buffer(cmd_buffer,
dst_buffer->mem->bo, dst_buffer->mem_offset,
src_buffer->mem->bo, src_buffer->mem_offset,
&pRegions[i]);
}
}
static void
destroy_update_buffer_cb(VkDevice _device,
uint64_t pobj,
VkAllocationCallbacks *alloc)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
v3dv_bo_free(device, bo);
}
void
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void *pData)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
struct v3dv_bo *src_bo =
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
if (!src_bo) {
fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
return;
}
bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
if (!ok) {
fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
v3dv_bo_free(cmd_buffer->device, src_bo);
return;
}
memcpy(src_bo->map, pData, dataSize);
v3dv_bo_unmap(cmd_buffer->device, src_bo);
VkBufferCopy region = {
.srcOffset = 0,
.dstOffset = dstOffset,
.size = dataSize,
};
struct v3dv_job *copy_job =
copy_buffer(cmd_buffer,
dst_buffer->mem->bo, dst_buffer->mem_offset,
src_bo, 0,
&region);
if (!copy_job)
return;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}
static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t stride)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_fill_buffer(struct v3dv_job *job,
struct v3dv_bo *bo,
uint32_t offset,
struct framebuffer_data *framebuffer)
{
const uint32_t stride = job->frame_tiling.width * 4;
emit_fill_buffer_per_tile_list(job, bo, offset, stride);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_fill_buffer_rcl(struct v3dv_job *job,
struct v3dv_bo *bo,
uint32_t offset,
struct framebuffer_data *framebuffer,
uint32_t data)
{
const union v3dv_clear_value clear_value = {
.color = { data, 0, 0, 0 },
};
const struct rcl_clear_info clear_info = {
.clear_value = &clear_value,
.image = NULL,
.aspects = VK_IMAGE_ASPECT_COLOR_BIT,
.layer = 0,
.level = 0,
};
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
v3dv_return_if_oom(NULL, job);
emit_frame_setup(job, 0, &clear_value);
emit_fill_buffer(job, bo, offset, framebuffer);
cl_emit(rcl, END_OF_RENDERING, end);
}
static void
fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t size,
uint32_t data)
{
assert(size > 0 && size % 4 == 0);
assert(offset + size <= bo->size);
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
uint32_t num_items = size / 4;
while (num_items > 0) {
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return;
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
internal_type, &job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);
v3dv_cmd_buffer_finish_job(cmd_buffer);
const uint32_t items_copied = width * height;
const uint32_t bytes_copied = items_copied * 4;
num_items -= items_copied;
offset += bytes_copied;
}
}
void
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
uint32_t data)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
struct v3dv_bo *bo = dst_buffer->mem->bo;
/* From the Vulkan spec:
*
* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
* a multiple of 4, then the nearest smaller multiple is used."
*/
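/* For instance (illustrative values): an 18-byte buffer filled from
* dstOffset 4 leaves 14 remaining bytes, which round down to a 12-byte
* fill.
*/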
if (size == VK_WHOLE_SIZE) {
size = dst_buffer->size - dstOffset;
size -= size % 4;
}
fill_buffer(cmd_buffer, bo, dstOffset, size, data);
}
/* Disable level 0 write, just write following mipmaps */
#define V3D_TFU_IOA_DIMTW (1 << 0)
#define V3D_TFU_IOA_FORMAT_SHIFT 3
#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
#define V3D_TFU_IOA_FORMAT_UIF_XOR 7
#define V3D_TFU_ICFG_NUMMM_SHIFT 5
#define V3D_TFU_ICFG_TTYPE_SHIFT 9
#define V3D_TFU_ICFG_OPAD_SHIFT 22
#define V3D_TFU_ICFG_FORMAT_SHIFT 18
#define V3D_TFU_ICFG_FORMAT_RASTER 0
#define V3D_TFU_ICFG_FORMAT_SAND_128 1
#define V3D_TFU_ICFG_FORMAT_SAND_256 2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy *region)
{
VkFormat vk_format = image->vk_format;
const struct v3dv_format *format = image->format;
/* Format must be supported for texturing */
if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo,
format->tex_type)) {
return false;
}
/* Only color formats */
if (vk_format_is_depth_or_stencil(vk_format))
return false;
/* Destination can't be raster format */
const uint32_t mip_level = region->imageSubresource.mipLevel;
if (image->slices[mip_level].tiling == VC5_TILING_RASTER)
return false;
/* Region must include full slice */
const uint32_t offset_x = region->imageOffset.x;
const uint32_t offset_y = region->imageOffset.y;
if (offset_x != 0 || offset_y != 0)
return false;
uint32_t width, height;
if (region->bufferRowLength == 0)
width = region->imageExtent.width;
else
width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
height = region->imageExtent.height;
else
height = region->bufferImageHeight;
if (width != image->extent.width || height != image->extent.height)
return false;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
assert(image->mem && image->mem->bo);
const struct v3dv_bo *dst_bo = image->mem->bo;
assert(buffer->mem && buffer->mem->bo);
const struct v3dv_bo *src_bo = buffer->mem->bo;
/* Emit a TFU job per layer to copy */
const uint32_t buffer_stride = width * image->cpp;
for (int i = 0; i < num_layers; i++) {
uint32_t layer = region->imageSubresource.baseArrayLayer + i;
struct drm_v3d_submit_tfu tfu = {
.ios = (height << 16) | width,
.bo_handles = {
dst_bo->handle,
src_bo != dst_bo ? src_bo->handle : 0
},
};
const uint32_t buffer_offset =
buffer->mem_offset + region->bufferOffset +
height * buffer_stride * i;
const uint32_t src_offset = src_bo->offset + buffer_offset;
tfu.iia |= src_offset;
tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
tfu.iis |= width;
const uint32_t dst_offset =
dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
tfu.ioa |= dst_offset;
tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
(slice->tiling - VC5_TILING_LINEARTILE)) <<
V3D_TFU_IOA_FORMAT_SHIFT;
tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
* OPAD field for the destination (how many extra UIF blocks beyond
* those necessary to cover the height).
*/
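/* Illustrative numbers: with a hypothetical uif_block_h of 8, a copy
* height of 70 has an implicit padded height of align(70, 8) = 72; if the
* slice was allocated with padded_height = 88, OPAD must account for the
* (88 - 72) / 8 = 2 extra UIF blocks.
*/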
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
uint32_t implicit_padded_height = align(height, uif_block_h);
uint32_t icfg =
(slice->padded_height - implicit_padded_height) / uif_block_h;
tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
}
v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}
return true;
}
static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
uint32_t layer,
const VkBufferImageCopy *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
layer < image->extent.depth);
/* Load TLB from buffer */
uint32_t width, height;
if (region->bufferRowLength == 0)
width = region->imageExtent.width;
else
width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
height = region->imageExtent.height;
else
height = region->bufferImageHeight;
/* Handle copy to compressed format using a compatible format */
width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : image->cpp;
uint32_t buffer_stride = width * cpp;
uint32_t buffer_offset =
buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
false, false, true);
emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
buffer_offset, buffer_stride, format);
/* Because we can't do raster loads/stores of Z/S formats we need to
* use a color tile buffer with a compatible RGBA color format instead.
* However, when we are uploading a single aspect to a combined
* depth/stencil image we have the problem that our tile buffer stores don't
* allow us to mask out the other aspect, so we always write all four RGBA
* channels to the image and we end up overwriting that other aspect with
* undefined values. To work around that, we first load the aspect we are
* not copying from the image memory into a proper Z/S tile buffer. Then we
* do our store from the color buffer for the aspect we are copying, and
* after that, we do another store from the Z/S tile buffer to restore the
* other aspect to its original value.
*/
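/* In short, for a stencil-only upload to a D24S8 image: load the depth
* aspect from the image into the Z/S tile buffer, store the stencil data
* from the color tile buffer, then store depth back from the Z/S tile
* buffer. The depth-only case is symmetric, preserving stencil instead.
*/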
if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
false, false);
} else {
assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
false, false);
}
}
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
/* Store TLB to image */
emit_image_store(cl, framebuffer, image, imgrsc->aspectMask,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
false, true, false);
if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
false, false, false);
} else {
assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
false, false, false);
}
}
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_copy_buffer_to_layer(struct v3dv_job *job,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
struct framebuffer_data *framebuffer,
uint32_t layer,
const VkBufferImageCopy *region)
{
emit_frame_setup(job, layer, NULL);
emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
layer, region);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_copy_buffer_to_image_rcl(struct v3dv_job *job,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
struct framebuffer_data *framebuffer,
const VkBufferImageCopy *region)
{
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
v3dv_return_if_oom(NULL, job);
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
cl_emit(rcl, END_OF_RENDERING, end);
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy *region)
{
VkFormat fb_format;
if (!can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
get_internal_type_bpp_for_image_aspects(fb_format,
region->imageSubresource.aspectMask,
&internal_type, &internal_bpp);
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy to compressed format using a compatible format */
const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy *region)
{
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
return true;
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
return true;
return false;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy *region)
{
bool handled = false;
/* Generally, the bpp of the data in the buffer matches that of the
* destination image. The exception is the case where we are uploading
* stencil (8bpp) to a combined d24s8 image (32bpp).
*/
uint32_t buffer_bpp = image->cpp;
VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
/* We are about to upload the buffer data to an image so we can then
* blit that to our destination region. Because we are going to implement
* the copy as a blit, we want our blit source and destination formats to be
* the same (to avoid any format conversions), so we choose a canonical
* format that matches the destination image bpp.
*/
VkColorComponentFlags cmask = 0; /* Write all components */
VkFormat src_format;
VkFormat dst_format;
switch (buffer_bpp) {
case 16:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R32G32B32A32_UINT;
dst_format = src_format;
break;
case 8:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R16G16B16A16_UINT;
dst_format = src_format;
break;
case 4:
switch (aspect) {
case VK_IMAGE_ASPECT_COLOR_BIT:
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = src_format;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
src_format = image->vk_format;
dst_format = src_format;
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
/* Since we don't support separate stencil images, this is always a
* stencil copy to a combined depth/stencil image. We upload the buffer
* data to a compatible color R8UI image and implement the blit as a
* compatible color blit to an RGBA8UI destination, masking out writes to
* components GBA (which map to the D24 component of a S8D24 image).
*/
assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
buffer_bpp = 1;
src_format = VK_FORMAT_R8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
cmask = VK_COLOR_COMPONENT_R_BIT;
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
break;
default:
unreachable("unsupported aspect");
return handled;
};
break;
case 2:
src_format = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
VK_FORMAT_R16_UINT : image->vk_format;
dst_format = src_format;
break;
case 1:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R8_UINT;
dst_format = src_format;
break;
default:
unreachable("unsupported bit-size");
return handled;
}
/* We should be able to handle the blit if we reached here */
handled = true;
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
if (region->bufferRowLength == 0)
buf_width = region->imageExtent.width;
else
buf_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buf_height = region->imageExtent.height;
else
buf_height = region->bufferImageHeight;
/* If the image is compressed, the bpp refers to blocks, not pixels */
uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
uint32_t block_height = vk_format_get_blockheight(image->vk_format);
buf_width = buf_width / block_width;
buf_height = buf_height / block_height;
/* Compute layers to copy */
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
for (uint32_t i = 0; i < num_layers; i++) {
/* Create the source blit image from the source buffer.
*
* We can't texture from a linear image, so we can't just set up a blit
* straight from the buffer contents. Instead, we need to upload the
* buffer to a tiled image, and then copy that image to the selected
* region of the destination.
*
* FIXME: we could do better than this if we used a blit shader that takes
* a UBO (for the buffer) as input instead of a texture. We would then
* have to do some arithmetic in the shader to compute the offset into
* the UBO to load for each pixel in the destination image (and we would
* need to support all the possible copy formats we have above).
*/
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = src_format,
.extent = { buf_width, buf_height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkImage buffer_image;
VkResult result =
v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)buffer_image,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
/* Allocate and bind memory for the image */
VkDeviceMemory mem;
VkMemoryRequirements reqs;
v3dv_GetImageMemoryRequirements(_device, buffer_image, &reqs);
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = reqs.size,
.memoryTypeIndex = 0,
};
result = v3dv_AllocateMemory(_device, &alloc_info, &device->alloc, &mem);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)mem,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
if (result != VK_SUCCESS)
return handled;
/* Upload buffer contents for the selected layer */
VkDeviceSize buffer_offset =
region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
const VkBufferImageCopy buffer_image_copy = {
.bufferOffset = buffer_offset,
.bufferRowLength = region->bufferRowLength / block_width,
.bufferImageHeight = region->bufferImageHeight / block_height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = { 0, 0, 0 },
.imageExtent = { buf_width, buf_height, 1 }
};
handled =
create_tiled_image_from_buffer(cmd_buffer,
v3dv_image_from_handle(buffer_image),
buffer, &buffer_image_copy);
if (!handled) {
/* This is unexpected, we should have set up the upload to be
* compatible with a TFU or TLB copy.
*/
unreachable("Unable to copy buffer to image through TLB");
return false;
}
/* Blit-copy the requested image extent from the buffer image to the
* destination image.
*
* Since we are copying, the blit must use the same format on the
* destination and source images to avoid format conversions. The
* only exception is copying stencil, which we upload to a R8UI source
* image, but that we need to blit to a S8D24 destination (the only
* stencil format we support).
*/
const VkImageBlit blit_region = {
.srcSubresource = {
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.srcOffsets = {
{ 0, 0, 0 },
{ region->imageExtent.width, region->imageExtent.height, 1 },
},
.dstSubresource = {
.aspectMask = aspect,
.mipLevel = region->imageSubresource.mipLevel,
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
.layerCount = 1,
},
.dstOffsets = {
{
region->imageOffset.x / block_width,
region->imageOffset.y / block_height,
region->imageOffset.z + i,
},
{
(region->imageOffset.x + region->imageExtent.width) / block_width,
(region->imageOffset.y + region->imageExtent.height) / block_height,
region->imageOffset.z + i + 1,
},
},
};
handled = blit_shader(cmd_buffer,
image, dst_format,
v3dv_image_from_handle(buffer_image), src_format,
cmask, NULL,
&blit_region, VK_FILTER_NEAREST, true);
if (!handled) {
/* This is unexpected, we should have a supported blit spec */
unreachable("Unable to blit buffer to destination image");
return false;
}
}
assert(handled);
return true;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy *region)
{
/* FIXME */
if (vk_format_is_depth_or_stencil(image->vk_format))
return false;
if (vk_format_is_compressed(image->vk_format))
return false;
if (image->tiling == VK_IMAGE_TILING_LINEAR)
return false;
uint32_t buffer_width, buffer_height;
if (region->bufferRowLength == 0)
buffer_width = region->imageExtent.width;
else
buffer_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buffer_height = region->imageExtent.height;
else
buffer_height = region->bufferImageHeight;
uint32_t buffer_stride = buffer_width * image->cpp;
uint32_t buffer_layer_stride = buffer_stride * buffer_height;
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
cmd_buffer, -1);
if (!job)
return true;
job->cpu.copy_buffer_to_image.image = image;
job->cpu.copy_buffer_to_image.buffer = buffer;
job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
job->cpu.copy_buffer_to_image.mip_level =
region->imageSubresource.mipLevel;
job->cpu.copy_buffer_to_image.base_layer =
region->imageSubresource.baseArrayLayer;
job->cpu.copy_buffer_to_image.layer_count = num_layers;
list_addtail(&job->list_link, &cmd_buffer->jobs);
return true;
}
void
v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, dstImage);
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
for (uint32_t i = 0; i < regionCount; i++) {
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[i]))
continue;
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[i]))
continue;
if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[i]))
continue;
if (copy_buffer_to_image_blit(cmd_buffer, image, buffer, &pRegions[i]))
continue;
unreachable("Unsupported buffer to image copy.");
}
}
static void
emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
uint32_t dst_mip_level,
uint32_t dst_layer,
struct v3dv_image *src,
uint32_t src_mip_level,
uint32_t src_layer,
uint32_t width,
uint32_t height)
{
const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
assert(dst->mem && dst->mem->bo);
const struct v3dv_bo *dst_bo = dst->mem->bo;
assert(src->mem && src->mem->bo);
const struct v3dv_bo *src_bo = src->mem->bo;
struct drm_v3d_submit_tfu tfu = {
.ios = (height << 16) | width,
.bo_handles = {
dst_bo->handle,
src != dst ? src_bo->handle : 0
},
};
const uint32_t src_offset =
src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
tfu.iia |= src_offset;
uint32_t icfg;
if (src_slice->tiling == VC5_TILING_RASTER) {
icfg = V3D_TFU_ICFG_FORMAT_RASTER;
} else {
icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
(src_slice->tiling - VC5_TILING_LINEARTILE);
}
tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
const uint32_t dst_offset =
dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
tfu.ioa |= dst_offset;
tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
(dst_slice->tiling - VC5_TILING_LINEARTILE)) <<
V3D_TFU_IOA_FORMAT_SHIFT;
tfu.icfg |= dst->format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
switch (src_slice->tiling) {
case VC5_TILING_UIF_NO_XOR:
case VC5_TILING_UIF_XOR:
tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
break;
case VC5_TILING_RASTER:
tfu.iis |= src_slice->stride / src->cpp;
break;
default:
break;
}
/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
* OPAD field for the destination (how many extra UIF blocks beyond
* those necessary to cover the height).
*/
if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR ||
dst_slice->tiling == VC5_TILING_UIF_XOR) {
uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
uint32_t implicit_padded_height = align(height, uif_block_h);
uint32_t icfg =
(dst_slice->padded_height - implicit_padded_height) / uif_block_h;
tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
}
v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}
static void
compute_blit_3d_layers(const VkOffset3D *offsets,
uint32_t *min_layer, uint32_t *max_layer,
bool *mirror_z);
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
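/* Summary of the checks below: the TFU path is restricted to exact,
* full-surface copies: nearest filtering, matching color formats that the
* TFU can texture from, a tiled (non-raster) destination, regions starting
* at (0,0) and covering the whole destination level, and no scaling or
* mirroring. Anything else falls back to the TLB or blit-shader paths.
*/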
static bool
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageBlit *region,
VkFilter filter)
{
/* FIXME? The v3d driver seems to ignore filtering completely! */
if (filter != VK_FILTER_NEAREST)
return false;
/* Format must match */
if (src->vk_format != dst->vk_format)
return false;
VkFormat vk_format = dst->vk_format;
const struct v3dv_format *format = dst->format;
/* Format must be supported for texturing */
if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo,
format->tex_type)) {
return false;
}
/* Only color formats */
if (vk_format_is_depth_or_stencil(vk_format))
return false;
#if 0
/* FIXME: Only 2D images? */
if (dst->type != VK_IMAGE_TYPE_2D || src->type != VK_IMAGE_TYPE_2D)
return false;
#endif
/* Destination can't be raster format */
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
if (dst->slices[dst_mip_level].tiling == VC5_TILING_RASTER)
return false;
/* Source region must start at (0,0) */
if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
return false;
/* Destination image must be complete */
if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
return false;
const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
if (region->dstOffsets[1].x < dst_width - 1 ||
region->dstOffsets[1].y < dst_height - 1) {
return false;
}
/* No scaling */
if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
region->srcOffsets[1].y != region->dstOffsets[1].y) {
return false;
}
if (dst->type == VK_IMAGE_TYPE_3D &&
region->srcOffsets[1].z != region->dstOffsets[1].z) {
return false;
}
/* Emit a TFU job for each layer to blit */
assert(region->dstSubresource.layerCount ==
region->srcSubresource.layerCount);
uint32_t min_dst_layer;
uint32_t max_dst_layer;
bool dst_mirror_z = false;
if (dst->type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->dstOffsets,
&min_dst_layer, &max_dst_layer,
&dst_mirror_z);
/* The TFU can only do exact copies, so we can't handle mirroring. This
* checks Z mirroring for 3D images; XY mirroring is already ruled out by
* the earlier checks.
*/
if (dst_mirror_z)
return false;
}
uint32_t min_src_layer;
uint32_t max_src_layer;
bool src_mirror_z = false;
if (src->type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->srcOffsets,
&min_src_layer, &max_src_layer,
&src_mirror_z);
if (src_mirror_z)
return false;
if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
return false;
}
const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount :
max_dst_layer - min_dst_layer;
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
for (uint32_t i = 0; i < layer_count; i++) {
emit_tfu_job(cmd_buffer,
dst, dst_mip_level, region->dstSubresource.baseArrayLayer + i,
src, src_mip_level, region->srcSubresource.baseArrayLayer + i,
dst_width, dst_height);
}
return true;
}
static bool
format_needs_software_int_clamp(VkFormat format)
{
switch (format) {
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
return true;
default:
return false;
};
}
static void
get_blit_pipeline_cache_key(VkFormat dst_format,
VkFormat src_format,
VkColorComponentFlags cmask,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
uint8_t *key)
{
memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
uint32_t *p = (uint32_t *) key;
*p = dst_format;
p++;
/* Generally, when blitting from a larger format to a smaller format
* the hardware takes care of clamping the source to the RT range.
* Specifically, for integer formats, this is done by using
* V3D_RENDER_TARGET_CLAMP_INT in the render target setup. However, that
* clamps to the bit-size of the render type, and some formats, such as
* rgb10a2_uint, have a 16-bit render type, so the hardware clamp won't do
* what we need and we are required to clamp in software. In those cases
* we need to amend the blit shader with clamp code that depends on both
* the src and dst formats, so the src format must be part of the key.
*/
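/* The resulting key layout (implied by the writes below) is:
*   bytes  0-3 : dst_format
*   bytes  4-7 : src_format if a software clamp is needed, else 0
*   bytes  8-11: cmask
*   bytes 12-15: (dst_samples << 8) | src_samples
*/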
*p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
p++;
*p = cmask;
p++;
*p = (dst_samples << 8) | src_samples;
p++;
assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
}
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
VkDescriptorSetLayout *descriptor_set_layout,
VkPipelineLayout *pipeline_layout)
{
VkResult result;
if (*descriptor_set_layout == 0) {
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = &descriptor_set_layout_binding,
};
result =
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
&descriptor_set_layout_info,
&device->alloc,
descriptor_set_layout);
if (result != VK_SUCCESS)
return false;
}
assert(*pipeline_layout == 0);
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges =
&(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
};
result =
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
&pipeline_layout_info,
&device->alloc,
pipeline_layout);
return result == VK_SUCCESS;
}
static bool
create_blit_render_pass(struct v3dv_device *device,
VkFormat dst_format,
VkFormat src_format,
VkRenderPass *pass_load,
VkRenderPass *pass_no_load)
{
const bool is_color_blit = vk_format_is_color(dst_format);
/* Attachment load operation is specified below */
VkAttachmentDescription att = {
.format = dst_format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkAttachmentReference att_ref = {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
VkSubpassDescription subpass = {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = is_color_blit ? 1 : 0,
.pColorAttachments = is_color_blit ? &att_ref : NULL,
.pResolveAttachments = NULL,
.pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
.preserveAttachmentCount = 0,
.pPreserveAttachments = NULL,
};
VkRenderPassCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &att,
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 0,
.pDependencies = NULL,
};
VkResult result;
att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
&info, &device->alloc, pass_load);
if (result != VK_SUCCESS)
return false;
att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
&info, &device->alloc, pass_no_load);
return result == VK_SUCCESS;
}
static nir_ssa_def *
gen_rect_vertices(nir_builder *b)
{
nir_intrinsic_instr *vertex_id =
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_vertex_id);
nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
nir_builder_instr_insert(b, &vertex_id->instr);
/* vertex 0: -1.0, -1.0
* vertex 1: -1.0, 1.0
* vertex 2: 1.0, -1.0
* vertex 3: 1.0, 1.0
*
* so:
*
* channel 0 is vertex_id < 2 ? -1.0 : 1.0
* channel 1 is vertex_id & 1 ? 1.0 : -1.0
*/
nir_ssa_def *one = nir_imm_int(b, 1);
nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);
nir_ssa_def *comp[4];
comp[0] = nir_bcsel(b, c0cmp,
nir_imm_float(b, -1.0f),
nir_imm_float(b, 1.0f));
comp[1] = nir_bcsel(b, c1cmp,
nir_imm_float(b, 1.0f),
nir_imm_float(b, -1.0f));
comp[2] = nir_imm_float(b, 0.0f);
comp[3] = nir_imm_float(b, 1.0f);
return nir_vec(b, comp, 4);
}
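/* The blit vertex shader reads the source region from push constants. The
* layout implied by the base/range values below (and by the 20-byte push
* constant range declared in create_blit_pipeline_layout) is:
*   bytes  0-15: source box as floats (x0, y0, x1, y1)
*   bytes 16-19: source Z coordinate
*/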
static nir_ssa_def *
gen_tex_coords(nir_builder *b)
{
nir_intrinsic_instr *tex_box =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
tex_box->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_intrinsic_set_base(tex_box, 0);
nir_intrinsic_set_range(tex_box, 16);
tex_box->num_components = 4;
nir_ssa_dest_init(&tex_box->instr, &tex_box->dest, 4, 32, "tex_box");
nir_builder_instr_insert(b, &tex_box->instr);
nir_intrinsic_instr *tex_z =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
tex_z->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_intrinsic_set_base(tex_z, 16);
nir_intrinsic_set_range(tex_z, 4);
tex_z->num_components = 1;
nir_ssa_dest_init(&tex_z->instr, &tex_z->dest, 1, 32, "tex_z");
nir_builder_instr_insert(b, &tex_z->instr);
nir_intrinsic_instr *vertex_id =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_vertex_id);
nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
nir_builder_instr_insert(b, &vertex_id->instr);
/* vertex 0: src0_x, src0_y
* vertex 1: src0_x, src1_y
* vertex 2: src1_x, src0_y
* vertex 3: src1_x, src1_y
*
* So:
*
* channel 0 is vertex_id < 2 ? src0_x : src1_x
* channel 1 is vertex_id & 1 ? src1_y : src0_y
*/
nir_ssa_def *one = nir_imm_int(b, 1);
nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);
nir_ssa_def *comp[4];
comp[0] = nir_bcsel(b, c0cmp,
nir_channel(b, &tex_box->dest.ssa, 0),
nir_channel(b, &tex_box->dest.ssa, 2));
comp[1] = nir_bcsel(b, c1cmp,
nir_channel(b, &tex_box->dest.ssa, 3),
nir_channel(b, &tex_box->dest.ssa, 1));
comp[2] = &tex_z->dest.ssa;
comp[3] = nir_imm_float(b, 1.0f);
return nir_vec(b, comp, 4);
}
static nir_ssa_def *
build_nir_tex_op_read(struct nir_builder *b,
nir_ssa_def *tex_pos,
enum glsl_base_type tex_type,
enum glsl_sampler_dim dim)
{
assert(dim != GLSL_SAMPLER_DIM_MS);
const struct glsl_type *sampler_type =
glsl_sampler_type(dim, false, false, tex_type);
nir_variable *sampler =
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->sampler_dim = dim;
tex->op = nir_texop_tex;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(tex_pos);
tex->src[1].src_type = nir_tex_src_texture_deref;
tex->src[1].src = nir_src_for_ssa(tex_deref);
tex->src[2].src_type = nir_tex_src_sampler_deref;
tex->src[2].src = nir_src_for_ssa(tex_deref);
tex->dest_type =
nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(tex_type));
tex->is_array = glsl_sampler_type_is_array(sampler_type);
tex->coord_components = tex_pos->num_components;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(b, &tex->instr);
return &tex->dest.ssa;
}
static nir_ssa_def *
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
nir_variable *sampler,
nir_ssa_def *tex_deref,
enum glsl_base_type tex_type,
nir_ssa_def *tex_pos,
nir_ssa_def *sample_idx)
{
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(tex_pos);
tex->src[1].src_type = nir_tex_src_texture_deref;
tex->src[1].src = nir_src_for_ssa(tex_deref);
tex->src[2].src_type = nir_tex_src_sampler_deref;
tex->src[2].src = nir_src_for_ssa(tex_deref);
tex->src[3].src_type = nir_tex_src_ms_index;
tex->src[3].src = nir_src_for_ssa(sample_idx);
tex->dest_type =
nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(tex_type));
tex->is_array = false;
tex->coord_components = tex_pos->num_components;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(b, &tex->instr);
return &tex->dest.ssa;
}
/* Fetches all samples at the given position and averages them */
static nir_ssa_def *
build_nir_tex_op_ms_resolve(struct nir_builder *b,
nir_ssa_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits src_samples)
{
assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
const struct glsl_type *sampler_type =
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
nir_variable *sampler =
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
const bool is_int = glsl_base_type_is_integer(tex_type);
nir_ssa_def *tmp;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
for (uint32_t i = 0; i < src_samples; i++) {
nir_ssa_def *s =
build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_imm_int(b, i));
/* For integer formats, the multisample resolve operation is expected to
* return one of the samples; we just return the first one.
*/
if (is_int)
return s;
tmp = i == 0 ? s : nir_fadd(b, tmp, s);
}
assert(!is_int);
return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
}
/* Fetches the current sample (gl_SampleID) at the given position */
static nir_ssa_def *
build_nir_tex_op_ms_read(struct nir_builder *b,
nir_ssa_def *tex_pos,
enum glsl_base_type tex_type)
{
const struct glsl_type *sampler_type =
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
nir_variable *sampler =
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_load_sample_id(b));
}
static nir_ssa_def *
build_nir_tex_op(struct nir_builder *b,
struct v3dv_device *device,
nir_ssa_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
enum glsl_sampler_dim dim)
{
switch (dim) {
case GLSL_SAMPLER_DIM_MS:
assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
/* For multisampled texture sources we need to use fetching instead of
* normalized texture coordinates. We already configured our blit
* coordinates to be in texel units, but here we still need to convert
* them from floating point to integer.
*/
tex_pos = nir_f2i32(b, tex_pos);
if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
else
return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
default:
assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
}
}
static nir_shader *
get_blit_vs()
{
nir_builder b;
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
b.shader->info.name = ralloc_strdup(b.shader, "meta blit vs");
const struct glsl_type *vec4 = glsl_vec4_type();
nir_variable *vs_out_pos =
nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_variable *vs_out_tex_coord =
nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
nir_ssa_def *pos = gen_rect_vertices(&b);
nir_store_var(&b, vs_out_pos, pos, 0xf);
nir_ssa_def *tex_coord = gen_tex_coords(&b);
nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
return b.shader;
}
static uint32_t
get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
{
switch (sampler_dim) {
case GLSL_SAMPLER_DIM_1D: return 0x1;
case GLSL_SAMPLER_DIM_2D: return 0x3;
case GLSL_SAMPLER_DIM_MS: return 0x3;
case GLSL_SAMPLER_DIM_3D: return 0x7;
default:
unreachable("invalid sampler dim");
};
}
static nir_shader *
get_color_blit_fs(struct v3dv_device *device,
VkFormat dst_format,
VkFormat src_format,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
enum glsl_sampler_dim sampler_dim)
{
nir_builder b;
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
b.shader->info.name = ralloc_strdup(b.shader, "meta blit fs");
const struct glsl_type *vec4 = glsl_vec4_type();
nir_variable *fs_in_tex_coord =
nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
const struct glsl_type *fs_out_type =
vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
glsl_vec4_type();
enum glsl_base_type src_base_type =
vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
GLSL_TYPE_FLOAT;
nir_variable *fs_out_color =
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
tex_coord = nir_channels(&b, tex_coord, channel_mask);
nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
dst_samples, src_samples, sampler_dim);
/* For integer textures, if the bit-size of the destination is too small
* to hold the source value, Vulkan (CTS) expects the implementation to
* clamp to the maximum value the destination can hold. The hardware can
* clamp to the render target type, which usually matches the component
* bit-size, but there are some cases that won't match, such as rgb10a2,
* which has a 16-bit render target type, so in those cases we need to
* clamp manually.
*/
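/* Example (illustrative values): blitting R32_UINT to A2R10G10B10_UINT,
* a red source value of 5000 exceeds the 10-bit maximum, so the shader
* clamps it to (1 << 10) - 1 = 1023; for the signed variant the range
* would be [-(1 << 9), (1 << 9) - 1] = [-512, 511].
*/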
if (format_needs_software_int_clamp(dst_format)) {
assert(vk_format_is_int(dst_format));
enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
nir_ssa_def *c[4];
for (uint32_t i = 0; i < 4; i++) {
c[i] = nir_channel(&b, color, i);
const uint32_t src_bit_size =
util_format_get_component_bits(src_pformat,
UTIL_FORMAT_COLORSPACE_RGB,
i);
const uint32_t dst_bit_size =
util_format_get_component_bits(dst_pformat,
UTIL_FORMAT_COLORSPACE_RGB,
i);
if (dst_bit_size >= src_bit_size)
continue;
if (util_format_is_pure_uint(dst_pformat)) {
nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
c[i] = nir_umin(&b, c[i], max);
} else {
nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
}
}
color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
}
nir_store_var(&b, fs_out_color, color, 0xf);
return b.shader;
}
static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
const VkPipelineColorBlendStateCreateInfo *cb_state,
const VkPipelineMultisampleStateCreateInfo *ms_state,
const VkPipelineLayout layout,
VkPipeline *pipeline)
{
struct v3dv_shader_module vs_m;
struct v3dv_shader_module fs_m;
v3dv_shader_module_internal_init(&vs_m, vs_nir);
v3dv_shader_module_internal_init(&fs_m, fs_nir);
VkPipelineShaderStageCreateInfo stages[2] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = v3dv_shader_module_to_handle(&vs_m),
.pName = "main",
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = v3dv_shader_module_to_handle(&fs_m),
.pName = "main",
},
};
VkGraphicsPipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = 2,
.pStages = stages,
.pVertexInputState = vi_state,
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = false,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.rasterizerDiscardEnable = false,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
.depthBiasEnable = false,
},
.pMultisampleState = ms_state,
.pDepthStencilState = ds_state,
.pColorBlendState = cb_state,
/* The meta blit pipeline declares all the state it uses as dynamic.
* As a consequence, vkCmdBindPipeline writes no dynamic state to the
* cmd buffer, so at the end of the meta operation we only need to
* restore the dynamic state that was set via vkCmdSet*.
*/
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 6,
.pDynamicStates = (VkDynamicState[]) {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
},
},
.flags = 0,
.layout = layout,
.renderPass = v3dv_render_pass_to_handle(pass),
.subpass = 0,
};
VkResult result =
v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
VK_NULL_HANDLE,
1, &info,
&device->alloc,
pipeline);
ralloc_free(vs_nir);
ralloc_free(fs_nir);
return result == VK_SUCCESS;
}
static enum glsl_sampler_dim
get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
{
/* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
*
* "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
* VK_IMAGE_TYPE_2D, ..."
*/
assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
switch (type) {
case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
case VK_IMAGE_TYPE_2D:
return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
GLSL_SAMPLER_DIM_MS;
case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
default:
unreachable("Invalid image type");
}
}
static bool
create_blit_pipeline(struct v3dv_device *device,
VkFormat dst_format,
VkFormat src_format,
VkColorComponentFlags cmask,
VkImageType src_type,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
VkRenderPass _pass,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
/* We always rewrite depth/stencil blits to compatible color blits */
assert(vk_format_is_color(dst_format));
assert(vk_format_is_color(src_format));
const enum glsl_sampler_dim sampler_dim =
get_sampler_dim(src_type, src_samples);
nir_shader *vs_nir = get_blit_vs();
nir_shader *fs_nir =
get_color_blit_fs(device, dst_format, src_format,
dst_samples, src_samples, sampler_dim);
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
};
VkPipelineDepthStencilStateCreateInfo ds_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
};
VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
.blendEnable = false,
.colorWriteMask = cmask,
};
const VkPipelineColorBlendStateCreateInfo cb_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = blend_att_state
};
const VkPipelineMultisampleStateCreateInfo ms_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = dst_samples,
.sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
.pSampleMask = NULL,
.alphaToCoverageEnable = false,
.alphaToOneEnable = false,
};
return create_pipeline(device,
pass,
vs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
&ms_state,
pipeline_layout,
pipeline);
}
/**
* Return a pipeline suitable for blitting the requested aspect given the
* destination and source formats.
*/
static bool
get_blit_pipeline(struct v3dv_device *device,
VkFormat dst_format,
VkFormat src_format,
VkColorComponentFlags cmask,
VkImageType src_type,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
struct v3dv_meta_blit_pipeline **pipeline)
{
bool ok = true;
mtx_lock(&device->meta.mtx);
if (!device->meta.blit.playout) {
ok = create_blit_pipeline_layout(device,
&device->meta.blit.dslayout,
&device->meta.blit.playout);
}
mtx_unlock(&device->meta.mtx);
if (!ok)
return false;
uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
get_blit_pipeline_cache_key(dst_format, src_format, cmask,
dst_samples, src_samples, key);
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
_mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
if (entry) {
mtx_unlock(&device->meta.mtx);
*pipeline = entry->data;
return true;
}
*pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (*pipeline == NULL)
goto fail;
ok = create_blit_render_pass(device, dst_format, src_format,
&(*pipeline)->pass,
&(*pipeline)->pass_no_load);
if (!ok)
goto fail;
/* Create the pipeline using one of the render passes, they are both
* compatible, so we don't care which one we use here.
*/
ok = create_blit_pipeline(device,
dst_format,
src_format,
cmask,
src_type,
dst_samples,
src_samples,
(*pipeline)->pass,
device->meta.blit.playout,
&(*pipeline)->pipeline);
if (!ok)
goto fail;
memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
_mesa_hash_table_insert(device->meta.blit.cache[src_type],
&(*pipeline)->key, *pipeline);
mtx_unlock(&device->meta.mtx);
return true;
fail:
mtx_unlock(&device->meta.mtx);
VkDevice _device = v3dv_device_to_handle(device);
if (*pipeline) {
if ((*pipeline)->pass)
v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc);
if ((*pipeline)->pass_no_load)
v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->alloc);
if ((*pipeline)->pipeline)
v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
vk_free(&device->alloc, *pipeline);
*pipeline = NULL;
}
return false;
}
static void
compute_blit_box(const VkOffset3D *offsets,
uint32_t image_w, uint32_t image_h,
uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
bool *mirror_x, bool *mirror_y)
{
if (offsets[1].x >= offsets[0].x) {
*mirror_x = false;
*x = MIN2(offsets[0].x, image_w - 1);
*w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
} else {
*mirror_x = true;
*x = MIN2(offsets[1].x, image_w - 1);
*w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
}
if (offsets[1].y >= offsets[0].y) {
*mirror_y = false;
*y = MIN2(offsets[0].y, image_h - 1);
*h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
} else {
*mirror_y = true;
*y = MIN2(offsets[1].y, image_h - 1);
*h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
}
}
static void
compute_blit_3d_layers(const VkOffset3D *offsets,
uint32_t *min_layer, uint32_t *max_layer,
bool *mirror_z)
{
if (offsets[1].z >= offsets[0].z) {
*mirror_z = false;
*min_layer = offsets[0].z;
*max_layer = offsets[1].z;
} else {
*mirror_z = true;
*min_layer = offsets[1].z;
*max_layer = offsets[0].z;
}
}
static void
ensure_meta_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer->meta.blit.dspool)
return;
/*
* FIXME: the size of the descriptor pool is based on what was needed by
* the tests/programs we have run so far. It would be good to try a
* smaller value, and to create descriptor pools on demand as we find
* ourselves running out of pool space.
*/
const uint32_t POOL_DESCRIPTOR_COUNT = 1024;
VkDescriptorPoolSize pool_size = {
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = POOL_DESCRIPTOR_COUNT,
};
VkDescriptorPoolCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = POOL_DESCRIPTOR_COUNT,
.poolSizeCount = 1,
.pPoolSizes = &pool_size,
.flags = 0,
};
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
&info,
&cmd_buffer->device->alloc,
&cmd_buffer->meta.blit.dspool);
}
/**
* Returns true if the implementation supports the requested operation, even
* if it failed to process it (for example, due to an out-of-memory error).
*
* The caller can select which destination channels are written via the cmask
* parameter (which can be 0 to default to all channels), as well as a swizzle
* to apply to the source via the cswizzle parameter (which can be NULL to use
* the default identity swizzle).
*/
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
VkFormat dst_format,
struct v3dv_image *src,
VkFormat src_format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
const VkImageBlit *_region,
VkFilter filter,
bool dst_is_padded_image)
{
bool handled = true;
/* We don't support rendering to linear depth/stencil; the caller should
* have rewritten this as a compatible color blit.
*/
assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||
!vk_format_is_depth_or_stencil(dst_format));
/* Can't sample from linear images */
if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)
return false;
VkImageBlit region = *_region;
/* Rewrite combined D/S blits to compatible color blits */
if (vk_format_is_depth_or_stencil(dst_format)) {
assert(src_format == dst_format);
assert(cmask == 0);
switch (dst_format) {
case VK_FORMAT_D16_UNORM:
dst_format = VK_FORMAT_R16_UINT;
break;
case VK_FORMAT_D32_SFLOAT:
dst_format = VK_FORMAT_R32_UINT;
break;
case VK_FORMAT_X8_D24_UNORM_PACK32:
case VK_FORMAT_D24_UNORM_S8_UINT:
if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
cmask |= VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
}
if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
cmask |= VK_COLOR_COMPONENT_R_BIT;
}
dst_format = VK_FORMAT_R8G8B8A8_UINT;
break;
default:
unreachable("Unsupported depth/stencil format");
}
src_format = dst_format;
region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
if (cmask == 0)
cmask = full_cmask;
VkComponentMapping ident_swizzle = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
if (!cswizzle)
cswizzle = &ident_swizzle;
/* When we get here from a copy between compressed / uncompressed images,
* the destination blit region is specified using the size semantics of the
* copy's source image (see copy_image_blit), so we need to apply those same
* semantics here when we compute the size of the destination image level.
*/
const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
const uint32_t dst_level_w =
u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),
region.dstSubresource.mipLevel);
const uint32_t dst_level_h =
u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),
region.dstSubresource.mipLevel);
const uint32_t src_level_w =
u_minify(src->extent.width, region.srcSubresource.mipLevel);
const uint32_t src_level_h =
u_minify(src->extent.height, region.srcSubresource.mipLevel);
const uint32_t src_level_d =
u_minify(src->extent.depth, region.srcSubresource.mipLevel);
uint32_t dst_x, dst_y, dst_w, dst_h;
bool dst_mirror_x, dst_mirror_y;
compute_blit_box(region.dstOffsets,
dst_level_w, dst_level_h,
&dst_x, &dst_y, &dst_w, &dst_h,
&dst_mirror_x, &dst_mirror_y);
uint32_t src_x, src_y, src_w, src_h;
bool src_mirror_x, src_mirror_y;
compute_blit_box(region.srcOffsets,
src_level_w, src_level_h,
&src_x, &src_y, &src_w, &src_h,
&src_mirror_x, &src_mirror_y);
uint32_t min_dst_layer;
uint32_t max_dst_layer;
/* Mirroring along Z only applies to 3D images; initialize to false so the
* mirror_z computation below is well-defined for other image types.
*/
bool dst_mirror_z = false;
if (dst->type != VK_IMAGE_TYPE_3D) {
min_dst_layer = region.dstSubresource.baseArrayLayer;
max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
} else {
compute_blit_3d_layers(region.dstOffsets,
&min_dst_layer, &max_dst_layer,
&dst_mirror_z);
}
uint32_t min_src_layer;
uint32_t max_src_layer;
bool src_mirror_z = false;
if (src->type != VK_IMAGE_TYPE_3D) {
min_src_layer = region.srcSubresource.baseArrayLayer;
max_src_layer = min_src_layer + region.srcSubresource.layerCount;
} else {
compute_blit_3d_layers(region.srcOffsets,
&min_src_layer, &max_src_layer,
&src_mirror_z);
}
uint32_t layer_count = max_dst_layer - min_dst_layer;
/* Translate source blit coordinates to normalized texture coordinates for
* single sampled textures. For multisampled textures we require
* unnormalized coordinates, since we can only do texelFetch on them.
*/
float coords[4] = {
(float)src_x,
(float)src_y,
(float)(src_x + src_w),
(float)(src_y + src_h),
};
if (src->samples == VK_SAMPLE_COUNT_1_BIT) {
coords[0] /= (float)src_level_w;
coords[1] /= (float)src_level_h;
coords[2] /= (float)src_level_w;
coords[3] /= (float)src_level_h;
}
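/* For example, blitting from the top-left 32x32 quadrant of a 64x64
* single-sampled level yields coords = { 0.0, 0.0, 0.5, 0.5 }.
*/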
/* Handle mirroring */
const bool mirror_x = dst_mirror_x != src_mirror_x;
const bool mirror_y = dst_mirror_y != src_mirror_y;
const bool mirror_z = dst_mirror_z != src_mirror_z;
float tex_coords[5] = {
!mirror_x ? coords[0] : coords[2],
!mirror_y ? coords[1] : coords[3],
!mirror_x ? coords[2] : coords[0],
!mirror_y ? coords[3] : coords[1],
/* Z coordinate for 3D blit sources, to be filled for each
* destination layer
*/
0.0f
};
/* For blits from 3D images we also need to compute the slice coordinate to
* sample from, which will change for each layer in the destination.
* Compute the step we should advance by on each iteration.
*/
const float src_z_step =
(float)(max_src_layer - min_src_layer) / (float)layer_count;
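/* For example, blitting 8 source slices onto 4 destination layers gives
* src_z_step = 2.0, so the loop below samples source slices 1, 3, 5 and 7
* at the midpoint of each step.
*/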
/* Create the descriptor pool for the source blit texture if needed */
ensure_meta_blit_descriptor_pool(cmd_buffer);
/* Get the blit pipeline */
struct v3dv_meta_blit_pipeline *pipeline = NULL;
bool ok = get_blit_pipeline(cmd_buffer->device,
dst_format, src_format, cmask, src->type,
dst->samples, src->samples,
&pipeline);
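/* Per the semantics of this function, failing to get the pipeline (for
* example, due to an out-of-memory error) still counts as handled.
*/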
if (!ok)
return handled;
assert(pipeline && pipeline->pipeline &&
pipeline->pass && pipeline->pass_no_load);
struct v3dv_device *device = cmd_buffer->device;
assert(cmd_buffer->meta.blit.dspool);
assert(device->meta.blit.dslayout);
/* Push command buffer state before starting meta operation */
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
/* Set up the framebuffer */
VkDevice _device = v3dv_device_to_handle(device);
VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
VkResult result;
uint32_t dirty_dynamic_state = 0;
VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
for (uint32_t i = 0; i < layer_count; i++) {
VkImageViewCreateInfo dst_image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(dst),
.viewType = v3dv_image_type_to_view_type(dst->type),
.format = dst_format,
.subresourceRange = {
.aspectMask = aspects,
.baseMipLevel = region.dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = min_dst_layer + i,
.layerCount = 1
},
};
VkImageView dst_image_view;
result = v3dv_CreateImageView(_device, &dst_image_view_info,
&device->alloc, &dst_image_view);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)dst_image_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
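/* The framebuffer only needs to be large enough to cover the destination
* region of the blit, measured from the image origin.
*/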
VkFramebufferCreateInfo fb_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = pipeline->pass,
.attachmentCount = 1,
.pAttachments = &dst_image_view,
.width = dst_x + dst_w,
.height = dst_y + dst_h,
.layers = 1,
};
VkFramebuffer fb;
result = v3dv_CreateFramebuffer(_device, &fb_info,
&cmd_buffer->device->alloc, &fb);
if (result != VK_SUCCESS)
goto fail;
struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
fb_info.height == dst_level_h &&
dst_is_padded_image;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)fb,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
/* Set up the descriptor set for the blit source texture. We don't have to
* register the descriptor as a private command buffer object since all
* descriptors will be freed automatically with the descriptor pool.
*/
VkDescriptorSet set;
VkDescriptorSetAllocateInfo set_alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = cmd_buffer->meta.blit.dspool,
.descriptorSetCount = 1,
.pSetLayouts = &device->meta.blit.dslayout,
};
result = v3dv_AllocateDescriptorSets(_device, &set_alloc_info, &set);
if (result != VK_SUCCESS)
goto fail;
VkSamplerCreateInfo sampler_info = {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = filter,
.minFilter = filter,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
};
VkSampler sampler;
result = v3dv_CreateSampler(_device, &sampler_info, &device->alloc,
&sampler);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)sampler,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
VkImageViewCreateInfo src_image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(src),
.viewType = v3dv_image_type_to_view_type(src->type),
.format = src_format,
.components = *cswizzle,
.subresourceRange = {
.aspectMask = aspects,
.baseMipLevel = region.srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer =
src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
.layerCount = 1
},
};
VkImageView src_image_view;
result = v3dv_CreateImageView(_device, &src_image_view_info,
&device->alloc, &src_image_view);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)src_image_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
VkDescriptorImageInfo image_info = {
.sampler = sampler,
.imageView = src_image_view,
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
};
VkWriteDescriptorSet write = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
};
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
/* If the region we are about to blit is tile-aligned, then we can
* use the render pass version that won't pre-load the tile buffer
* with the dst image contents before the blit. The exception is when we
* don't have a full color mask, since in that case we need to preserve
* the original value of some of the color components.
*/
const VkRect2D render_area = {
.offset = { dst_x, dst_y },
.extent = { dst_w, dst_h },
};
struct v3dv_render_pass *pipeline_pass =
v3dv_render_pass_from_handle(pipeline->pass);
bool can_skip_tlb_load =
cmask == full_cmask &&
v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer,
pipeline_pass, 0);
/* Record blit */
VkRenderPassBeginInfo rp_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
pipeline->pass,
.framebuffer = fb,
.renderArea = render_area,
.clearValueCount = 0,
};
v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
struct v3dv_job *job = cmd_buffer->state.job;
if (!job)
goto fail;
/* For 3D blits we need to compute the source slice to blit from (the Z
* coordinate of the source sample operation). We want to choose this
* based on the ratio of the depth of the source and the destination
* images, picking the coordinate in the middle of each step.
*/
if (src->type == VK_IMAGE_TYPE_3D) {
tex_coords[4] =
!mirror_z ?
(min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
(max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
}
v3dv_CmdPushConstants(_cmd_buffer,
device->meta.blit.playout,
VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
&tex_coords);
v3dv_CmdBindPipeline(_cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline->pipeline);
v3dv_CmdBindDescriptorSets(_cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta.blit.playout,
0, 1, &set,
0, NULL);
const VkViewport viewport = {
.x = dst_x,
.y = dst_y,
.width = dst_w,
.height = dst_h,
.minDepth = 0.0f,
.maxDepth = 1.0f
};
v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
const VkRect2D scissor = {
.offset = { dst_x, dst_y },
.extent = { dst_w, dst_h }
};
v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
v3dv_CmdEndRenderPass(_cmd_buffer);
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
}
fail:
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
return handled;
}
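/**
* For each region we first try the TFU (Texture Formatting Unit) hardware
* path and fall back to a shader-based blit when the TFU can't handle the
* operation.
*/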
void
v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageBlit* pRegions,
VkFilter filter)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
/* This command can only happen outside a render pass */
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
/* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
src->samples == VK_SAMPLE_COUNT_1_BIT);
for (uint32_t i = 0; i < regionCount; i++) {
if (blit_tfu(cmd_buffer, dst, src, &pRegions[i], filter))
continue;
if (blit_shader(cmd_buffer,
dst, dst->vk_format,
src, src->vk_format,
0, NULL,
&pRegions[i], filter, true)) {
continue;
}
unreachable("Unsupported blit operation");
}
}
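/**
* Emits the generic tile list that resolves one layer: each tile is loaded
* from the multisampled source into the tile buffer and stored back to the
* single-sampled destination.
*/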
static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *dst,
struct v3dv_image *src,
uint32_t layer,
const VkImageResolve *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *srcrsc = &region->srcSubresource;
assert((src->type != VK_IMAGE_TYPE_3D && layer < srcrsc->layerCount) ||
layer < src->extent.depth);
emit_image_load(cl, framebuffer, src, srcrsc->aspectMask,
srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
false, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
const VkImageSubresourceLayers *dstrsc = &region->dstSubresource;
assert((dst->type != VK_IMAGE_TYPE_3D && layer < dstrsc->layerCount) ||
layer < dst->extent.depth);
emit_image_store(cl, framebuffer, dst, dstrsc->aspectMask,
dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
false, false, true);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
static void
emit_resolve_image_layer(struct v3dv_job *job,
struct v3dv_image *dst,
struct v3dv_image *src,
struct framebuffer_data *framebuffer,
uint32_t layer,
const VkImageResolve *region)
{
emit_frame_setup(job, layer, NULL);
emit_resolve_image_layer_per_tile_list(job, framebuffer,
dst, src, layer, region);
emit_supertile_coordinates(job, framebuffer);
}
static void
emit_resolve_image_rcl(struct v3dv_job *job,
struct v3dv_image *dst,
struct v3dv_image *src,
struct framebuffer_data *framebuffer,
const VkImageResolve *region)
{
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
v3dv_return_if_oom(NULL, job);
for (uint32_t layer = 0; layer < job->frame_tiling.layers; layer++)
emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
cl_emit(rcl, END_OF_RENDERING, end);
}
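/**
* Resolves a multisampled image region using the TLB. Returns false if the
* images or the format can't use the TLB path, so the caller can fall back
* to a shader-based resolve. Note that a job allocation failure still
* returns true, since the operation itself is supported.
*/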
static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageResolve *region)
{
if (!can_use_tlb(src, &region->srcOffset, NULL) ||
!can_use_tlb(dst, &region->dstOffset, NULL)) {
return false;
}
if (!v3dv_format_supports_tlb_resolve(src->format))
return false;
const VkFormat fb_format = src->vk_format;
uint32_t num_layers;
if (dst->type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
else
num_layers = region->extent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
uint32_t internal_type, internal_bpp;
get_internal_type_bpp_for_image_aspects(fb_format,
region->srcSubresource.aspectMask,
&internal_type, &internal_bpp);
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);
struct framebuffer_data framebuffer;
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
&job->frame_tiling);
v3dv_job_emit_binning_flush(job);
emit_resolve_image_rcl(job, dst, src, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
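/**
* Fallback resolve path: reuses the blit shader with a region of the same
* extent on both images, sampling from the multisampled source and writing
* the resolved color to the destination.
*/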
static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageResolve *region)
{
const VkImageBlit blit_region = {
.srcSubresource = region->srcSubresource,
.srcOffsets = {
region->srcOffset,
{
region->srcOffset.x + region->extent.width,
region->srcOffset.y + region->extent.height,
}
},
.dstSubresource = region->dstSubresource,
.dstOffsets = {
region->dstOffset,
{
region->dstOffset.x + region->extent.width,
region->dstOffset.y + region->extent.height,
}
},
};
return blit_shader(cmd_buffer,
dst, dst->vk_format,
src, src->vk_format,
0, NULL,
&blit_region, VK_FILTER_NEAREST, true);
}
void
v3dv_CmdResolveImage(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageResolve *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
/* This command can only happen outside a render pass */
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
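/* The V3D hardware only supports 4x multisampling, so the source of a
* resolve is always expected to be 4x.
*/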
assert(src->samples == VK_SAMPLE_COUNT_4_BIT);
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
for (uint32_t i = 0; i < regionCount; i++) {
if (resolve_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
continue;
if (resolve_image_blit(cmd_buffer, dst, src, &pRegions[i]))
continue;
unreachable("Unsupported multismaple resolve operation");
}
}