| /* |
| * Copyright © 2019 Raspberry Pi |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "v3dv_private.h" |
| |
| #include "compiler/nir/nir_builder.h" |
| #include "broadcom/cle/v3dx_pack.h" |
| #include "vk_format_info.h" |
| #include "util/u_pack_color.h" |
| |
/* Forward declaration (defined later in this file): returns true if a copy
 * involving 'image' at the given offset can be implemented with the TLB
 * path, and if so writes a TLB-compatible format to *compat_format.
 */
static inline bool
can_use_tlb(struct v3dv_image *image,
            const VkOffset3D *offset,
            VkFormat *compat_format);
| |
| /** |
| * Copy operations implemented in this file don't operate on a framebuffer |
| * object provided by the user, however, since most use the TLB for this, |
| * we still need to have some representation of the framebuffer. For the most |
| * part, the job's frame tiling information is enough for this, however we |
 * still need additional information such as the internal type of our single
| * render target, so we use this auxiliary struct to pass that information |
| * around. |
| */ |
/* Auxiliary framebuffer description used by the copy paths in this file
 * (see the comment above). Field order and types must not change: this
 * struct is filled by setup_framebuffer_data() and read by the emit_*
 * helpers below.
 */
struct framebuffer_data {
   /* The internal type of the single render target */
   uint32_t internal_type;

   /* Supertile coverage (inclusive ranges, in supertile units; coverage
    * always starts at supertile 0,0).
    */
   uint32_t min_x_supertile;
   uint32_t min_y_supertile;
   uint32_t max_x_supertile;
   uint32_t max_y_supertile;

   /* Format info */
   VkFormat vk_format;
   const struct v3dv_format *format;
};
| |
| static void |
| setup_framebuffer_data(struct framebuffer_data *fb, |
| VkFormat vk_format, |
| uint32_t internal_type, |
| const struct v3dv_frame_tiling *tiling) |
| { |
| fb->internal_type = internal_type; |
| |
| /* Supertile coverage always starts at 0,0 */ |
| uint32_t supertile_w_in_pixels = |
| tiling->tile_width * tiling->supertile_width; |
| uint32_t supertile_h_in_pixels = |
| tiling->tile_height * tiling->supertile_height; |
| |
| fb->min_x_supertile = 0; |
| fb->min_y_supertile = 0; |
| fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; |
| fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; |
| |
| fb->vk_format = vk_format; |
| fb->format = v3dv_get_format(vk_format); |
| } |
| |
| /* This chooses a tile buffer format that is appropriate for the copy operation. |
| * Typically, this is the image render target type, however, if we are copying |
| * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so |
| * we need to load and store to/from a tile color buffer using a compatible |
| * color format. |
| */ |
| static uint32_t |
| choose_tlb_format(struct framebuffer_data *framebuffer, |
| VkImageAspectFlags aspect, |
| bool for_store, |
| bool is_copy_to_buffer, |
| bool is_copy_from_buffer) |
| { |
| if (is_copy_to_buffer || is_copy_from_buffer) { |
| switch (framebuffer->vk_format) { |
| case VK_FORMAT_D16_UNORM: |
| return V3D_OUTPUT_IMAGE_FORMAT_R16UI; |
| case VK_FORMAT_D32_SFLOAT: |
| return V3D_OUTPUT_IMAGE_FORMAT_R32F; |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| /* When storing the stencil aspect of a combined depth/stencil image |
| * to a buffer, the Vulkan spec states that the output buffer must |
| * have packed stencil values, so we choose an R8UI format for our |
| * store outputs. For the load input we still want RGBA8UI since the |
| * source image contains 4 channels (including the 3 channels |
| * containing the 24-bit depth value). |
| * |
| * When loading the stencil aspect of a combined depth/stencil image |
| * from a buffer, we read packed 8-bit stencil values from the buffer |
| * that we need to put into the LSB of the 32-bit format (the R |
| * channel), so we use R8UI. For the store, if we used R8UI then we |
| * would write 8-bit stencil values consecutively over depth channels, |
| * so we need to use RGBA8UI. This will write each stencil value in |
| * its correct position, but will overwrite depth values (channels G |
| * B,A) with undefined values. To fix this, we will have to restore |
| * the depth aspect from the Z tile buffer, which we should pre-load |
| * from the image before the store). |
| */ |
| if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; |
| } else { |
| assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); |
| if (is_copy_to_buffer) { |
| return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : |
| V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; |
| } else { |
| assert(is_copy_from_buffer); |
| return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : |
| V3D_OUTPUT_IMAGE_FORMAT_R8UI; |
| } |
| } |
| default: /* Color formats */ |
| return framebuffer->format->rt_type; |
| break; |
| } |
| } else { |
| return framebuffer->format->rt_type; |
| } |
| } |
| |
| static inline bool |
| format_needs_rb_swap(VkFormat format) |
| { |
| const uint8_t *swizzle = v3dv_get_format_swizzle(format); |
| return swizzle[0] == PIPE_SWIZZLE_Z; |
| } |
| |
| static void |
| get_internal_type_bpp_for_image_aspects(VkFormat vk_format, |
| VkImageAspectFlags aspect_mask, |
| uint32_t *internal_type, |
| uint32_t *internal_bpp) |
| { |
| const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT; |
| |
| /* We can't store depth/stencil pixel formats to a raster format, so |
| * so instead we load our depth/stencil aspects to a compatible color |
| * format. |
| */ |
| /* FIXME: pre-compute this at image creation time? */ |
| if (aspect_mask & ds_aspects) { |
| switch (vk_format) { |
| case VK_FORMAT_D16_UNORM: |
| *internal_type = V3D_INTERNAL_TYPE_16UI; |
| *internal_bpp = V3D_INTERNAL_BPP_64; |
| break; |
| case VK_FORMAT_D32_SFLOAT: |
| *internal_type = V3D_INTERNAL_TYPE_32F; |
| *internal_bpp = V3D_INTERNAL_BPP_128; |
| break; |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| /* Use RGBA8 format so we can relocate the X/S bits in the appropriate |
| * place to match Vulkan expectations. See the comment on the tile |
| * load command for more details. |
| */ |
| *internal_type = V3D_INTERNAL_TYPE_8UI; |
| *internal_bpp = V3D_INTERNAL_BPP_32; |
| break; |
| default: |
| assert(!"unsupported format"); |
| break; |
| } |
| } else { |
| const struct v3dv_format *format = v3dv_get_format(vk_format); |
| v3dv_get_internal_type_bpp_for_output_format(format->rt_type, |
| internal_type, |
| internal_bpp); |
| } |
| } |
| |
/* Optional clear parameters passed to emit_rcl_prologue() when the RCL
 * should clear the tile buffers before rendering.
 */
struct rcl_clear_info {
   /* Clear color / depth / stencil values to program */
   const union v3dv_clear_value *clear_value;
   /* Image being cleared, or NULL; used to compute UIF clear padding */
   struct v3dv_image *image;
   /* Aspects to clear (color and/or depth/stencil) */
   VkImageAspectFlags aspects;
   /* Target layer and mip level within 'image' */
   uint32_t layer;
   uint32_t level;
};
| |
/* Emits the common render control list prologue for the copy/clear jobs in
 * this file: rendering mode configuration, optional clear colors, render
 * target setup, Z/S clear values and tile list initialization.
 *
 * Returns the job's RCL on success, or NULL if the command buffer ran out
 * of memory while ensuring CL space.
 */
static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  uint32_t rt_internal_type,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
   /* Reserve space for the prologue plus per-layer supertile coordinates
    * emitted later (sizes appear to be conservative upper bounds).
    */
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = false;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
   }

   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      /* For UIF-tiled images, if the slice's padded height exceeds the
       * implicit padding by 15+ UIF blocks we must program the padded
       * height explicitly in the clear color packet (PART3 below).
       */
      uint32_t clear_pad = 0;
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
             slice->tiling == VC5_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

      /* Clear colors are split across up to 3 packets depending on the
       * internal bpp of the render target.
       */
      const uint32_t *color = &clear_info->clear_value->color[0];
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = rt_internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   /* Z/S clear values are always programmed; defaults (1.0, 0) are used
    * when no clear was requested.
    */
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}
| |
/* Emits per-layer frame setup into the job's RCL: tile list base address,
 * supertile configuration, and the GFXH-1742 dummy-tile workaround. If
 * 'clear_value' is non-NULL, the tile buffers are also cleared here.
 */
static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

   /* Each layer gets its own region of the tile allocation BO, 64 bytes
    * per draw tile.
    */
   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
    * it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* Clear only on the first dummy tile pass */
      if (clear_value && i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}
| |
| static void |
| emit_supertile_coordinates(struct v3dv_job *job, |
| struct framebuffer_data *framebuffer) |
| { |
| v3dv_return_if_oom(NULL, job); |
| |
| struct v3dv_cl *rcl = &job->rcl; |
| |
| const uint32_t min_y = framebuffer->min_y_supertile; |
| const uint32_t max_y = framebuffer->max_y_supertile; |
| const uint32_t min_x = framebuffer->min_x_supertile; |
| const uint32_t max_x = framebuffer->max_x_supertile; |
| |
| for (int y = min_y; y <= max_y; y++) { |
| for (int x = min_x; x <= max_x; x++) { |
| cl_emit(rcl, SUPERTILE_COORDINATES, coords) { |
| coords.column_number_in_supertiles = x; |
| coords.row_number_in_supertiles = y; |
| } |
| } |
| } |
| } |
| |
/* Emits a raster-order (linear) tile buffer load from 'bo' at 'offset'
 * into the given tile buffer ('buffer'), with the given row stride and
 * input image format.
 */
static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = VC5_TILING_RASTER;
      /* For raster memory format this field carries the stride */
      load.height_in_ub_or_stride = stride;
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
| |
| static void |
| emit_linear_store(struct v3dv_cl *cl, |
| uint32_t buffer, |
| struct v3dv_bo *bo, |
| uint32_t offset, |
| uint32_t stride, |
| bool msaa, |
| uint32_t format) |
| { |
| cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { |
| store.buffer_to_store = RENDER_TARGET_0; |
| store.address = v3dv_cl_address(bo, offset); |
| store.clear_buffer_being_stored = false; |
| store.output_image_format = format; |
| store.memory_format = VC5_TILING_RASTER; |
| store.height_in_ub_or_stride = stride; |
| store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : |
| V3D_DECIMATE_MODE_SAMPLE_0; |
| } |
| } |
| |
/* Emits a tile buffer load of one layer/level of 'image'. For image
 * to/from buffer copies the load always targets the color tile buffer
 * (RT0); otherwise depth/stencil aspects target the Z/S buffers.
 */
static void
emit_image_load(struct v3dv_cl *cl,
                struct framebuffer_data *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* For image to/from buffer copies we always load to and store from RT0,
    * even for depth/stencil aspects, because the hardware can't do raster
    * stores or loads from/to the depth/stencil tile buffers.
    */
   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = load_to_color_tlb ?
         RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);
      load.memory_format = slice->tiling;

      /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
       * expects the depth value in the LSB bits of each 32-bit pixel.
       * Unfortunately, the hardware seems to put the S8/X8 bits there and the
       * depth bits on the MSB. To work around that we can reverse the channel
       * order and then swap the R/B channels to get what we want.
       *
       * NOTE: reversing and swapping only gets us the behavior we want if the
       * operations happen in that exact order, which seems to be the case when
       * done on the tile buffer load operations. On the store, it seems the
       * order is not the same. The order on the store is probably reversed so
       * that reversing and swapping on both the load and the store preserves
       * the original order of the channels in memory.
       *
       * Notice that we only need to do this when copying to a buffer, where
       * depth and stencil aspects are copied as separate regions and
       * the spec expects them to be tightly packed.
       */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_to_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy (i.e. we are clearing the image),
          * so we need to make sure we respect the format swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
      }

      load.r_b_swap = needs_rb_swap;
      load.channel_reverse = needs_chan_reverse;

      /* UIF tilings program the padded height here; raster programs the
       * stride; other tilings leave the field at its default.
       */
      if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
          slice->tiling == VC5_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == VC5_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
| |
/* Emits a tile buffer store to one layer/level of 'image'. Mirrors
 * emit_image_load(): buffer copies always store from the color tile buffer
 * (RT0), otherwise depth/stencil aspects store from the Z/S buffers.
 */
static void
emit_image_store(struct v3dv_cl *cl,
                 struct framebuffer_data *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = store_from_color_tlb ?
         RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* Not a raw data copy: respect the format swizzle */
         needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      /* UIF tilings program padded height; raster programs the stride */
      if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
          slice->tiling == VC5_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == VC5_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
| |
/* Builds the per-tile generic tile list for copying one layer of 'image'
 * to 'buffer': loads the image into the TLB, then stores the TLB linearly
 * to the buffer. The list is emitted into the job's indirect CL and
 * branched to from the RCL.
 */
static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                        struct framebuffer_data *framebuffer,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        uint32_t layer,
                                        const VkBufferImageCopy *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
          layer < image->extent.depth);

   /* Load image to TLB */
   emit_image_load(cl, framebuffer, image, imgrsc->aspectMask,
                   imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer. Per the Vulkan spec, bufferRowLength /
    * bufferImageHeight of 0 means tightly packed according to imageExtent.
    */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy from compressed format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));

   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
                  1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
                                       true, true, false);
   bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Hook the tile list we just built into the RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
/* Emits the RCL commands to copy a single layer of 'image' to 'buffer':
 * per-layer frame setup, the per-tile copy list, and the supertile
 * coordinates that drive it.
 */
static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct framebuffer_data *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}
| |
| static void |
| emit_copy_image_to_buffer_rcl(struct v3dv_job *job, |
| struct v3dv_buffer *buffer, |
| struct v3dv_image *image, |
| struct framebuffer_data *framebuffer, |
| const VkBufferImageCopy *region) |
| { |
| struct v3dv_cl *rcl = |
| emit_rcl_prologue(job, framebuffer->internal_type, NULL); |
| v3dv_return_if_oom(NULL, job); |
| |
| for (int layer = 0; layer < job->frame_tiling.layers; layer++) |
| emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region); |
| cl_emit(rcl, END_OF_RENDERING, end); |
| } |
| |
| /* Implements a copy using the TLB. |
| * |
| * This only works if we are copying from offset (0,0), since a TLB store for |
| * tile (x,y) will be written at the same tile offset into the destination. |
| * When this requirement is not met, we need to use a blit instead. |
| * |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| * |
| */ |
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy *region)
{
   /* The TLB path only works for copies from offset (0,0); otherwise fall
    * back to the blit path (see function comment above).
    */
   VkFormat fb_format;
   if (!can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   get_internal_type_bpp_for_image_aspects(fb_format,
                                           region->imageSubresource.aspectMask,
                                           &internal_type, &internal_bpp);

   /* For 3D images the layer count comes from the copy extent's depth */
   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* A NULL job here means the command buffer hit OOM; the operation is
    * still "handled" by this path, so return true.
    */
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp);

   struct framebuffer_data framebuffer;
   setup_framebuffer_data(&framebuffer, fb_format, internal_type,
                          &job->frame_tiling);

   v3dv_job_emit_binning_flush(job);
   emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
| |
/* Forward declaration (defined later in this file): implements a blit with
 * a shader-based path. 'cmask' restricts the color channels written and
 * 'cswizzle' remaps the source channels.
 */
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit *region,
            VkFilter filter);
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy *region)
{
   bool handled = false;

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to setup
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
                image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24-bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * the source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return handled;
      };
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return handled;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec. Per the Vulkan spec, a value of 0
    * for bufferRowLength / bufferImageHeight means tightly packed.
    */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Copy requested layers */
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer.
       * The image is linear-tiled so its memory layout matches the
       * buffer's raster layout.
       */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
      VkResult result =
         v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      /* The wrapper image is destroyed with the command buffer's private
       * objects, so callers don't need to track it.
       */
      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)buffer_image,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image, offset to the start of the
       * current layer's data in the buffer.
       */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
         i * buf_width * buf_height * buffer_bpp;
      result = v3dv_BindImageMemory(_device, buffer_image,
                                    v3dv_device_memory_to_handle(buffer->mem),
                                    buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit blit_region = {
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
         },
         .srcOffsets = {
            {
               region->imageOffset.x,
               region->imageOffset.y,
               region->imageOffset.z + i,
            },
            {
               region->imageOffset.x + region->imageExtent.width,
               region->imageOffset.y + region->imageExtent.height,
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            { region->imageExtent.width, region->imageExtent.height, 1 },
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
                            &blit_region, VK_FILTER_NEAREST);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
}
| |
| static VkFormat |
| get_compatible_tlb_format(VkFormat format) |
| { |
| switch (format) { |
| case VK_FORMAT_R8G8B8A8_SNORM: |
| return VK_FORMAT_R8G8B8A8_UINT; |
| |
| case VK_FORMAT_R8G8_SNORM: |
| return VK_FORMAT_R8G8_UINT; |
| |
| case VK_FORMAT_R8_SNORM: |
| return VK_FORMAT_R8_UINT; |
| |
| case VK_FORMAT_A8B8G8R8_SNORM_PACK32: |
| return VK_FORMAT_A8B8G8R8_UINT_PACK32; |
| |
| case VK_FORMAT_R16_UNORM: |
| case VK_FORMAT_R16_SNORM: |
| return VK_FORMAT_R16_UINT; |
| |
| case VK_FORMAT_R16G16_UNORM: |
| case VK_FORMAT_R16G16_SNORM: |
| return VK_FORMAT_R16G16_UINT; |
| |
| case VK_FORMAT_R16G16B16A16_UNORM: |
| case VK_FORMAT_R16G16B16A16_SNORM: |
| return VK_FORMAT_R16G16B16A16_UINT; |
| |
| case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
| return VK_FORMAT_R32_SFLOAT; |
| |
| /* We can't render to compressed formats using the TLB so instead we use |
| * a compatible format with the same bpp as the compressed format. Because |
| * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the |
| * case of ETC), when we implement copies with the compatible format we |
| * will have to divide offsets and dimensions on the compressed image by |
| * the compressed block size. |
| */ |
| case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: |
| case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: |
| case VK_FORMAT_EAC_R11G11_UNORM_BLOCK: |
| case VK_FORMAT_EAC_R11G11_SNORM_BLOCK: |
| return VK_FORMAT_R32G32B32A32_UINT; |
| |
| case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: |
| case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: |
| case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: |
| case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: |
| case VK_FORMAT_EAC_R11_UNORM_BLOCK: |
| case VK_FORMAT_EAC_R11_SNORM_BLOCK: |
| return VK_FORMAT_R16G16B16A16_UINT; |
| |
| default: |
| return VK_FORMAT_UNDEFINED; |
| } |
| } |
| |
| static inline bool |
| can_use_tlb(struct v3dv_image *image, |
| const VkOffset3D *offset, |
| VkFormat *compat_format) |
| { |
| if (offset->x != 0 || offset->y != 0) |
| return false; |
| |
| if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { |
| if (compat_format) |
| *compat_format = image->vk_format; |
| return true; |
| } |
| |
| /* If the image format is not TLB-supported, then check if we can use |
| * a compatible format instead. |
| */ |
| if (compat_format) { |
| *compat_format = get_compatible_tlb_format(image->vk_format); |
| if (*compat_format != VK_FORMAT_UNDEFINED) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void |
| v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkBuffer destBuffer, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, image, srcImage); |
| V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer); |
| |
| for (uint32_t i = 0; i < regionCount; i++) { |
| if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i])) |
| continue; |
| if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i])) |
| continue; |
| unreachable("Unsupported image to buffer copy."); |
| } |
| } |
| |
/* Emits the generic tile list for copying a single layer/slice through the
 * TLB: load the source layer into the tile buffer, then store the tile out
 * to the destination layer. The list is written into the job's indirect CL
 * and linked into the RCL at the end.
 */
static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct framebuffer_data *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer,
                                    const VkImageCopy *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* For non-3D images 'layer' indexes the subresource's array layers;
    * for 3D images it indexes depth slices.
    */
   const VkImageSubresourceLayers *srcrsc = &region->srcSubresource;
   assert((src->type != VK_IMAGE_TYPE_3D && layer < srcrsc->layerCount) ||
          layer < src->extent.depth);

   emit_image_load(cl, framebuffer, src, srcrsc->aspectMask,
                   srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   const VkImageSubresourceLayers *dstrsc = &region->dstSubresource;
   assert((dst->type != VK_IMAGE_TYPE_3D && layer < dstrsc->layerCount) ||
          layer < dst->extent.depth);

   emit_image_store(cl, framebuffer, dst, dstrsc->aspectMask,
                    dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
                    false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Link the tile list we just wrote into the job's RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
/* Emits all RCL commands needed to copy one layer: frame setup for the
 * layer, its per-tile generic list, and the supertile coordinates that
 * trigger its execution.
 */
static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct framebuffer_data *framebuffer,
                      uint32_t layer,
                      const VkImageCopy *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}
| |
| static void |
| emit_copy_image_rcl(struct v3dv_job *job, |
| struct v3dv_image *dst, |
| struct v3dv_image *src, |
| struct framebuffer_data *framebuffer, |
| const VkImageCopy *region) |
| { |
| struct v3dv_cl *rcl = |
| emit_rcl_prologue(job, framebuffer->internal_type, NULL); |
| v3dv_return_if_oom(NULL, job); |
| |
| for (int layer = 0; layer < job->frame_tiling.layers; layer++) |
| emit_copy_image_layer(job, dst, src, framebuffer, layer, region); |
| cl_emit(rcl, END_OF_RENDERING, end); |
| } |
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *dst, |
| struct v3dv_image *src, |
| const VkImageCopy *region) |
| { |
| VkFormat fb_format; |
| if (!can_use_tlb(src, ®ion->srcOffset, &fb_format) || |
| !can_use_tlb(dst, ®ion->dstOffset, &fb_format)) { |
| return false; |
| } |
| |
| /* From the Vulkan spec, VkImageCopy valid usage: |
| * |
| * "If neither the calling command’s srcImage nor the calling command’s |
| * dstImage has a multi-planar image format then the aspectMask member |
| * of srcSubresource and dstSubresource must match." |
| */ |
| assert(region->dstSubresource.aspectMask == |
| region->srcSubresource.aspectMask); |
| uint32_t internal_type, internal_bpp; |
| get_internal_type_bpp_for_image_aspects(fb_format, |
| region->dstSubresource.aspectMask, |
| &internal_type, &internal_bpp); |
| |
| /* From the Vulkan spec, VkImageCopy valid usage: |
| * |
| * "The layerCount member of srcSubresource and dstSubresource must match" |
| */ |
| assert(region->srcSubresource.layerCount == |
| region->dstSubresource.layerCount); |
| uint32_t num_layers; |
| if (dst->type != VK_IMAGE_TYPE_3D) |
| num_layers = region->dstSubresource.layerCount; |
| else |
| num_layers = region->extent.depth; |
| assert(num_layers > 0); |
| |
| struct v3dv_job *job = |
| v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); |
| if (!job) |
| return true; |
| |
| /* Handle copy to compressed image using compatible format */ |
| const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format); |
| const uint32_t block_h = vk_format_get_blockheight(dst->vk_format); |
| const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); |
| const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); |
| |
| v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp); |
| |
| struct framebuffer_data framebuffer; |
| setup_framebuffer_data(&framebuffer, fb_format, internal_type, |
| &job->frame_tiling); |
| |
| v3dv_job_emit_binning_flush(job); |
| emit_copy_image_rcl(job, dst, src, &framebuffer, region); |
| |
| v3dv_cmd_buffer_finish_job(cmd_buffer); |
| |
| return true; |
| } |
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *dst, |
| struct v3dv_image *src, |
| const VkImageCopy *region) |
| { |
| /* We need to choose a single format for the blit to ensure that this is |
| * really a copy and there are not format conversions going on. Since we |
| * going to blit, we need to make sure that the selected format can be |
| * both rendered to and textured from. |
| */ |
| VkFormat format; |
| uint32_t divisor = 1; |
| if (vk_format_is_compressed(src->vk_format)) { |
| /* If we are copying from a compressed format we should be aware that we |
| * are going to texture from the source image, and the texture setup |
| * knows the actual size of the image, so we need to choose a format |
| * that has a per-texel (not per-block) bpp that is compatible for that |
| * image size. For example, for a source image with size Bw*WxBh*H image |
| * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI, |
| * each of the Bw*WxBh*H texels in the compressed source image is 8-bit |
| * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed), |
| * so we specify a blit with size Bw*WxBh*H and we choose a format with |
| * a bpp of 8-bit per texel (R8_UINT). |
| * |
| * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM we |
| * would need a 4-bit format, which we don't have, so instead we still |
| * choose an 8-bit format, but we apply a divisor to the row dimensions |
| * of the blit, since we are copying two texels per item. |
| */ |
| format = VK_FORMAT_R8_UINT; |
| switch (src->cpp) { |
| case 16: |
| break; |
| case 8: |
| divisor = 2; |
| break; |
| default: |
| unreachable("Unsupported compressed format"); |
| } |
| } else { |
| format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ? |
| src->vk_format : get_compatible_tlb_format(src->vk_format); |
| if (format == VK_FORMAT_UNDEFINED) |
| return false; |
| |
| const struct v3dv_format *f = v3dv_get_format(format); |
| if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO) |
| return false; |
| } |
| |
| /* Given an uncompressed image with size WxH, if we copy it to a compressed |
| * image, it will result in an image with size W*bWxH*bH, where bW and bH |
| * are the compressed format's block width and height. This means that |
| * copies between compressed and uncompressed images involve different |
| * image sizes, and therefore, we need to take that into account when |
| * setting up the source and destination blit regions below, so they are |
| * consistent from the point of view of the single compatible format |
| * selected for the copy. |
| * |
| * We should take into account that the dimensions of the region provided |
| * to the copy command are specified in terms of the source image. With that |
| * in mind, below we adjust the blit destination region to be consistent with |
| * the source region for the compatible format, so basically, we apply |
| * the block size factor to the destination offset provided by the copy |
| * command (because it is specified in terms of the destination image, not |
| * the source), and then we just add the region copy dimensions to that |
| * (since the region dimensions are already specified in terms of the source |
| * image). |
| */ |
| const VkOffset3D src_start = { |
| region->srcOffset.x / divisor, |
| region->srcOffset.y, |
| region->srcOffset.z, |
| }; |
| const VkOffset3D src_end = { |
| src_start.x + region->extent.width / divisor, |
| src_start.y + region->extent.height, |
| src_start.z + region->extent.depth, |
| }; |
| |
| const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format); |
| const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format); |
| const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format); |
| const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format); |
| const VkOffset3D dst_start = { |
| DIV_ROUND_UP(region->dstOffset.x * src_block_w, dst_block_w) / divisor, |
| DIV_ROUND_UP(region->dstOffset.y * src_block_h, dst_block_h), |
| region->dstOffset.z, |
| }; |
| const VkOffset3D dst_end = { |
| dst_start.x + region->extent.width / divisor, |
| dst_start.y + region->extent.height, |
| dst_start.z + region->extent.depth, |
| }; |
| |
| const VkImageBlit blit_region = { |
| .srcSubresource = region->srcSubresource, |
| .srcOffsets = { src_start, src_end }, |
| .dstSubresource = region->dstSubresource, |
| .dstOffsets = { dst_start, dst_end }, |
| }; |
| bool handled = blit_shader(cmd_buffer, |
| dst, format, |
| src, format, |
| 0, NULL, |
| &blit_region, VK_FILTER_NEAREST); |
| |
| /* We should have selected formats that we can blit */ |
| assert(handled); |
| return handled; |
| } |
| |
| void |
| v3dv_CmdCopyImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageCopy *pRegions) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, src, srcImage); |
| V3DV_FROM_HANDLE(v3dv_image, dst, dstImage); |
| |
| for (uint32_t i = 0; i < regionCount; i++) { |
| if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i])) |
| continue; |
| if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i])) |
| continue; |
| unreachable("Image copy not supported"); |
| } |
| } |
| |
/* Emits the generic tile list for clearing one layer/level of an image:
 * there is nothing to load (the clear value is set up by the RCL prologue
 * and frame setup), we only store the tile buffer contents out to the image.
 */
static void
emit_clear_image_per_tile_list(struct v3dv_job *job,
                               struct framebuffer_data *framebuffer,
                               struct v3dv_image *image,
                               VkImageAspectFlags aspects,
                               uint32_t layer,
                               uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* No loads: the TLB already holds the clear color/depth/stencil */
   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_image_store(cl, framebuffer, image, aspects, layer, level, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Link the tile list we just wrote into the job's RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
/* Emits the per-tile list for a single layer/level clear plus the supertile
 * coordinates that trigger its execution.
 */
static void
emit_clear_image(struct v3dv_job *job,
                 struct v3dv_image *image,
                 struct framebuffer_data *framebuffer,
                 VkImageAspectFlags aspects,
                 uint32_t layer,
                 uint32_t level)
{
   emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
   emit_supertile_coordinates(job, framebuffer);
}
| |
/* Emits the complete render control list for clearing one layer/level of an
 * image: the clear value is passed through the RCL prologue and frame setup,
 * and the per-tile list then stores the cleared tiles out to the image.
 */
static void
emit_clear_image_rcl(struct v3dv_job *job,
                     struct v3dv_image *image,
                     struct framebuffer_data *framebuffer,
                     const union v3dv_clear_value *clear_value,
                     VkImageAspectFlags aspects,
                     uint32_t layer,
                     uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .clear_value = clear_value,
      .image = image,
      .aspects = aspects,
      .layer = layer,
      .level = level,
   };

   struct v3dv_cl *rcl =
      emit_rcl_prologue(job, framebuffer->internal_type, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);
   emit_clear_image(job, image, framebuffer, aspects, layer, level);
   cl_emit(rcl, END_OF_RENDERING, end);
}
| |
/* Packs a Vulkan clear color into the hardware clear value layout.
 *
 * fb_format is the format the TLB framebuffer was set up with and
 * image_format is the image's real format; they differ when we clear an
 * image whose format is not TLB-renderable through a compatible format.
 */
static void
get_hw_clear_color(const VkClearColorValue *color,
                   VkFormat fb_format,
                   VkFormat image_format,
                   uint32_t internal_type,
                   uint32_t internal_bpp,
                   uint32_t *hw_color)
{
   /* internal_bpp is an enum (0/1/2) encoding 32/64/128-bit pixels, so the
    * packed clear size in bytes is 4 << internal_bpp.
    */
   const uint32_t internal_size = 4 << internal_bpp;

   /* If the image format doesn't match the framebuffer format, then we are
    * trying to clear an unsupported tlb format using a compatible
    * format for the framebuffer. In this case, we want to make sure that
    * we pack the clear value according to the original format semantics,
    * not the compatible format.
    */
   if (fb_format == image_format) {
      v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color);
   } else {
      /* Pack with gallium's packing helpers using the original format */
      union util_color uc;
      enum pipe_format pipe_image_format =
         vk_format_to_pipe_format(image_format);
      util_pack_color(color->float32, pipe_image_format, &uc);
      memcpy(hw_color, uc.ui, internal_size);
   }
}
| |
/* Returns true if the implementation is able to handle the case, false
 * otherwise.
 *
 * Clears the requested mip levels and layers of 'image' by rendering the
 * clear value through the TLB, one job per (level, layer) pair.
 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   /* Clears always cover the full image, so only the format check in
    * can_use_tlb can reject them.
    */
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;
   if (!can_use_tlb(image, &origin, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
                                           &internal_type, &internal_bpp);

   /* Translate the API clear value into the hardware representation */
   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
                         internal_type, internal_bpp, &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   /* Resolve VK_REMAINING_MIP_LEVELS to an explicit level range */
   uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
                          image->levels - range->baseMipLevel :
                          range->levelCount;
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->type != VK_IMAGE_TYPE_3D) {
      uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
                             image->array_size - range->baseArrayLayer :
                             range->layerCount;
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer + layer_count;
   } else {
      /* max_layer is recomputed per level inside the loop below */
      min_layer = 0;
      max_layer = 0;
   }

   for (uint32_t level = min_level; level < max_level; level++) {
      /* 3D: clear every depth slice of this mip level */
      if (image->type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->extent.depth, level);
      for (uint32_t layer = min_layer; layer < max_layer; layer++) {
         uint32_t width = u_minify(image->extent.width, level);
         uint32_t height = u_minify(image->extent.height, level);

         struct v3dv_job *job =
            v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

         if (!job)
            return true;

         /* We start a new job for each layer so the frame "depth" is 1 */
         v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);

         struct framebuffer_data framebuffer;
         setup_framebuffer_data(&framebuffer, fb_format, internal_type,
                                &job->frame_tiling);

         v3dv_job_emit_binning_flush(job);

         /* If this triggers it is an application bug: the spec requires
          * that any aspects to clear are present in the image.
          */
         assert(range->aspectMask & image->aspects);

         emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value,
                              range->aspectMask, layer, level);

         v3dv_cmd_buffer_finish_job(cmd_buffer);
      }
   }

   return true;
}
| |
| void |
| v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer, |
| VkImage _image, |
| VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, image, _image); |
| |
| const VkClearValue clear_value = { |
| .color = *pColor, |
| }; |
| |
| for (uint32_t i = 0; i < rangeCount; i++) { |
| if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) |
| continue; |
| unreachable("Unsupported color clear."); |
| } |
| } |
| |
| void |
| v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, |
| VkImage _image, |
| VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, image, _image); |
| |
| const VkClearValue clear_value = { |
| .depthStencil = *pDepthStencil, |
| }; |
| |
| for (uint32_t i = 0; i < rangeCount; i++) { |
| if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) |
| continue; |
| unreachable("Unsupported depth/stencil clear."); |
| } |
| } |
| |
/* Emits the generic tile list for a buffer copy implemented as a linear
 * render: load a tile's worth of source data as render target 0, then store
 * it out to the destination BO at the matching offset.
 */
static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               uint32_t stride,
                               uint32_t format)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0,
                     dst, dst_offset, stride, false, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Link the tile list we just wrote into the job's RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
/* Emits the per-tile list and supertile coordinates for one buffer-copy
 * frame. The row stride is 4 bytes per pixel, matching the 32bpp internal
 * format the copy_buffer jobs are started with.
 */
static void
emit_copy_buffer(struct v3dv_job *job,
                 struct v3dv_bo *dst,
                 struct v3dv_bo *src,
                 uint32_t dst_offset,
                 uint32_t src_offset,
                 struct framebuffer_data *framebuffer,
                 uint32_t format)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_copy_buffer_per_tile_list(job, dst, src,
                                  dst_offset, src_offset,
                                  stride, format);
   emit_supertile_coordinates(job, framebuffer);
}
| |
/* Emits the complete render control list for one buffer-copy job: prologue,
 * frame setup, the copy tile list and the end-of-rendering marker.
 */
static void
emit_copy_buffer_rcl(struct v3dv_job *job,
                     struct v3dv_bo *dst,
                     struct v3dv_bo *src,
                     uint32_t dst_offset,
                     uint32_t src_offset,
                     struct framebuffer_data *framebuffer,
                     uint32_t format)
{
   struct v3dv_cl *rcl =
      emit_rcl_prologue(job, framebuffer->internal_type, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   emit_copy_buffer(job, dst, src, dst_offset, src_offset, framebuffer, format);
   cl_emit(rcl, END_OF_RENDERING, end);
}
| |
/* Figure out a TLB size configuration for a number of pixels to process.
 * Beware that we can't "render" more than 4096x4096 pixels in a single job,
 * if the pixel count is larger than this, the caller might need to split
 * the job and call this function multiple times.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t max_dim_pixels = 4096;
   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;

   uint32_t fb_w, fb_h;
   if (num_pixels > max_pixels) {
      /* Too many pixels for one frame: use the largest framebuffer and let
       * the caller loop for the remainder.
       */
      fb_w = max_dim_pixels;
      fb_h = max_dim_pixels;
   } else {
      /* Start with a 1-pixel-tall strip and repeatedly trade width for
       * height while the width exceeds the hardware limit or the shape is
       * very lopsided; rebalancing only happens when the width is even so
       * halving never drops pixels below the pixel count needed.
       */
      fb_w = num_pixels;
      fb_h = 1;
      for (;;) {
         const bool too_wide = fb_w > max_dim_pixels;
         const bool can_rebalance = (fb_w % 2) == 0 && fb_w > 2 * fb_h;
         if (!too_wide && !can_rebalance)
            break;
         fb_w >>= 1;
         fb_h <<= 1;
      }
   }
   assert(fb_w <= max_dim_pixels && fb_h <= max_dim_pixels);
   assert(fb_w * fb_h <= num_pixels);
   assert(fb_w > 0 && fb_h > 0);

   *width = fb_w;
   *height = fb_h;
}
| |
/* Copies region->size bytes from src+src_offset to dst+dst_offset by
 * rendering the data as a sequence of linear frames. May emit several jobs
 * when the byte count exceeds what a single 4096x4096 frame can move.
 *
 * Returns the last job emitted, or NULL if a job could not be started.
 */
static struct v3dv_job *
copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_bo *dst,
            uint32_t dst_offset,
            struct v3dv_bo *src,
            uint32_t src_offset,
            const VkBufferCopy *region)
{
   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;

   /* Select appropriate pixel format for the copy operation based on the
    * size to copy and the alignment of the source and destination offsets.
    */
   src_offset += region->srcOffset;
   dst_offset += region->dstOffset;
   /* Halve the per-"pixel" item size until both offsets are aligned to it */
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
      item_size /= 2;
   }

   /* ...and until the copy size is a whole number of items */
   while (item_size > 1 && region->size % item_size != 0)
      item_size /= 2;

   assert(region->size % item_size == 0);
   uint32_t num_items = region->size / item_size;
   assert(num_items > 0);

   /* Pick the render target format whose pixel size matches item_size */
   uint32_t format;
   VkFormat vk_format;
   switch (item_size) {
   case 4:
      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      vk_format = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case 2:
      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
      vk_format = VK_FORMAT_R8G8_UINT;
      break;
   default:
      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
      vk_format = VK_FORMAT_R8_UINT;
      break;
   }

   /* Emit one job per frame-sized chunk until all items are copied */
   struct v3dv_job *job = NULL;
   while (num_items > 0) {
      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return NULL;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);

      struct framebuffer_data framebuffer;
      setup_framebuffer_data(&framebuffer, vk_format, internal_type,
                             &job->frame_tiling);

      v3dv_job_emit_binning_flush(job);

      emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset,
                           &framebuffer, format);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      /* Advance past the chunk this job covered */
      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * item_size;
      num_items -= items_copied;
      src_offset += bytes_copied;
      dst_offset += bytes_copied;
   }

   return job;
}
| |
| void |
| v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkBuffer dstBuffer, |
| uint32_t regionCount, |
| const VkBufferCopy *pRegions) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer); |
| V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer); |
| |
| for (uint32_t i = 0; i < regionCount; i++) { |
| copy_buffer(cmd_buffer, |
| dst_buffer->mem->bo, dst_buffer->mem_offset, |
| src_buffer->mem->bo, src_buffer->mem_offset, |
| &pRegions[i]); |
| } |
| } |
| |
| static void |
| destroy_update_buffer_cb(VkDevice _device, |
| uint64_t pobj, |
| VkAllocationCallbacks *alloc) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj); |
| v3dv_bo_free(device, bo); |
| } |
| |
| void |
| v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize dataSize, |
| const void *pData) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer); |
| |
| struct v3dv_bo *src_bo = |
| v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true); |
| if (!src_bo) { |
| fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n"); |
| return; |
| } |
| |
| bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size); |
| if (!ok) { |
| fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n"); |
| return; |
| } |
| |
| memcpy(src_bo->map, pData, dataSize); |
| |
| v3dv_bo_unmap(cmd_buffer->device, src_bo); |
| |
| VkBufferCopy region = { |
| .srcOffset = 0, |
| .dstOffset = dstOffset, |
| .size = dataSize, |
| }; |
| struct v3dv_job *copy_job = |
| copy_buffer(cmd_buffer, |
| dst_buffer->mem->bo, dst_buffer->mem_offset, |
| src_bo, 0, |
| ®ion); |
| if (!copy_job) |
| return; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb); |
| } |
| |
/* Emits the generic tile list for a buffer fill: nothing is loaded (the
 * fill value is set up as a clear by the RCL prologue / frame setup), we
 * only store the tile buffer contents linearly into the destination BO.
 */
static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               uint32_t stride)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* No loads: the TLB already holds the fill value */
   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Link the tile list we just wrote into the job's RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
/* Emits the per-tile list and supertile coordinates for one fill frame.
 * The row stride is 4 bytes per pixel, matching the 32bpp internal format
 * the fill_buffer jobs are started with.
 */
static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct framebuffer_data *framebuffer)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
   emit_supertile_coordinates(job, framebuffer);
}
| |
/* Emits the complete render control list for one fill job. The fill word
 * is passed as the red channel of a color clear value, so every 32-bit item
 * stored out of the TLB carries the requested pattern.
 */
static void
emit_fill_buffer_rcl(struct v3dv_job *job,
                     struct v3dv_bo *bo,
                     uint32_t offset,
                     struct framebuffer_data *framebuffer,
                     uint32_t data)
{
   const union v3dv_clear_value clear_value = {
      .color = { data, 0, 0, 0 },
   };

   const struct rcl_clear_info clear_info = {
      .clear_value = &clear_value,
      .image = NULL,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
      .layer = 0,
      .level = 0,
   };

   struct v3dv_cl *rcl =
      emit_rcl_prologue(job, framebuffer->internal_type, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, &clear_value);
   emit_fill_buffer(job, bo, offset, framebuffer);
   cl_emit(rcl, END_OF_RENDERING, end);
}
| |
/* Fills 'size' bytes of 'bo' starting at 'offset' with the 32-bit word
 * 'data' by rendering clear-value frames. 'size' must be a positive
 * multiple of 4 and fit inside the BO. May emit several jobs when the
 * word count exceeds what a single 4096x4096 frame can cover.
 */
static void
fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_bo *bo,
            uint32_t offset,
            uint32_t size,
            uint32_t data)
{
   assert(size > 0 && size % 4 == 0);
   assert(offset + size <= bo->size);

   /* Each "pixel" is one 32-bit item of the fill pattern */
   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
   uint32_t num_items = size / 4;

   while (num_items > 0) {
      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp);

      struct framebuffer_data framebuffer;
      setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
                             internal_type, &job->frame_tiling);

      v3dv_job_emit_binning_flush(job);

      emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      /* Advance past the chunk this job covered */
      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * 4;
      num_items -= items_copied;
      offset += bytes_copied;
   }
}
| |
| void |
| v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize size, |
| uint32_t data) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer); |
| |
| struct v3dv_bo *bo = dst_buffer->mem->bo; |
| |
| /* From the Vulkan spec: |
| * |
| * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not |
| * a multiple of 4, then the nearest smaller multiple is used." |
| */ |
| if (size == VK_WHOLE_SIZE) { |
| size = dst_buffer->size - dstOffset; |
| size -= size % 4; |
| } |
| |
| fill_buffer(cmd_buffer, bo, dstOffset, size, data); |
| } |
| |
/* Hand-rolled bit-field definitions for the TFU (Texture Formatting Unit)
 * submit interface (struct drm_v3d_submit_tfu).
 */

/* IOA (output address) register fields. DIMTW disables the level 0 write,
 * writing only the following mipmaps.
 */
#define V3D_TFU_IOA_DIMTW (1 << 0)
#define V3D_TFU_IOA_FORMAT_SHIFT 3
#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
#define V3D_TFU_IOA_FORMAT_UIF_XOR 7

/* ICFG (input config) register fields: mipmap count, texture type,
 * output padding (extra UIF blocks past the height) and input format.
 */
#define V3D_TFU_ICFG_NUMMM_SHIFT 5
#define V3D_TFU_ICFG_TTYPE_SHIFT 9

#define V3D_TFU_ICFG_OPAD_SHIFT 22

#define V3D_TFU_ICFG_FORMAT_SHIFT 18
#define V3D_TFU_ICFG_FORMAT_RASTER 0
#define V3D_TFU_ICFG_FORMAT_SAND_128 1
#define V3D_TFU_ICFG_FORMAT_SAND_256 2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| const VkBufferImageCopy *region) |
| { |
| VkFormat vk_format = image->vk_format; |
| const struct v3dv_format *format = image->format; |
| |
| /* Format must be supported for texturing */ |
| if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo, |
| format->tex_type)) { |
| return false; |
| } |
| |
| /* Only color formats */ |
| if (vk_format_is_depth_or_stencil(vk_format)) |
| return false; |
| |
| /* Destination can't be raster format */ |
| const uint32_t mip_level = region->imageSubresource.mipLevel; |
| if (image->slices[mip_level].tiling == VC5_TILING_RASTER) |
| return false; |
| |
| /* Region must include full slice */ |
| const uint32_t offset_x = region->imageOffset.x; |
| const uint32_t offset_y = region->imageOffset.y; |
| if (offset_x != 0 || offset_y != 0) |
| return false; |
| |
| uint32_t width, height; |
| if (region->bufferRowLength == 0) |
| width = region->imageExtent.width; |
| else |
| width = region->bufferRowLength; |
| |
| if (region->bufferImageHeight == 0) |
| height = region->imageExtent.height; |
| else |
| height = region->bufferImageHeight; |
| |
| if (width != image->extent.width || height != image->extent.height) |
| return false; |
| |
| const struct v3d_resource_slice *slice = &image->slices[mip_level]; |
| |
| uint32_t num_layers; |
| if (image->type != VK_IMAGE_TYPE_3D) |
| num_layers = region->imageSubresource.layerCount; |
| else |
| num_layers = region->imageExtent.depth; |
| assert(num_layers > 0); |
| |
| assert(image->mem && image->mem->bo); |
| const struct v3dv_bo *dst_bo = image->mem->bo; |
| |
| assert(buffer->mem && buffer->mem->bo); |
| const struct v3dv_bo *src_bo = buffer->mem->bo; |
| |
| /* Emit a TFU job per layer to copy */ |
| const uint32_t buffer_stride = width * image->cpp; |
| for (int i = 0; i < num_layers; i++) { |
| uint32_t layer = region->imageSubresource.baseArrayLayer + i; |
| |
| struct drm_v3d_submit_tfu tfu = { |
| .ios = (height << 16) | width, |
| .bo_handles = { |
| dst_bo->handle, |
| src_bo != dst_bo ? src_bo->handle : 0 |
| }, |
| }; |
| |
| const uint32_t buffer_offset = |
| buffer->mem_offset + region->bufferOffset + |
| height * buffer_stride * i; |
| |
| const uint32_t src_offset = src_bo->offset + buffer_offset; |
| tfu.iia |= src_offset; |
| tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT; |
| tfu.iis |= width; |
| |
| const uint32_t dst_offset = |
| dst_bo->offset + v3dv_layer_offset(image, mip_level, layer); |
| tfu.ioa |= dst_offset; |
| |
| tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + |
| (slice->tiling - VC5_TILING_LINEARTILE)) << |
| V3D_TFU_IOA_FORMAT_SHIFT; |
| tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; |
| |
| /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the |
| * OPAD field for the destination (how many extra UIF blocks beyond |
| * those necessary to cover the height). |
| */ |
| if (slice->tiling == VC5_TILING_UIF_NO_XOR || |
| slice->tiling == VC5_TILING_UIF_XOR) { |
| uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp); |
| uint32_t implicit_padded_height = align(height, uif_block_h); |
| uint32_t icfg = |
| (slice->padded_height - implicit_padded_height) / uif_block_h; |
| tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; |
| } |
| |
| v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); |
| } |
| |
| return true; |
| } |
| |
/* Emits the per-tile generic list for copying buffer data into one layer of
 * the destination image: loads the raster buffer data into the TLB and then
 * stores the TLB to the tiled image, with extra load/store pairs to preserve
 * the untouched aspect of combined depth/stencil images (see the long
 * comment below).
 */
static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct framebuffer_data *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
          layer < image->extent.depth);

   /* Load TLB from buffer */
   /* A bufferRowLength / bufferImageHeight of 0 means tightly packed */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy to compressed format using a compatible format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));

   /* Stencil data in the buffer is packed at 1 byte per pixel even when the
    * image itself is a wider combined depth/stencil format.
    */
   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
      1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
                                       false, false, true);

   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
                    buffer_offset, buffer_stride, format);

   /* Because we can't do raster loads/stores of Z/S formats we need to
    * use a color tile buffer with a compatible RGBA color format instead.
    * However, when we are uploading a single aspect to a combined
    * depth/stencil image we have the problem that our tile buffer stores don't
    * allow us to mask out the other aspect, so we always write all four RGBA
    * channels to the image and we end up overwriting that other aspect with
    * undefined values. To work around that, we first load the aspect we are
    * not copying from the image memory into a proper Z/S tile buffer. Then we
    * do our store from the color buffer for the aspect we are copying, and
    * after that, we do another store from the Z/S tile buffer to restore the
    * other aspect to its original value.
    */
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      }
   }

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to image */
   emit_image_store(cl, framebuffer, image, imgrsc->aspectMask,
                    imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                    false, true);

   /* Restore the aspect we were not copying (loaded above) */
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false);
      }
   }

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Hook the sub-list we just built into the RCL */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
| |
| static void |
| emit_copy_buffer_to_layer(struct v3dv_job *job, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| struct framebuffer_data *framebuffer, |
| uint32_t layer, |
| const VkBufferImageCopy *region) |
| { |
| emit_frame_setup(job, layer, NULL); |
| emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer, |
| layer, region); |
| emit_supertile_coordinates(job, framebuffer); |
| } |
| |
| static void |
| emit_copy_buffer_to_image_rcl(struct v3dv_job *job, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| struct framebuffer_data *framebuffer, |
| const VkBufferImageCopy *region) |
| { |
| struct v3dv_cl *rcl = |
| emit_rcl_prologue(job, framebuffer->internal_type, NULL); |
| v3dv_return_if_oom(NULL, job); |
| |
| for (int layer = 0; layer < job->frame_tiling.layers; layer++) |
| emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); |
| cl_emit(rcl, END_OF_RENDERING, end); |
| } |
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| const VkBufferImageCopy *region) |
| { |
| VkFormat fb_format; |
| if (!can_use_tlb(image, ®ion->imageOffset, &fb_format)) |
| return false; |
| |
| uint32_t internal_type, internal_bpp; |
| get_internal_type_bpp_for_image_aspects(fb_format, |
| region->imageSubresource.aspectMask, |
| &internal_type, &internal_bpp); |
| |
| uint32_t num_layers; |
| if (image->type != VK_IMAGE_TYPE_3D) |
| num_layers = region->imageSubresource.layerCount; |
| else |
| num_layers = region->imageExtent.depth; |
| assert(num_layers > 0); |
| |
| struct v3dv_job *job = |
| v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); |
| if (!job) |
| return true; |
| |
| /* Handle copy to compressed format using a compatible format */ |
| const uint32_t block_w = vk_format_get_blockwidth(image->vk_format); |
| const uint32_t block_h = vk_format_get_blockheight(image->vk_format); |
| const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); |
| const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); |
| |
| v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp); |
| |
| struct framebuffer_data framebuffer; |
| setup_framebuffer_data(&framebuffer, fb_format, internal_type, |
| &job->frame_tiling); |
| |
| v3dv_job_emit_binning_flush(job); |
| emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region); |
| |
| v3dv_cmd_buffer_finish_job(cmd_buffer); |
| |
| return true; |
| } |
| |
| static bool |
| create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| const VkBufferImageCopy *region) |
| { |
| if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region)) |
| return true; |
| if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region)) |
| return true; |
| return false; |
| } |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| const VkBufferImageCopy *region) |
| { |
| bool handled = false; |
| |
| /* Generally, the bpp of the data in the buffer matches that of the |
| * destination image. The exception is the case where we are uploading |
| * stencil (8bpp) to a combined d24s8 image (32bpp). |
| */ |
| uint32_t buffer_bpp = image->cpp; |
| |
| VkImageAspectFlags aspect = region->imageSubresource.aspectMask; |
| |
| /* We are about to upload the buffer data to an image so we can then |
| * blit that to our destination region. Because we are going to implement |
| * the copy as a blit, we want our blit source and destination formats to be |
| * the same (to avoid any format conversions), so we choose a canonical |
| * format that matches the destination image bpp. |
| */ |
| VkColorComponentFlags cmask = 0; /* Write all components */ |
| VkFormat src_format; |
| VkFormat dst_format; |
| switch (buffer_bpp) { |
| case 16: |
| assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); |
| src_format = VK_FORMAT_R32G32B32A32_UINT; |
| dst_format = src_format; |
| break; |
| case 8: |
| assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); |
| src_format = VK_FORMAT_R16G16B16A16_UINT; |
| dst_format = src_format; |
| break; |
| case 4: |
| switch (aspect) { |
| case VK_IMAGE_ASPECT_COLOR_BIT: |
| src_format = VK_FORMAT_R8G8B8A8_UINT; |
| dst_format = src_format; |
| break; |
| case VK_IMAGE_ASPECT_DEPTH_BIT: |
| assert(image->vk_format == VK_FORMAT_D32_SFLOAT || |
| image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT || |
| image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32); |
| src_format = image->vk_format; |
| dst_format = src_format; |
| break; |
| case VK_IMAGE_ASPECT_STENCIL_BIT: |
| /* Since we don't support separate stencil this is always a stencil |
| * copy to a combined depth/stencil image. Becasue we don't support |
| * separate stencil images, we upload the buffer data to a compatible |
| * color R8UI image, and implement the blit as a compatible color |
| * blit to an RGBA8UI destination masking out writes to components |
| * GBA (which map to the D24 component of a S8D24 image). |
| */ |
| assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT); |
| buffer_bpp = 1; |
| src_format = VK_FORMAT_R8_UINT; |
| dst_format = VK_FORMAT_R8G8B8A8_UINT; |
| cmask = VK_COLOR_COMPONENT_R_BIT; |
| aspect = VK_IMAGE_ASPECT_COLOR_BIT; |
| break; |
| default: |
| unreachable("unsupported aspect"); |
| return handled; |
| }; |
| break; |
| case 2: |
| src_format = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? |
| VK_FORMAT_R16_UINT : image->vk_format; |
| dst_format = src_format; |
| break; |
| case 1: |
| assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); |
| src_format = VK_FORMAT_R8_UINT; |
| dst_format = src_format; |
| break; |
| default: |
| unreachable("unsupported bit-size"); |
| return handled; |
| } |
| |
| /* We should be able to handle the blit if we reached here */ |
| handled = true; |
| |
| /* Obtain the 2D buffer region spec */ |
| uint32_t buf_width, buf_height; |
| if (region->bufferRowLength == 0) |
| buf_width = region->imageExtent.width; |
| else |
| buf_width = region->bufferRowLength; |
| |
| if (region->bufferImageHeight == 0) |
| buf_height = region->imageExtent.height; |
| else |
| buf_height = region->bufferImageHeight; |
| |
| /* Compute layers to copy */ |
| uint32_t num_layers; |
| if (image->type != VK_IMAGE_TYPE_3D) |
| num_layers = region->imageSubresource.layerCount; |
| else |
| num_layers = region->imageExtent.depth; |
| assert(num_layers > 0); |
| |
| struct v3dv_device *device = cmd_buffer->device; |
| VkDevice _device = v3dv_device_to_handle(device); |
| for (uint32_t i = 0; i < num_layers; i++) { |
| /* Create the source blit image from the source buffer. |
| * |
| * We can't texture from a linear image, so we can't just setup a blit |
| * straight from the buffer contents. Instead, we need to upload the |
| * buffer to a tiled image, and then copy that image to the selected |
| * region of the destination. |
| * |
| * FIXME: we could do better than this is we use a blit shader that has |
| * a UBO (for the buffer) as input instead of a texture. Then we would |
| * have to do some arithmetics in the shader to identify the offset into |
| * the UBO that we need to load for each pixel in the destination image |
| * (we would need to support all the possible copy formats we have above). |
| */ |
| VkImageCreateInfo image_info = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| .imageType = VK_IMAGE_TYPE_2D, |
| .format = src_format, |
| .extent = { buf_width, buf_height, 1 }, |
| .mipLevels = 1, |
| .arrayLayers = 1, |
| .samples = VK_SAMPLE_COUNT_1_BIT, |
| .tiling = VK_IMAGE_TILING_OPTIMAL, |
| .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT, |
| .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| .queueFamilyIndexCount = 0, |
| .initialLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }; |
| |
| VkImage buffer_image; |
| VkResult result = |
| v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image); |
| if (result != VK_SUCCESS) |
| return handled; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)buffer_image, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage); |
| |
| /* Allocate and bind memory for the image */ |
| VkDeviceMemory mem; |
| VkMemoryRequirements reqs; |
| v3dv_GetImageMemoryRequirements(_device, buffer_image, &reqs); |
| VkMemoryAllocateInfo alloc_info = { |
| .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| .allocationSize = reqs.size, |
| .memoryTypeIndex = 0, |
| }; |
| result = v3dv_AllocateMemory(_device, &alloc_info, &device->alloc, &mem); |
| if (result != VK_SUCCESS) |
| return handled; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)mem, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory); |
| |
| result = v3dv_BindImageMemory(_device, buffer_image, mem, 0); |
| if (result != VK_SUCCESS) |
| return handled; |
| |
| /* Upload buffer contents for the selected layer */ |
| VkDeviceSize buffer_offset = |
| region->bufferOffset + i * buf_height * buf_width * buffer_bpp; |
| const VkBufferImageCopy buffer_image_copy = { |
| .bufferOffset = buffer_offset, |
| .bufferRowLength = region->bufferRowLength, |
| .bufferImageHeight = region->bufferImageHeight, |
| .imageSubresource = { |
| .aspectMask = aspect, |
| .mipLevel = 0, |
| .baseArrayLayer = 0, |
| .layerCount = 1, |
| }, |
| .imageOffset = { 0, 0, 0 }, |
| .imageExtent = { buf_width, buf_height, 1 } |
| }; |
| handled = |
| create_tiled_image_from_buffer(cmd_buffer, |
| v3dv_image_from_handle(buffer_image), |
| buffer, &buffer_image_copy); |
| if (!handled) { |
| /* This is unexpected, we should have setup the upload to be |
| * conformant to a TFU or TLB copy. |
| */ |
| unreachable("Unable to copy buffer to image through TLB"); |
| return false; |
| } |
| |
| /* Blit-copy the requested image extent from the buffer image to the |
| * destination image. |
| * |
| * Since we are copying, the blit must use the same format on the |
| * destination and source images to avoid format conversions. The |
| * only exception is copying stencil, which we upload to a R8UI source |
| * image, but that we need to blit to a S8D24 destination (the only |
| * stencil format we support). |
| */ |
| const VkImageBlit blit_region = { |
| .srcSubresource = { |
| .aspectMask = aspect, |
| .mipLevel = 0, |
| .baseArrayLayer = 0, |
| .layerCount = 1, |
| }, |
| .srcOffsets = { |
| { 0, 0, 0 }, |
| { region->imageExtent.width, region->imageExtent.height, 1 }, |
| }, |
| .dstSubresource = { |
| .aspectMask = aspect, |
| .mipLevel = region->imageSubresource.mipLevel, |
| .baseArrayLayer = region->imageSubresource.baseArrayLayer, |
| .layerCount = region->imageSubresource.layerCount, |
| }, |
| .dstOffsets = { |
| { |
| region->imageOffset.x, |
| region->imageOffset.y, |
| region->imageOffset.z + i, |
| }, |
| { |
| region->imageOffset.x + region->imageExtent.width, |
| region->imageOffset.y + region->imageExtent.height, |
| region->imageOffset.z + i + 1, |
| }, |
| }, |
| }; |
| |
| handled = blit_shader(cmd_buffer, |
| image, dst_format, |
| v3dv_image_from_handle(buffer_image), src_format, |
| cmask, NULL, |
| &blit_region, VK_FILTER_NEAREST); |
| if (!handled) { |
| /* This is unexpected, we should have a supported blit spec */ |
| unreachable("Unable to blit buffer to destination image"); |
| return false; |
| } |
| } |
| |
| assert(handled); |
| return true; |
| } |
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *image, |
| struct v3dv_buffer *buffer, |
| const VkBufferImageCopy *region) |
| { |
| /* FIXME */ |
| if (vk_format_is_depth_or_stencil(image->vk_format)) |
| return false; |
| |
| if (vk_format_is_compressed(image->vk_format)) |
| return false; |
| |
| if (image->tiling == VK_IMAGE_TILING_LINEAR) |
| return false; |
| |
| uint32_t buffer_width, buffer_height; |
| if (region->bufferRowLength == 0) |
| buffer_width = region->imageExtent.width; |
| else |
| buffer_width = region->bufferRowLength; |
| |
| if (region->bufferImageHeight == 0) |
| buffer_height = region->imageExtent.height; |
| else |
| buffer_height = region->bufferImageHeight; |
| |
| uint32_t buffer_stride = buffer_width * image->cpp; |
| uint32_t buffer_layer_stride = buffer_stride * buffer_height; |
| |
| uint32_t num_layers; |
| if (image->type != VK_IMAGE_TYPE_3D) |
| num_layers = region->imageSubresource.layerCount; |
| else |
| num_layers = region->imageExtent.depth; |
| assert(num_layers > 0); |
| |
| struct v3dv_job *job = |
| v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, |
| V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE, |
| cmd_buffer, -1); |
| if (!job) |
| return true; |
| |
| job->cpu.copy_buffer_to_image.image = image; |
| job->cpu.copy_buffer_to_image.buffer = buffer; |
| job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride; |
| job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride; |
| job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset; |
| job->cpu.copy_buffer_to_image.image_extent = region->imageExtent; |
| job->cpu.copy_buffer_to_image.image_offset = region->imageOffset; |
| job->cpu.copy_buffer_to_image.mip_level = |
| region->imageSubresource.mipLevel; |
| job->cpu.copy_buffer_to_image.base_layer = |
| region->imageSubresource.baseArrayLayer; |
| job->cpu.copy_buffer_to_image.layer_count = num_layers; |
| |
| list_addtail(&job->list_link, &cmd_buffer->jobs); |
| |
| return true; |
| } |
| |
| void |
| v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, image, dstImage); |
| |
| for (uint32_t i = 0; i < regionCount; i++) { |
| if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[i])) |
| continue; |
| if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[i])) |
| continue; |
| if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[i])) |
| continue; |
| if (copy_buffer_to_image_blit(cmd_buffer, image, buffer, &pRegions[i])) |
| continue; |
| unreachable("Unsupported buffer to image copy."); |
| } |
| } |
| |
| static void |
| emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *dst, |
| uint32_t dst_mip_level, |
| uint32_t dst_layer, |
| struct v3dv_image *src, |
| uint32_t src_mip_level, |
| uint32_t src_layer, |
| uint32_t width, |
| uint32_t height) |
| { |
| const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; |
| const struct v3d_resource_slice *dst_slice = &dst->slices[src_mip_level]; |
| |
| assert(dst->mem && dst->mem->bo); |
| const struct v3dv_bo *dst_bo = dst->mem->bo; |
| |
| assert(src->mem && src->mem->bo); |
| const struct v3dv_bo *src_bo = src->mem->bo; |
| |
| struct drm_v3d_submit_tfu tfu = { |
| .ios = (height << 16) | width, |
| .bo_handles = { |
| dst_bo->handle, |
| src != dst ? src_bo->handle : 0 |
| }, |
| }; |
| |
| const uint32_t src_offset = |
| src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); |
| tfu.iia |= src_offset; |
| |
| uint32_t icfg; |
| if (src_slice->tiling == VC5_TILING_RASTER) { |
| icfg = V3D_TFU_ICFG_FORMAT_RASTER; |
| } else { |
| icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE + |
| (src_slice->tiling - VC5_TILING_LINEARTILE); |
| } |
| tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT; |
| |
| const uint32_t dst_offset = |
| dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); |
| tfu.ioa |= dst_offset; |
| |
| tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + |
| (dst_slice->tiling - VC5_TILING_LINEARTILE)) << |
| V3D_TFU_IOA_FORMAT_SHIFT; |
| tfu.icfg |= dst->format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; |
| |
| switch (src_slice->tiling) { |
| case VC5_TILING_UIF_NO_XOR: |
| case VC5_TILING_UIF_XOR: |
| tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp)); |
| break; |
| case VC5_TILING_RASTER: |
| tfu.iis |= src_slice->stride / src->cpp; |
| break; |
| default: |
| break; |
| } |
| |
| /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the |
| * OPAD field for the destination (how many extra UIF blocks beyond |
| * those necessary to cover the height). |
| */ |
| if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR || |
| dst_slice->tiling == VC5_TILING_UIF_XOR) { |
| uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp); |
| uint32_t implicit_padded_height = align(height, uif_block_h); |
| uint32_t icfg = |
| (dst_slice->padded_height - implicit_padded_height) / uif_block_h; |
| tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; |
| } |
| |
| v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); |
| } |
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| */ |
| static bool |
| blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *dst, |
| struct v3dv_image *src, |
| const VkImageBlit *region, |
| VkFilter filter) |
| { |
| /* FIXME? The v3d driver seems to ignore filtering completely! */ |
| if (filter != VK_FILTER_NEAREST) |
| return false; |
| |
| /* Format must match */ |
| if (src->vk_format != dst->vk_format) |
| return false; |
| |
| VkFormat vk_format = dst->vk_format; |
| const struct v3dv_format *format = dst->format; |
| |
| /* Format must be supported for texturing */ |
| if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo, |
| format->tex_type)) { |
| return false; |
| } |
| |
| /* Only color formats */ |
| if (vk_format_is_depth_or_stencil(vk_format)) |
| return false; |
| |
| #if 0 |
| /* FIXME: Only 2D images? */ |
| if (dst->type == VK_IMAGE_TYPE_2D || src->type == VK_IMAGE_TYPE_2D) |
| return false; |
| #endif |
| |
| /* Destination can't be raster format */ |
| const uint32_t dst_mip_level = region->dstSubresource.mipLevel; |
| if (dst->slices[dst_mip_level].tiling == VC5_TILING_RASTER) |
| return false; |
| |
| /* Source region must start at (0,0) */ |
| if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0) |
| return false; |
| |
| /* Destination image must be complete */ |
| if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0) |
| return false; |
| |
| const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level); |
| const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level); |
| if (region->dstOffsets[1].x < dst_width - 1|| |
| region->dstOffsets[1].y < dst_height - 1) { |
| return false; |
| } |
| |
| /* No scaling */ |
| if (region->srcOffsets[1].x != region->dstOffsets[1].x || |
| region->srcOffsets[1].y != region->dstOffsets[1].y) { |
| return false; |
| } |
| |
| /* Emit a TFU job for each layer to blit */ |
| assert(region->dstSubresource.layerCount == |
| region->srcSubresource.layerCount); |
| const uint32_t layer_count = region->dstSubresource.layerCount; |
| const uint32_t src_mip_level = region->srcSubresource.mipLevel; |
| for (uint32_t i = 0; i < layer_count; i++) { |
| uint32_t src_layer, dst_layer; |
| if (src->type == VK_IMAGE_TYPE_3D) { |
| assert(layer_count == 1); |
| src_layer = u_minify(src->extent.depth, src_mip_level); |
| } else { |
| src_layer = region->srcSubresource.baseArrayLayer + i; |
| } |
| |
| if (dst->type == VK_IMAGE_TYPE_3D) { |
| assert(layer_count == 1); |
| dst_layer = u_minify(dst->extent.depth, dst_mip_level); |
| } else { |
| dst_layer = region->dstSubresource.baseArrayLayer + i; |
| } |
| |
| emit_tfu_job(cmd_buffer, |
| dst, dst_mip_level, dst_layer, |
| src, src_mip_level, src_layer, |
| dst_width, dst_height); |
| } |
| |
| return true; |
| } |
| |
| static bool |
| format_needs_software_int_clamp(VkFormat format) |
| { |
| switch (format) { |
| case VK_FORMAT_A2R10G10B10_UINT_PACK32: |
| case VK_FORMAT_A2R10G10B10_SINT_PACK32: |
| case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
| case VK_FORMAT_A2B10G10R10_SINT_PACK32: |
| return true; |
| default: |
| return false; |
| }; |
| } |
| |
| static void |
| get_blit_pipeline_cache_key(VkFormat dst_format, |
| VkFormat src_format, |
| VkColorComponentFlags cmask, |
| uint8_t *key) |
| { |
| memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE); |
| |
| uint32_t *p = (uint32_t *) key; |
| |
| *p = dst_format; |
| p++; |
| |
| /* Generally, when blitting from a larger format to a smaller format |
| * the hardware takes care of clamping the source to the RT range. |
| * Specifically, for integer formats, this is done by using |
| * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this |
| * clamps to the bit-size of the render type, and some formats, such as |
| * rgb10a2_uint have a 16-bit type, so it won't do what we need and we |
| * require to clamp in software. In these cases, we need to amend the blit |
| * shader with clamp code that depends on both the src and dst formats, so |
| * we need the src format to be part of the key. |
| */ |
| *p = format_needs_software_int_clamp(dst_format) ? src_format : 0; |
| p++; |
| |
| *p = cmask; |
| p++; |
| |
| assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE); |
| } |
| |
/**
 * Creates the pipeline layout shared by all blit pipelines, creating the
 * descriptor set layout first if it doesn't exist yet.
 *
 * The descriptor set layout has a single combined image/sampler binding
 * (the blit source texture) visible to the fragment stage. The pipeline
 * layout additionally declares a 20-byte vertex-stage push constant range,
 * which blit_shader() fills with 5 floats: the source texture coordinate
 * box (x0, y0, x1, y1) plus the Z coordinate for 3D sources.
 *
 * On entry *descriptor_set_layout may already be valid (non-zero), in which
 * case it is reused; *pipeline_layout must not have been created yet.
 *
 * Returns true on success.
 */
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}
| |
/**
 * Creates a single-subpass render pass for a blit to dst_format.
 *
 * The pass has exactly one attachment in the destination format, bound as
 * a color attachment when dst_format is a color format and as the
 * depth/stencil attachment otherwise. Contents are loaded and stored so
 * that blits to a sub-region preserve the rest of the destination.
 *
 * Returns true on success.
 */
static bool
create_blit_render_pass(struct v3dv_device *device,
                        VkFormat dst_format,
                        VkFormat src_format,
                        VkRenderPass *pass)
{
   const bool is_color_blit = vk_format_is_color(dst_format);

   /* FIXME: if blitting to tile boundaries or to the whole image, we could
    * use LOAD_DONT_CARE, but then we would have to include that in the
    * pipeline hash key. Or maybe we should just create both render passes and
    * use one or the other at draw time since they would both be compatible
    * with the pipeline anyway
    */
   VkAttachmentDescription att = {
      .format = dst_format,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkAttachmentReference att_ref = {
      .attachment = 0,
      .layout = VK_IMAGE_LAYOUT_GENERAL,
   };

   /* The single attachment doubles as color or depth/stencil depending on
    * the destination format.
    */
   VkSubpassDescription subpass = {
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .inputAttachmentCount = 0,
      .colorAttachmentCount = is_color_blit ? 1 : 0,
      .pColorAttachments = is_color_blit ? &att_ref : NULL,
      .pResolveAttachments = NULL,
      .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
      .preserveAttachmentCount = 0,
      .pPreserveAttachments = NULL,
   };

   VkRenderPassCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
      .attachmentCount = 1,
      .pAttachments = &att,
      .subpassCount = 1,
      .pSubpasses = &subpass,
      .dependencyCount = 0,
      .pDependencies = NULL,
   };

   VkResult result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
                                           &info, &device->alloc, pass);
   return result == VK_SUCCESS;
}
| |
/**
 * Emits NIR that computes the clip-space position of the current vertex of
 * a full-screen quad, drawn as a 4-vertex triangle strip, from the vertex
 * id. Returns a vec4 (x, y, 0, 1).
 */
static nir_ssa_def *
gen_rect_vertices(nir_builder *b)
{
   nir_intrinsic_instr *vertex_id =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_load_vertex_id);
   nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
   nir_builder_instr_insert(b, &vertex_id->instr);


   /* vertex 0: -1.0, -1.0
    * vertex 1: -1.0,  1.0
    * vertex 2:  1.0, -1.0
    * vertex 3:  1.0,  1.0
    *
    * so:
    *
    * channel 0 is vertex_id < 2 ? -1.0 :  1.0
    * channel 1 is vertex id & 1 ?  1.0 : -1.0
    */

   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(b, c0cmp,
                       nir_imm_float(b, -1.0f),
                       nir_imm_float(b, 1.0f));

   comp[1] = nir_bcsel(b, c1cmp,
                       nir_imm_float(b, 1.0f),
                       nir_imm_float(b, -1.0f));
   comp[2] = nir_imm_float(b, 0.0f);
   comp[3] = nir_imm_float(b, 1.0f);
   return nir_vec(b, comp, 4);
}
| |
/**
 * Emits NIR that computes the source texture coordinate for the current
 * vertex of the blit quad from the push constant data.
 *
 * Push constant layout (matches the 20-byte vertex push constant range
 * declared in create_blit_pipeline_layout() and filled by blit_shader()):
 *   bytes  0-15: tex box as 4 floats (src0_x, src0_y, src1_x, src1_y)
 *   bytes 16-19: Z coordinate for 3D blit sources (1 float)
 *
 * Returns a vec4 (s, t, z, 1).
 */
static nir_ssa_def *
gen_tex_coords(nir_builder *b)
{
   nir_intrinsic_instr *tex_box =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   tex_box->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_base(tex_box, 0);
   nir_intrinsic_set_range(tex_box, 16);
   tex_box->num_components = 4;
   nir_ssa_dest_init(&tex_box->instr, &tex_box->dest, 4, 32, "tex_box");
   nir_builder_instr_insert(b, &tex_box->instr);

   nir_intrinsic_instr *tex_z =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   tex_z->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_base(tex_z, 16);
   nir_intrinsic_set_range(tex_z, 4);
   tex_z->num_components = 1;
   nir_ssa_dest_init(&tex_z->instr, &tex_z->dest, 1, 32, "tex_z");
   nir_builder_instr_insert(b, &tex_z->instr);

   nir_intrinsic_instr *vertex_id =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_vertex_id);
   nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
   nir_builder_instr_insert(b, &vertex_id->instr);

   /* vertex 0: src0_x, src0_y
    * vertex 1: src0_x, src1_y
    * vertex 2: src1_x, src0_y
    * vertex 3: src1_x, src1_y
    *
    * So:
    *
    * channel 0 is vertex_id < 2 ? src0_x : src1_x
    * channel 1 is vertex id & 1 ? src1_y : src0_y
    */

   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(b, c0cmp,
                       nir_channel(b, &tex_box->dest.ssa, 0),
                       nir_channel(b, &tex_box->dest.ssa, 2));

   comp[1] = nir_bcsel(b, c1cmp,
                       nir_channel(b, &tex_box->dest.ssa, 3),
                       nir_channel(b, &tex_box->dest.ssa, 1));
   comp[2] = &tex_z->dest.ssa;
   comp[3] = nir_imm_float(b, 1.0f);
   return nir_vec(b, comp, 4);
}
| |
/**
 * Emits a texture sample of the blit source at tex_pos and returns the
 * resulting vec4.
 *
 * Declares the combined image/sampler uniform at set 0, binding 0, which
 * must match the descriptor set layout created in
 * create_blit_pipeline_layout().
 */
static nir_ssa_def *
build_nir_tex_op(struct nir_builder *b,
                 struct v3dv_device *device,
                 nir_ssa_def *tex_pos,
                 enum glsl_base_type tex_type,
                 enum glsl_sampler_dim dim)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(dim, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   /* The same deref feeds both the texture and sampler sources since we
    * use a combined image/sampler descriptor.
    */
   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
   tex->sampler_dim = dim;
   tex->op = nir_texop_tex;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->src[2].src_type = nir_tex_src_sampler_deref;
   tex->src[2].src = nir_src_for_ssa(tex_deref);
   tex->dest_type =
      nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(tex_type));
   tex->is_array = glsl_sampler_type_is_array(sampler_type);
   tex->coord_components = tex_pos->num_components;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);
   return &tex->dest.ssa;
}
| |
| static nir_shader * |
| get_blit_vs() |
| { |
| nir_builder b; |
| const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); |
| nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options); |
| b.shader->info.name = ralloc_strdup(b.shader, "meta blit vs"); |
| |
| const struct glsl_type *vec4 = glsl_vec4_type(); |
| |
| nir_variable *vs_out_pos = |
| nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position"); |
| vs_out_pos->data.location = VARYING_SLOT_POS; |
| |
| nir_variable *vs_out_tex_coord = |
| nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord"); |
| vs_out_tex_coord->data.location = VARYING_SLOT_VAR0; |
| vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH; |
| |
| nir_ssa_def *pos = gen_rect_vertices(&b); |
| nir_store_var(&b, vs_out_pos, pos, 0xf); |
| |
| nir_ssa_def *tex_coord = gen_tex_coords(&b); |
| nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf); |
| |
| return b.shader; |
| } |
| |
| static uint32_t |
| get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim) |
| { |
| switch (sampler_dim) { |
| case GLSL_SAMPLER_DIM_1D: return 0x1; |
| case GLSL_SAMPLER_DIM_2D: return 0x3; |
| case GLSL_SAMPLER_DIM_3D: return 0x7; |
| default: |
| unreachable("invalid sampler dim"); |
| }; |
| } |
| |
/**
 * Builds the NIR fragment shader for a color blit to dst_format, sampling
 * from a source with the given sampler dimension.
 *
 * The shader reads the interpolated texture coordinate from
 * VARYING_SLOT_VAR0 (written by the vertex shader in get_blit_vs()),
 * samples the source texture and writes the result to FRAG_RESULT_DATA0.
 * For destinations that need software integer clamping (see
 * format_needs_software_int_clamp()), per-channel clamp code is appended.
 */
static nir_shader *
get_color_blit_fs(struct v3dv_device *device,
                  VkFormat dst_format,
                  VkFormat src_format,
                  enum glsl_sampler_dim sampler_dim)
{
   nir_builder b;
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, "meta blit fs");

   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *fs_in_tex_coord =
      nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
   fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;

   const bool is_int_blit = vk_format_is_int(dst_format);
   const struct glsl_type *fs_out_type =
      is_int_blit ? glsl_uvec4_type() : glsl_vec4_type();

   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0;

   /* Trim the vec4 coordinate down to the components the sampler dimension
    * actually uses (x / xy / xyz).
    */
   nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
   const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
   tex_coord = nir_channels(&b, tex_coord, channel_mask);

   nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord,
                                         glsl_get_base_type(fs_out_type),
                                         sampler_dim);

   /* For integer textures, if the bit-size of the destination is too small to
    * hold source value, Vulkan (CTS) expects the implementation to clamp to the
    * maximum value the destination can hold. The hardware can clamp to the
    * render target type, which usually matches the component bit-size, but
    * there are some cases that won't match, such as rgb10a2, which has a 16-bit
    * render target type, so in these cases we need to clamp manually.
    */
   if (format_needs_software_int_clamp(dst_format)) {
      assert(is_int_blit);
      enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
      enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);

      nir_ssa_def *c[4];
      for (uint32_t i = 0; i < 4; i++) {
         c[i] = nir_channel(&b, color, i);

         const uint32_t src_bit_size =
            util_format_get_component_bits(src_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);
         const uint32_t dst_bit_size =
            util_format_get_component_bits(dst_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);

         /* No clamping needed if the destination can hold the source range. */
         if (dst_bit_size >= src_bit_size)
            continue;

         if (util_format_is_pure_uint(dst_pformat)) {
            /* Unsigned: clamp to [0, 2^bits - 1]. */
            nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
            c[i] = nir_umin(&b, c[i], max);
         } else {
            /* Signed: clamp to [-2^(bits-1), 2^(bits-1) - 1]. */
            nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
            nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
            c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
         }
      }

      color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
   }

   nir_store_var(&b, fs_out_color, color, 0xf);

   return b.shader;
}
| |
| static bool |
| create_pipeline(struct v3dv_device *device, |
| struct v3dv_render_pass *pass, |
| struct nir_shader *vs_nir, |
| struct nir_shader *fs_nir, |
| const VkPipelineVertexInputStateCreateInfo *vi_state, |
| const VkPipelineDepthStencilStateCreateInfo *ds_state, |
| const VkPipelineColorBlendStateCreateInfo *cb_state, |
| const VkPipelineLayout layout, |
| VkPipeline *pipeline) |
| { |
| struct v3dv_shader_module vs_m; |
| struct v3dv_shader_module fs_m; |
| |
| v3dv_shader_module_internal_init(&vs_m, vs_nir); |
| v3dv_shader_module_internal_init(&fs_m, fs_nir); |
| |
| VkPipelineShaderStageCreateInfo stages[2] = { |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_VERTEX_BIT, |
| .module = v3dv_shader_module_to_handle(&vs_m), |
| .pName = "main", |
| }, |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_FRAGMENT_BIT, |
| .module = v3dv_shader_module_to_handle(&fs_m), |
| .pName = "main", |
| }, |
| }; |
| |
| VkGraphicsPipelineCreateInfo info = { |
| .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| |
| .stageCount = 2, |
| .pStages = stages, |
| |
| .pVertexInputState = vi_state, |
| |
| .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, |
| .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, |
| .primitiveRestartEnable = false, |
| }, |
| |
| .pViewportState = &(VkPipelineViewportStateCreateInfo) { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |
| .viewportCount = 1, |
| .scissorCount = 1, |
| }, |
| |
| .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, |
| .rasterizerDiscardEnable = false, |
| .polygonMode = VK_POLYGON_MODE_FILL, |
| .cullMode = VK_CULL_MODE_NONE, |
| .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, |
| .depthBiasEnable = false, |
| }, |
| |
| .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, |
| .sampleShadingEnable = false, |
| .pSampleMask = NULL, |
| .alphaToCoverageEnable = false, |
| .alphaToOneEnable = false, |
| }, |
| |
| .pDepthStencilState = ds_state, |
| |
| .pColorBlendState = cb_state, |
| |
| /* The meta clear pipeline declares all state as dynamic. |
| * As a consequence, vkCmdBindPipeline writes no dynamic state |
| * to the cmd buffer. Therefore, at the end of the meta clear, |
| * we need only restore dynamic state that was vkCmdSet. |
| */ |
| .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, |
| .dynamicStateCount = 6, |
| .pDynamicStates = (VkDynamicState[]) { |
| VK_DYNAMIC_STATE_VIEWPORT, |
| VK_DYNAMIC_STATE_SCISSOR, |
| VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, |
| VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, |
| VK_DYNAMIC_STATE_STENCIL_REFERENCE, |
| VK_DYNAMIC_STATE_BLEND_CONSTANTS, |
| VK_DYNAMIC_STATE_DEPTH_BIAS, |
| VK_DYNAMIC_STATE_LINE_WIDTH, |
| }, |
| }, |
| |
| .flags = 0, |
| .layout = layout, |
| .renderPass = v3dv_render_pass_to_handle(pass), |
| .subpass = 0, |
| }; |
| |
| VkResult result = |
| v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device), |
| VK_NULL_HANDLE, |
| 1, &info, |
| &device->alloc, |
| pipeline); |
| |
| ralloc_free(vs_nir); |
| ralloc_free(fs_nir); |
| |
| return result == VK_SUCCESS; |
| } |
| |
| static enum glsl_sampler_dim |
| get_sampler_dim_for_image_type(VkImageType type) |
| { |
| switch (type) { |
| case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D; |
| case VK_IMAGE_TYPE_2D: return GLSL_SAMPLER_DIM_2D; |
| case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D; |
| default: |
| unreachable("Invalid image type"); |
| } |
| } |
| |
/**
 * Creates a pipeline that blits to dst_format from a src_type image in
 * src_format, writing only the channels selected by cmask.
 *
 * Depth/stencil blits must have been rewritten to compatible color blits
 * before reaching this point (see blit_shader()).
 *
 * Returns true on success; on success *pipeline holds the new pipeline.
 */
static bool
create_blit_pipeline(struct v3dv_device *device,
                     VkFormat dst_format,
                     VkFormat src_format,
                     VkColorComponentFlags cmask,
                     VkImageType src_type,
                     VkRenderPass _pass,
                     VkPipelineLayout pipeline_layout,
                     VkPipeline *pipeline)
{
   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);

   /* We always rewrite depth/stencil blits to compatible color blits */
   assert(vk_format_is_color(dst_format));
   assert(vk_format_is_color(src_format));

   const enum glsl_sampler_dim sampler_dim =
      get_sampler_dim_for_image_type(src_type);

   /* create_pipeline() takes ownership of both shaders and frees them. */
   nir_shader *vs_nir = get_blit_vs();
   nir_shader *fs_nir =
      get_color_blit_fs(device, dst_format, src_format, sampler_dim);

   /* The blit quad is generated from the vertex id, so no vertex input. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };

   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
      .blendEnable = false,
      .colorWriteMask = cmask,
   };

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = 1,
      .pAttachments = blend_att_state
   };

   return create_pipeline(device,
                          pass,
                          vs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
| |
| /** |
| * Return a pipeline suitable for blitting the requested aspect given the |
| * destination and source formats. |
| */ |
| static bool |
| get_blit_pipeline(struct v3dv_device *device, |
| VkFormat dst_format, |
| VkFormat src_format, |
| VkColorComponentFlags cmask, |
| VkImageType src_type, |
| struct v3dv_meta_blit_pipeline **pipeline) |
| { |
| bool ok = true; |
| |
| mtx_lock(&device->meta.mtx); |
| if (!device->meta.blit.playout) { |
| ok = create_blit_pipeline_layout(device, |
| &device->meta.blit.dslayout, |
| &device->meta.blit.playout); |
| } |
| mtx_unlock(&device->meta.mtx); |
| if (!ok) |
| return false; |
| |
| uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE]; |
| get_blit_pipeline_cache_key(dst_format, src_format, cmask, key); |
| mtx_lock(&device->meta.mtx); |
| struct hash_entry *entry = |
| _mesa_hash_table_search(device->meta.blit.cache[src_type], &key); |
| if (entry) { |
| mtx_unlock(&device->meta.mtx); |
| *pipeline = entry->data; |
| return true; |
| } |
| |
| *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| |
| if (*pipeline == NULL) |
| goto fail; |
| |
| ok = create_blit_render_pass(device, dst_format, src_format, |
| &(*pipeline)->pass); |
| if (!ok) |
| goto fail; |
| |
| ok = create_blit_pipeline(device, |
| dst_format, |
| src_format, |
| cmask, |
| src_type, |
| (*pipeline)->pass, |
| device->meta.blit.playout, |
| &(*pipeline)->pipeline); |
| if (!ok) |
| goto fail; |
| |
| _mesa_hash_table_insert(device->meta.blit.cache[src_type], &key, *pipeline); |
| |
| mtx_unlock(&device->meta.mtx); |
| return true; |
| |
| fail: |
| mtx_unlock(&device->meta.mtx); |
| |
| VkDevice _device = v3dv_device_to_handle(device); |
| if (*pipeline) { |
| if ((*pipeline)->pass) |
| v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc); |
| if ((*pipeline)->pipeline) |
| v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc); |
| vk_free(&device->alloc, *pipeline); |
| *pipeline = NULL; |
| } |
| |
| return false; |
| } |
| |
| static void |
| compute_blit_box(const VkOffset3D *offsets, |
| uint32_t image_w, uint32_t image_h, |
| uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h, |
| bool *mirror_x, bool *mirror_y) |
| { |
| if (offsets[1].x >= offsets[0].x) { |
| *mirror_x = false; |
| *x = MIN2(offsets[0].x, image_w - 1); |
| *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x); |
| } else { |
| *mirror_x = true; |
| *x = MIN2(offsets[1].x, image_w - 1); |
| *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x); |
| } |
| if (offsets[1].y >= offsets[0].y) { |
| *mirror_y = false; |
| *y = MIN2(offsets[0].y, image_h - 1); |
| *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y); |
| } else { |
| *mirror_y = true; |
| *y = MIN2(offsets[1].y, image_h - 1); |
| *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y); |
| } |
| } |
| |
/**
 * Creates the per-command-buffer descriptor pool used for blit source
 * textures, if it doesn't exist yet.
 *
 * NOTE(review): the result of v3dv_CreateDescriptorPool() is ignored; on
 * failure dspool stays unset and the later descriptor set allocation in
 * blit_shader() is the point that reports the error — confirm this is the
 * intended error path.
 */
static void
ensure_meta_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->meta.blit.dspool)
      return;

   /*
    * FIXME: the size for the descriptor pool is based on what it is needed
    * for the tests/programs that we tested. It would be good to try to use a
    * smaller value, and create descriptor pool on demand as we find ourselves
    * running out of pool space.
    */
   const uint32_t POOL_DESCRIPTOR_COUNT = 1024;

   VkDescriptorPoolSize pool_size = {
      .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
      .descriptorCount = POOL_DESCRIPTOR_COUNT,
   };

   VkDescriptorPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
      .maxSets = POOL_DESCRIPTOR_COUNT,
      .poolSizeCount = 1,
      .pPoolSizes = &pool_size,
      .flags = 0,
   };

   v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
                             &info,
                             &cmd_buffer->device->alloc,
                             &cmd_buffer->meta.blit.dspool);
}
| |
| /** |
| * Returns true if the implementation supports the requested operation (even if |
| * it failed to process it, for example, due to an out-of-memory error). |
| * |
| * The caller can specify the channels on the destination to be written via the |
| * cmask parameter (which can be 0 to default to all channels), as well as a |
| * swizzle to apply to the source via the cswizzle parameter (which can be NULL |
| * to use the default identity swizzle). |
| */ |
| static bool |
| blit_shader(struct v3dv_cmd_buffer *cmd_buffer, |
| struct v3dv_image *dst, |
| VkFormat dst_format, |
| struct v3dv_image *src, |
| VkFormat src_format, |
| VkColorComponentFlags cmask, |
| VkComponentMapping *cswizzle, |
| const VkImageBlit *_region, |
| VkFilter filter) |
| { |
| bool handled = true; |
| |
| /* We don't support rendering to linear depth/stencil, this should have |
| * been rewritten to a compatible color blit by the caller. |
| */ |
| assert(dst->tiling != VK_IMAGE_TILING_LINEAR || |
| !vk_format_is_depth_or_stencil(dst_format)); |
| |
| VkImageBlit region = *_region; |
| |
| /* Rewrite combined D/S blits to compatible color blits */ |
| if (vk_format_is_depth_or_stencil(dst_format)) { |
| assert(src_format == dst_format); |
| assert(cmask == 0); |
| switch(dst_format) { |
| case VK_FORMAT_D16_UNORM: |
| dst_format = VK_FORMAT_R16_UINT; |
| break; |
| case VK_FORMAT_D32_SFLOAT: |
| dst_format = VK_FORMAT_R32_UINT; |
| break; |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| cmask |= VK_COLOR_COMPONENT_G_BIT | |
| VK_COLOR_COMPONENT_B_BIT | |
| VK_COLOR_COMPONENT_A_BIT; |
| } |
| if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT); |
| cmask |= VK_COLOR_COMPONENT_R_BIT; |
| } |
| dst_format = VK_FORMAT_R8G8B8A8_UINT; |
| break; |
| default: |
| unreachable("Unsupported depth/stencil format"); |
| }; |
| src_format = dst_format; |
| region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; |
| region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; |
| } |
| |
| if (cmask == 0) { |
| cmask = VK_COLOR_COMPONENT_R_BIT | |
| VK_COLOR_COMPONENT_G_BIT | |
| VK_COLOR_COMPONENT_B_BIT | |
| VK_COLOR_COMPONENT_A_BIT; |
| } |
| |
| VkComponentMapping ident_swizzle = { |
| .r = VK_COMPONENT_SWIZZLE_IDENTITY, |
| .g = VK_COMPONENT_SWIZZLE_IDENTITY, |
| .b = VK_COMPONENT_SWIZZLE_IDENTITY, |
| .a = VK_COMPONENT_SWIZZLE_IDENTITY, |
| }; |
| if (!cswizzle) |
| cswizzle = &ident_swizzle; |
| |
| /* When we get here from a copy between compressed / uncompressed images |
| * we choose to specify the destination blit region based on the size |
| * semantics of the source image of the copy (see copy_image_blit), so we |
| * need to apply those same semantics here when we compute the size of the |
| * destination image level. |
| */ |
| const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format); |
| const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format); |
| const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format); |
| const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format); |
| const uint32_t dst_level_w = |
| u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w), |
| region.dstSubresource.mipLevel); |
| const uint32_t dst_level_h = |
| u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h), |
| region.dstSubresource.mipLevel); |
| |
| const uint32_t src_level_w = |
| u_minify(src->extent.width, region.srcSubresource.mipLevel); |
| const uint32_t src_level_h = |
| u_minify(src->extent.height, region.srcSubresource.mipLevel); |
| const uint32_t src_level_d = |
| u_minify(src->extent.depth, region.srcSubresource.mipLevel); |
| |
| uint32_t dst_x, dst_y, dst_w, dst_h; |
| bool dst_mirror_x, dst_mirror_y; |
| compute_blit_box(region.dstOffsets, |
| dst_level_w, dst_level_h, |
| &dst_x, &dst_y, &dst_w, &dst_h, |
| &dst_mirror_x, &dst_mirror_y); |
| |
| uint32_t src_x, src_y, src_w, src_h; |
| bool src_mirror_x, src_mirror_y; |
| compute_blit_box(region.srcOffsets, |
| src_level_w, src_level_h, |
| &src_x, &src_y, &src_w, &src_h, |
| &src_mirror_x, &src_mirror_y); |
| |
| uint32_t min_dst_layer; |
| uint32_t max_dst_layer; |
| if (dst->type != VK_IMAGE_TYPE_3D) { |
| min_dst_layer = region.dstSubresource.baseArrayLayer; |
| max_dst_layer = min_dst_layer + region.dstSubresource.layerCount; |
| } else { |
| min_dst_layer = region.dstOffsets[0].z; |
| max_dst_layer = region.dstOffsets[1].z; |
| } |
| |
| uint32_t min_src_layer; |
| uint32_t max_src_layer; |
| if (src->type != VK_IMAGE_TYPE_3D) { |
| min_src_layer = region.srcSubresource.baseArrayLayer; |
| max_src_layer = min_src_layer + region.srcSubresource.layerCount; |
| } else { |
| min_src_layer = region.srcOffsets[0].z; |
| max_src_layer = region.srcOffsets[1].z; |
| } |
| |
| uint32_t layer_count = max_dst_layer - min_dst_layer; |
| |
| /* Translate source blit coordinates to normalized texture coordinates |
| * and handle mirroring. |
| */ |
| const float coords[4] = { |
| (float)src_x / (float)src_level_w, |
| (float)src_y / (float)src_level_h, |
| (float)(src_x + src_w) / (float)src_level_w, |
| (float)(src_y + src_h) / (float)src_level_h, |
| }; |
| |
| const bool mirror_x = dst_mirror_x != src_mirror_x; |
| const bool mirror_y = dst_mirror_y != src_mirror_y; |
| float tex_coords[5] = { |
| !mirror_x ? coords[0] : coords[2], |
| !mirror_y ? coords[1] : coords[3], |
| !mirror_x ? coords[2] : coords[0], |
| !mirror_y ? coords[3] : coords[1], |
| /* Z coordinate for 3D blit sources, to be filled for each |
| * destination layer |
| */ |
| 0.0f |
| }; |
| |
| |
| /* For blits from 3D images we also need to compute the slice coordinate to |
| * sample from, which will change for each layer in the destination. |
| * Compute the step we should increase for each iteration. |
| */ |
| const float src_z_step = |
| (float)(max_src_layer - min_src_layer) / (float)layer_count; |
| |
| /* Create the descriptor pool for the source blit texture if needed */ |
| ensure_meta_blit_descriptor_pool(cmd_buffer); |
| |
| /* Get the blit pipeline */ |
| struct v3dv_meta_blit_pipeline *pipeline = NULL; |
| bool ok = get_blit_pipeline(cmd_buffer->device, |
| dst_format, src_format, |
| cmask, src->type, &pipeline); |
| if (!ok) |
| return handled; |
| assert(pipeline && pipeline->pipeline && pipeline->pass); |
| |
| struct v3dv_device *device = cmd_buffer->device; |
| assert(cmd_buffer->meta.blit.dspool); |
| assert(device->meta.blit.dslayout); |
| |
| /* Push command buffer state before starting meta operation */ |
| v3dv_cmd_buffer_meta_state_push(cmd_buffer, true); |
| |
| /* Setup framebuffer */ |
| VkDevice _device = v3dv_device_to_handle(device); |
| VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer); |
| |
| VkResult result; |
| uint32_t dirty_dynamic_state = 0; |
| VkImageAspectFlags aspects = region.dstSubresource.aspectMask; |
| for (uint32_t i = 0; i < layer_count; i++) { |
| VkImageViewCreateInfo dst_image_view_info = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| .image = v3dv_image_to_handle(dst), |
| .viewType = v3dv_image_type_to_view_type(dst->type), |
| .format = dst_format, |
| .subresourceRange = { |
| .aspectMask = aspects, |
| .baseMipLevel = region.dstSubresource.mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = min_dst_layer + i, |
| .layerCount = 1 |
| }, |
| }; |
| VkImageView dst_image_view; |
| result = v3dv_CreateImageView(_device, &dst_image_view_info, |
| &device->alloc, &dst_image_view); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)dst_image_view, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView); |
| |
| VkFramebufferCreateInfo fb_info = { |
| .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| .renderPass = pipeline->pass, |
| .attachmentCount = 1, |
| .pAttachments = &dst_image_view, |
| .width = dst_level_w, |
| .height = dst_level_h, |
| .layers = 1, |
| }; |
| |
| VkFramebuffer fb; |
| result = v3dv_CreateFramebuffer(_device, &fb_info, |
| &cmd_buffer->device->alloc, &fb); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)fb, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer); |
| |
| /* Setup descriptor set for blit source texture. We don't have to |
| * register the descriptor as a private command buffer object since |
| * all descriptors will be freed automatically with the descriptor |
| * pool. |
| */ |
| VkDescriptorSet set; |
| VkDescriptorSetAllocateInfo set_alloc_info = { |
| .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
| .descriptorPool = cmd_buffer->meta.blit.dspool, |
| .descriptorSetCount = 1, |
| .pSetLayouts = &device->meta.blit.dslayout, |
| }; |
| result = v3dv_AllocateDescriptorSets(_device, &set_alloc_info, &set); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| VkSamplerCreateInfo sampler_info = { |
| .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, |
| .magFilter = filter, |
| .minFilter = filter, |
| .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, |
| .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, |
| .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, |
| .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, |
| }; |
| VkSampler sampler; |
| result = v3dv_CreateSampler(_device, &sampler_info, &device->alloc, |
| &sampler); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)sampler, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler); |
| |
| VkImageViewCreateInfo src_image_view_info = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| .image = v3dv_image_to_handle(src), |
| .viewType = v3dv_image_type_to_view_type(src->type), |
| .format = src_format, |
| .components = *cswizzle, |
| .subresourceRange = { |
| .aspectMask = aspects, |
| .baseMipLevel = region.srcSubresource.mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = |
| src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i, |
| .layerCount = 1 |
| }, |
| }; |
| VkImageView src_image_view; |
| result = v3dv_CreateImageView(_device, &src_image_view_info, |
| &device->alloc, &src_image_view); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| v3dv_cmd_buffer_add_private_obj( |
| cmd_buffer, (uintptr_t)src_image_view, |
| (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView); |
| |
| VkDescriptorImageInfo image_info = { |
| .sampler = sampler, |
| .imageView = src_image_view, |
| .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| }; |
| VkWriteDescriptorSet write = { |
| .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, |
| .dstSet = set, |
| .dstBinding = 0, |
| .dstArrayElement = 0, |
| .descriptorCount = 1, |
| .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, |
| .pImageInfo = &image_info, |
| }; |
| v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL); |
| |
| /* Record blit */ |
| VkRenderPassBeginInfo rp_info = { |
| .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| .renderPass = pipeline->pass, |
| .framebuffer = fb, |
| .renderArea = { |
| .offset = { dst_x, dst_y }, |
| .extent = { dst_w, dst_h } |
| }, |
| .clearValueCount = 0, |
| }; |
| |
| v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE); |
| struct v3dv_job *job = cmd_buffer->state.job; |
| if (!job) |
| goto fail; |
| |
| if (src->type == VK_IMAGE_TYPE_3D) |
| tex_coords[4] = (min_src_layer + i * src_z_step) / (float)src_level_d; |
| |
| v3dv_CmdPushConstants(_cmd_buffer, |
| device->meta.blit.playout, |
| VK_SHADER_STAGE_VERTEX_BIT, 0, 20, |
| &tex_coords); |
| |
| v3dv_CmdBindPipeline(_cmd_buffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| pipeline->pipeline); |
| |
| v3dv_CmdBindDescriptorSets(_cmd_buffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| device->meta.blit.playout, |
| 0, 1, &set, |
| 0, NULL); |
| |
| const VkViewport viewport = { |
| .x = dst_x, |
| .y = dst_y, |
| .width = dst_w, |
| .height = dst_h, |
| .minDepth = 0.0f, |
| .maxDepth = 1.0f |
| }; |
| v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport); |
| const VkRect2D scissor = { |
| .offset = { dst_x, dst_y }, |
| .extent = { dst_w, dst_h } |
| }; |
| v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor); |
| |
| v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0); |
| |
| v3dv_CmdEndRenderPass(_cmd_buffer); |
| dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR; |
| } |
| |
| fail: |
| v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true); |
| |
| return handled; |
| } |
| |
| void |
| v3dv_CmdBlitImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageBlit* pRegions, |
| VkFilter filter) |
| { |
| V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); |
| V3DV_FROM_HANDLE(v3dv_image, src, srcImage); |
| V3DV_FROM_HANDLE(v3dv_image, dst, dstImage); |
| |
| /* This command can only happen outside a render pass */ |
| assert(cmd_buffer->state.pass == NULL); |
| assert(cmd_buffer->state.job == NULL); |
| |
| /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */ |
| assert(dst->samples == VK_SAMPLE_COUNT_1_BIT && |
| src->samples == VK_SAMPLE_COUNT_1_BIT); |
| |
| for (uint32_t i = 0; i < regionCount; i++) { |
| if (blit_tfu(cmd_buffer, dst, src, &pRegions[i], filter)) |
| continue; |
| if (blit_shader(cmd_buffer, |
| dst, dst->vk_format, |
| src, src->vk_format, |
| 0, NULL, |
| &pRegions[i], filter)) { |
| continue; |
| } |
| unreachable("Unsupported blit operation"); |
| } |
| } |