| /* |
| * Copyright 2019-2020 Valve Corporation |
| * SPDX-License-Identifier: MIT |
| * |
| * Authors: |
| * Jonathan Marek <jonathan@marek.ca> |
| */ |
| |
| #include "tu_private.h" |
| |
| #include "tu_cs.h" |
| #include "vk_format.h" |
| |
| #include "util/format_r11g11b10f.h" |
| #include "util/format_rgb9e5.h" |
| #include "util/format_srgb.h" |
| #include "util/half_float.h" |
| |
| static uint32_t |
| tu_pack_float32_for_unorm(float val, int bits) |
| { |
| return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1)); |
| } |
| |
| /* r2d_ = BLIT_OP_SCALE operations */ |
| |
| static enum a6xx_2d_ifmt |
| format_to_ifmt(VkFormat format) |
| { |
| if (format == VK_FORMAT_D24_UNORM_S8_UINT || |
| format == VK_FORMAT_X8_D24_UNORM_PACK32) |
| return R2D_UNORM8; |
| |
| /* get_component_bits doesn't work with depth/stencil formats: */ |
| if (format == VK_FORMAT_D16_UNORM || format == VK_FORMAT_D32_SFLOAT) |
| return R2D_FLOAT32; |
| if (format == VK_FORMAT_S8_UINT) |
| return R2D_INT8; |
| |
| /* use the size of the red channel to find the corresponding "ifmt" */ |
| bool is_int = vk_format_is_int(format); |
| switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { |
| case 4: case 5: case 8: |
| return is_int ? R2D_INT8 : R2D_UNORM8; |
| case 10: case 11: |
| return is_int ? R2D_INT16 : R2D_FLOAT16; |
| case 16: |
| if (vk_format_is_float(format)) |
| return R2D_FLOAT16; |
| return is_int ? R2D_INT16 : R2D_FLOAT32; |
| case 32: |
| return is_int ? R2D_INT32 : R2D_FLOAT32; |
| default: |
| unreachable("bad format"); |
| return 0; |
| } |
| } |
| |
| static void |
| r2d_coords(struct tu_cs *cs, |
| const VkOffset2D *dst, |
| const VkOffset2D *src, |
| const VkExtent2D *extent) |
| { |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y), |
| A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1)); |
| |
| if (!src) |
| return; |
| |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_2D_SRC_TL_X(src->x), |
| A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1), |
| A6XX_GRAS_2D_SRC_TL_Y(src->y), |
| A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1)); |
| } |
| |
| static void |
| r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) |
| { |
| uint32_t clear_value[4] = {}; |
| |
| switch (format) { |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| /* cleared as r8g8b8a8_unorm using special format */ |
| clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); |
| clear_value[1] = clear_value[0] >> 8; |
| clear_value[2] = clear_value[0] >> 16; |
| clear_value[3] = val->depthStencil.stencil; |
| break; |
| case VK_FORMAT_D16_UNORM: |
| case VK_FORMAT_D32_SFLOAT: |
| /* R2D_FLOAT32 */ |
| clear_value[0] = fui(val->depthStencil.depth); |
| break; |
| case VK_FORMAT_S8_UINT: |
| clear_value[0] = val->depthStencil.stencil; |
| break; |
| case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
| /* cleared as UINT32 */ |
| clear_value[0] = float3_to_rgb9e5(val->color.float32); |
| break; |
| default: |
| assert(!vk_format_is_depth_or_stencil(format)); |
| const struct util_format_description *desc = vk_format_description(format); |
| enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); |
| |
| assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || |
| format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)); |
| |
| for (unsigned i = 0; i < desc->nr_channels; i++) { |
| const struct util_format_channel_description *ch = &desc->channel[i]; |
| if (ifmt == R2D_UNORM8) { |
| float linear = val->color.float32[i]; |
| if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3) |
| linear = util_format_linear_to_srgb_float(val->color.float32[i]); |
| |
| if (ch->type == UTIL_FORMAT_TYPE_SIGNED) |
| clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f); |
| else |
| clear_value[i] = tu_pack_float32_for_unorm(linear, 8); |
| } else if (ifmt == R2D_FLOAT16) { |
| clear_value[i] = _mesa_float_to_half(val->color.float32[i]); |
| } else { |
| assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 || |
| ifmt == R2D_INT16 || ifmt == R2D_INT8); |
| clear_value[i] = val->color.uint32[i]; |
| } |
| } |
| break; |
| } |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); |
| tu_cs_emit_array(cs, clear_value, 4); |
| } |
| |
| static void |
| r2d_src(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const struct tu_image_view *iview, |
| uint32_t layer, |
| VkFilter filter) |
| { |
| uint32_t src_info = iview->SP_PS_2D_SRC_INFO; |
| if (filter != VK_FILTER_NEAREST) |
| src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER; |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); |
| tu_cs_emit(cs, src_info); |
| tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE); |
| tu_cs_image_ref_2d(cs, iview, layer, true); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 3); |
| tu_cs_image_flag_ref(cs, iview, layer); |
| } |
| |
| static void |
| r2d_src_buffer(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| uint64_t va, uint32_t pitch, |
| uint32_t width, uint32_t height) |
| { |
| struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_SP_PS_2D_SRC_INFO( |
| .color_format = format.fmt, |
| .color_swap = format.swap, |
| .srgb = vk_format_is_srgb(vk_format), |
| .unk20 = 1, |
| .unk22 = 1), |
| A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height), |
| A6XX_SP_PS_2D_SRC_LO((uint32_t) va), |
| A6XX_SP_PS_2D_SRC_HI(va >> 32), |
| A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch)); |
| } |
| |
| static void |
| r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) |
| { |
| assert(iview->image->samples == 1); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); |
| tu_cs_emit(cs, iview->RB_2D_DST_INFO); |
| tu_cs_image_ref_2d(cs, iview, layer, false); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 3); |
| tu_cs_image_flag_ref(cs, iview, layer); |
| } |
| |
| static void |
| r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) |
| { |
| assert(iview->image->samples == 1); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); |
| tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS); |
| tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); |
| tu_cs_emit(cs, iview->stencil_PITCH); |
| } |
| |
| static void |
| r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch) |
| { |
| struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_2D_DST_INFO( |
| .color_format = format.fmt, |
| .color_swap = format.swap, |
| .srgb = vk_format_is_srgb(vk_format)), |
| A6XX_RB_2D_DST_LO((uint32_t) va), |
| A6XX_RB_2D_DST_HI(va >> 32), |
| A6XX_RB_2D_DST_PITCH(pitch)); |
| } |
| |
| static void |
| r2d_setup_common(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| VkImageAspectFlags aspect_mask, |
| enum a6xx_rotation rotation, |
| bool clear, |
| bool ubwc, |
| bool scissor) |
| { |
| enum a6xx_format format = tu6_base_format(vk_format); |
| enum a6xx_2d_ifmt ifmt = format_to_ifmt(vk_format); |
| uint32_t unknown_8c01 = 0; |
| |
| if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || |
| vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { |
| format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; |
| } |
| |
| /* note: the only format with partial clearing is D24S8 */ |
| if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { |
| /* preserve stencil channel */ |
| if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) |
| unknown_8c01 = 0x08000041; |
| /* preserve depth channels */ |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) |
| unknown_8c01 = 0x00084001; |
| } |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); |
| tu_cs_emit(cs, unknown_8c01); |
| |
| uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL( |
| .scissor = scissor, |
| .rotate = rotation, |
| .solid_color = clear, |
| .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, |
| .color_format = format, |
| .mask = 0xf, |
| .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt, |
| ).value; |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); |
| tu_cs_emit(cs, blit_cntl); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); |
| tu_cs_emit(cs, blit_cntl); |
| |
| if (format == FMT6_10_10_10_2_UNORM_DEST) |
| format = FMT6_16_16_16_16_FLOAT; |
| |
| tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT( |
| .sint = vk_format_is_sint(vk_format), |
| .uint = vk_format_is_uint(vk_format), |
| .color_format = format, |
| .srgb = vk_format_is_srgb(vk_format), |
| .mask = 0xf)); |
| } |
| |
| static void |
| r2d_setup(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| VkImageAspectFlags aspect_mask, |
| enum a6xx_rotation rotation, |
| bool clear, |
| bool ubwc) |
| { |
| tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); |
| |
| r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false); |
| } |
| |
| static void |
| r2d_teardown(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs) |
| { |
| /* nothing to do here */ |
| } |
| |
| static void |
| r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) |
| { |
| tu_cs_emit_pkt7(cs, CP_BLIT, 1); |
| tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); |
| } |
| |
| /* r3d_ = shader path operations */ |
| |
| void |
| tu_init_clear_blit_shaders(struct tu6_global *global) |
| { |
| #define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } } |
| #define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } } |
| #define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } } |
| |
| static const instr_t vs_code[] = { |
| /* r0.xyz = r0.w ? c1.xyz : c0.xyz |
| * r1.xy = r0.w ? c1.zw : c0.zw |
| * r0.w = 1.0f |
| */ |
| CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0, |
| .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1, |
| .src2 = 3, |
| .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}), |
| CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4, |
| .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1, |
| .src2 = 3, |
| .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}), |
| MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ), |
| { .cat0 = { .opc = OPC_END } }, |
| }; |
| |
| static const instr_t fs_blit[] = { |
| /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its |
| * blit path (its not clear what allows it to not have it) |
| */ |
| CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 63 * 4, .src1_im = 1), |
| { .cat0 = { .opc = OPC_END } }, |
| }; |
| |
| memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code)); |
| memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit)); |
| |
| for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) { |
| instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts]; |
| for (uint32_t i = 0; i < num_rts; i++) { |
| /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */ |
| *code++ = (instr_t) MOV(.repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4); |
| } |
| *code++ = (instr_t) { .cat0 = { .opc = OPC_END } }; |
| } |
| } |
| |
| static void |
| r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts, |
| bool layered_clear) |
| { |
| struct ir3_const_state dummy_const_state = {}; |
| struct ir3_shader dummy_shader = {}; |
| |
| struct ir3_shader_variant vs = { |
| .type = MESA_SHADER_VERTEX, |
| .instrlen = 1, |
| .constlen = 4, |
| .info.max_reg = 1, |
| .inputs_count = 1, |
| .inputs[0] = { |
| .slot = SYSTEM_VALUE_VERTEX_ID, |
| .regid = regid(0, 3), |
| .sysval = true, |
| }, |
| .outputs_count = blit ? 2 : 1, |
| .outputs[0] = { |
| .slot = VARYING_SLOT_POS, |
| .regid = regid(0, 0), |
| }, |
| .outputs[1] = { |
| .slot = VARYING_SLOT_VAR0, |
| .regid = regid(1, 0), |
| }, |
| .shader = &dummy_shader, |
| .const_state = &dummy_const_state, |
| }; |
| if (layered_clear) { |
| vs.outputs[1].slot = VARYING_SLOT_LAYER; |
| vs.outputs[1].regid = regid(1, 1); |
| vs.outputs_count = 2; |
| } |
| |
| struct ir3_shader_variant fs = { |
| .type = MESA_SHADER_FRAGMENT, |
| .instrlen = 1, /* max of 9 instructions with num_rts = 8 */ |
| .constlen = align(num_rts, 4), |
| .info.max_reg = MAX2(num_rts, 1) - 1, |
| .total_in = blit ? 2 : 0, |
| .num_samp = blit ? 1 : 0, |
| .inputs_count = blit ? 2 : 0, |
| .inputs[0] = { |
| .slot = VARYING_SLOT_VAR0, |
| .inloc = 0, |
| .compmask = 3, |
| .bary = true, |
| }, |
| .inputs[1] = { |
| .slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL, |
| .regid = regid(0, 0), |
| .sysval = 1, |
| }, |
| .num_sampler_prefetch = blit ? 1 : 0, |
| .sampler_prefetch[0] = { |
| .src = 0, |
| .wrmask = 0xf, |
| .cmd = 4, |
| }, |
| .shader = &dummy_shader, |
| .const_state = &dummy_const_state, |
| }; |
| |
| tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( |
| .vs_state = true, |
| .hs_state = true, |
| .ds_state = true, |
| .gs_state = true, |
| .fs_state = true, |
| .cs_state = true, |
| .gfx_ibo = true, |
| .cs_ibo = true, |
| .gfx_shared_const = true, |
| .gfx_bindless = 0x1f, |
| .cs_bindless = 0x1f)); |
| |
| tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS])); |
| tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0); |
| tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0); |
| tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, 0); |
| tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, |
| global_iova(cmd, shaders[blit ? GLOBAL_SH_FS_BLIT : (GLOBAL_SH_FS_CLEAR0 + num_rts)])); |
| |
| tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0()); |
| tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0()); |
| |
| /* Copy what the blob does here. This will emit an extra 0x3f |
| * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what |
| * this is working around yet. |
| */ |
| tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); |
| tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); |
| tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL); |
| tu_cs_emit(cs, 0); |
| tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL()); |
| |
| tu6_emit_vpc(cs, &vs, NULL, NULL, NULL, &fs, 0, false); |
| |
| /* REPL_MODE for varying with RECTLIST (2 vertices only) */ |
| tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0)); |
| tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0)); |
| |
| tu6_emit_fs_inputs(cs, &fs); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_CL_CNTL( |
| .persp_division_disable = 1, |
| .vp_xform_disable = 1, |
| .vp_clip_code_ignore = 1, |
| .clip_disable = 1)); |
| tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable? |
| |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0), |
| A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0), |
| A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_VFD_INDEX_OFFSET(), |
| A6XX_VFD_INSTANCE_START_OFFSET()); |
| } |
| |
| static void |
| r3d_coords_raw(struct tu_cs *cs, const float *coords) |
| { |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | |
| CP_LOAD_STATE6_0_NUM_UNIT(2)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); |
| tu_cs_emit_array(cs, (const uint32_t *) coords, 8); |
| } |
| |
| static void |
| r3d_coords(struct tu_cs *cs, |
| const VkOffset2D *dst, |
| const VkOffset2D *src, |
| const VkExtent2D *extent) |
| { |
| int32_t src_x1 = src ? src->x : 0; |
| int32_t src_y1 = src ? src->y : 0; |
| r3d_coords_raw(cs, (float[]) { |
| dst->x, dst->y, |
| src_x1, src_y1, |
| dst->x + extent->width, dst->y + extent->height, |
| src_x1 + extent->width, src_y1 + extent->height, |
| }); |
| } |
| |
| static void |
| r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val) |
| { |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | |
| CP_LOAD_STATE6_0_NUM_UNIT(1)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); |
| switch (format) { |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| case VK_FORMAT_D24_UNORM_S8_UINT: { |
| /* cleared as r8g8b8a8_unorm using special format */ |
| uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); |
| tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f)); |
| tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f)); |
| tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f)); |
| tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f)); |
| } break; |
| case VK_FORMAT_D16_UNORM: |
| case VK_FORMAT_D32_SFLOAT: |
| tu_cs_emit(cs, fui(val->depthStencil.depth)); |
| tu_cs_emit(cs, 0); |
| tu_cs_emit(cs, 0); |
| tu_cs_emit(cs, 0); |
| break; |
| case VK_FORMAT_S8_UINT: |
| tu_cs_emit(cs, val->depthStencil.stencil & 0xff); |
| tu_cs_emit(cs, 0); |
| tu_cs_emit(cs, 0); |
| tu_cs_emit(cs, 0); |
| break; |
| default: |
| /* as color formats use clear value as-is */ |
| assert(!vk_format_is_depth_or_stencil(format)); |
| tu_cs_emit_array(cs, val->color.uint32, 4); |
| break; |
| } |
| } |
| |
| static void |
| r3d_src_common(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const uint32_t *tex_const, |
| uint32_t offset_base, |
| uint32_t offset_ubwc, |
| VkFilter filter) |
| { |
| struct tu_cs_memory texture = { }; |
| VkResult result = tu_cs_alloc(&cmd->sub_cs, |
| 2, /* allocate space for a sampler too */ |
| A6XX_TEX_CONST_DWORDS, &texture); |
| if (result != VK_SUCCESS) { |
| cmd->record_result = result; |
| return; |
| } |
| |
| memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4); |
| |
| /* patch addresses for layer offset */ |
| *(uint64_t*) (texture.map + 4) += offset_base; |
| uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc; |
| texture.map[7] = ubwc_addr; |
| texture.map[8] = ubwc_addr >> 32; |
| |
| texture.map[A6XX_TEX_CONST_DWORDS + 0] = |
| A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) | |
| A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) | |
| A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) | |
| A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) | |
| A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) | |
| 0x60000; /* XXX used by blob, doesn't seem necessary */ |
| texture.map[A6XX_TEX_CONST_DWORDS + 1] = |
| 0x1 | /* XXX used by blob, doesn't seem necessary */ |
| A6XX_TEX_SAMP_1_UNNORM_COORDS | |
| A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR; |
| texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0; |
| texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0; |
| |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | |
| CP_LOAD_STATE6_0_NUM_UNIT(1)); |
| tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_SAMP_LO, 2); |
| tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); |
| |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | |
| CP_LOAD_STATE6_0_NUM_UNIT(1)); |
| tu_cs_emit_qw(cs, texture.iova); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2); |
| tu_cs_emit_qw(cs, texture.iova); |
| |
| tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1)); |
| } |
| |
| static void |
| r3d_src(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const struct tu_image_view *iview, |
| uint32_t layer, |
| VkFilter filter) |
| { |
| r3d_src_common(cmd, cs, iview->descriptor, |
| iview->layer_size * layer, |
| iview->ubwc_layer_size * layer, |
| filter); |
| } |
| |
| static void |
| r3d_src_buffer(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| uint64_t va, uint32_t pitch, |
| uint32_t width, uint32_t height) |
| { |
| uint32_t desc[A6XX_TEX_CONST_DWORDS]; |
| |
| struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR); |
| |
| desc[0] = |
| COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) | |
| A6XX_TEX_CONST_0_FMT(format.fmt) | |
| A6XX_TEX_CONST_0_SWAP(format.swap) | |
| A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | |
| // XXX to swizzle into .w for stencil buffer_to_image |
| A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) | |
| A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) | |
| A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W); |
| desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); |
| desc[2] = |
| A6XX_TEX_CONST_2_PITCH(pitch) | |
| A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D); |
| desc[3] = 0; |
| desc[4] = va; |
| desc[5] = va >> 32; |
| for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++) |
| desc[i] = 0; |
| |
| r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); |
| } |
| |
| static void |
| r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) |
| { |
| tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */ |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); |
| tu_cs_emit(cs, iview->RB_MRT_BUF_INFO); |
| tu_cs_image_ref(cs, iview, layer); |
| tu_cs_emit(cs, 0); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); |
| tu_cs_image_flag_ref(cs, iview, layer); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); |
| } |
| |
| static void |
| r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) |
| { |
| tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */ |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); |
| tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO)); |
| tu_cs_image_stencil_ref(cs, iview, layer); |
| tu_cs_emit(cs, 0); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); |
| } |
| |
| static void |
| r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch) |
| { |
| struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR); |
| |
| tu6_emit_msaa(cs, 1); /* TODO: move to setup */ |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap), |
| A6XX_RB_MRT_PITCH(0, pitch), |
| A6XX_RB_MRT_ARRAY_PITCH(0, 0), |
| A6XX_RB_MRT_BASE_LO(0, (uint32_t) va), |
| A6XX_RB_MRT_BASE_HI(0, va >> 32), |
| A6XX_RB_MRT_BASE_GMEM(0, 0)); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); |
| } |
| |
| static uint8_t |
| aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask) |
| { |
| uint8_t mask = 0xf; |
| assert(aspect_mask); |
| /* note: the only format with partial writing is D24S8, |
| * clear/blit uses the _AS_R8G8B8A8 format to access it |
| */ |
| if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { |
| if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) |
| mask = 0x7; |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) |
| mask = 0x8; |
| } |
| return mask; |
| } |
| |
| static void |
| r3d_setup(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| VkImageAspectFlags aspect_mask, |
| enum a6xx_rotation rotation, |
| bool clear, |
| bool ubwc) |
| { |
| enum a6xx_format format = tu6_base_format(vk_format); |
| |
| if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || |
| vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { |
| format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; |
| } |
| |
| if (!cmd->state.pass) { |
| tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); |
| tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff); |
| } |
| |
| tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000)); |
| tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000)); |
| |
| r3d_common(cmd, cs, !clear, clear ? 1 : 0, false); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | |
| A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | |
| 0xfc000000); |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1)); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1); |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0)); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_FS_OUTPUT_CNTL0(), |
| A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1)); |
| |
| tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff)); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL()); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK()); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK()); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILREF()); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf)); |
| tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf)); |
| |
| tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0, |
| .color_format = format, |
| .color_sint = vk_format_is_sint(vk_format), |
| .color_uint = vk_format_is_uint(vk_format))); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, |
| .component_enable = aspect_write_mask(vk_format, aspect_mask))); |
| tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format))); |
| tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format))); |
| |
| if (cmd->state.predication_active) { |
| tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); |
| tu_cs_emit(cs, 0); |
| } |
| } |
| |
| static void |
| r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) |
| { |
| tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); |
| tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) | |
| CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | |
| CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY)); |
| tu_cs_emit(cs, 1); /* instance count */ |
| tu_cs_emit(cs, 2); /* vertex count */ |
| } |
| |
| static void |
| r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs) |
| { |
| if (cmd->state.predication_active) { |
| tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); |
| tu_cs_emit(cs, 1); |
| } |
| } |
| |
| /* blit ops - common interface for 2d/shader paths */ |
| |
| struct blit_ops { |
| void (*coords)(struct tu_cs *cs, |
| const VkOffset2D *dst, |
| const VkOffset2D *src, |
| const VkExtent2D *extent); |
| void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val); |
| void (*src)( |
| struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const struct tu_image_view *iview, |
| uint32_t layer, |
| VkFilter filter); |
| void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs, |
| VkFormat vk_format, |
| uint64_t va, uint32_t pitch, |
| uint32_t width, uint32_t height); |
| void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); |
| void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch); |
| void (*setup)(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat vk_format, |
| VkImageAspectFlags aspect_mask, |
| enum a6xx_rotation rotation, |
| bool clear, |
| bool ubwc); |
| void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs); |
| void (*teardown)(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs); |
| }; |
| |
| static const struct blit_ops r2d_ops = { |
| .coords = r2d_coords, |
| .clear_value = r2d_clear_value, |
| .src = r2d_src, |
| .src_buffer = r2d_src_buffer, |
| .dst = r2d_dst, |
| .dst_buffer = r2d_dst_buffer, |
| .setup = r2d_setup, |
| .run = r2d_run, |
| .teardown = r2d_teardown, |
| }; |
| |
| static const struct blit_ops r3d_ops = { |
| .coords = r3d_coords, |
| .clear_value = r3d_clear_value, |
| .src = r3d_src, |
| .src_buffer = r3d_src_buffer, |
| .dst = r3d_dst, |
| .dst_buffer = r3d_dst_buffer, |
| .setup = r3d_setup, |
| .run = r3d_run, |
| .teardown = r3d_teardown, |
| }; |
| |
| /* passthrough set coords from 3D extents */ |
| static void |
| coords(const struct blit_ops *ops, |
| struct tu_cs *cs, |
| const VkOffset3D *dst, |
| const VkOffset3D *src, |
| const VkExtent3D *extent) |
| { |
| ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent); |
| } |
| |
| static VkFormat |
| copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer) |
| { |
| if (vk_format_is_compressed(format)) { |
| switch (vk_format_get_blocksize(format)) { |
| case 1: return VK_FORMAT_R8_UINT; |
| case 2: return VK_FORMAT_R16_UINT; |
| case 4: return VK_FORMAT_R32_UINT; |
| case 8: return VK_FORMAT_R32G32_UINT; |
| case 16:return VK_FORMAT_R32G32B32A32_UINT; |
| default: |
| unreachable("unhandled format size"); |
| } |
| } |
| |
| switch (format) { |
| case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: |
| if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT) |
| return VK_FORMAT_R8G8_UNORM; |
| /* fallthrough */ |
| case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: |
| return VK_FORMAT_R8_UNORM; |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer) |
| return VK_FORMAT_R8_UNORM; |
| /* fallthrough */ |
| default: |
| return format; |
| case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
| return VK_FORMAT_R32_UINT; |
| case VK_FORMAT_D32_SFLOAT_S8_UINT: |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) |
| return VK_FORMAT_S8_UINT; |
| assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT); |
| return VK_FORMAT_D32_SFLOAT; |
| } |
| } |
| |
| void |
| tu6_clear_lrz(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| struct tu_image *image, |
| const VkClearValue *value) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| |
| ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, ROTATE_0, true, false); |
| ops->clear_value(cs, VK_FORMAT_D16_UNORM, value); |
| ops->dst_buffer(cs, VK_FORMAT_D16_UNORM, |
| image->bo->iova + image->bo_offset + image->lrz_offset, |
| image->lrz_pitch * 2); |
| ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height}); |
| ops->run(cmd, cs); |
| ops->teardown(cmd, cs); |
| } |
| |
| static void |
| tu_image_view_copy_blit(struct tu_image_view *iview, |
| struct tu_image *image, |
| VkFormat format, |
| const VkImageSubresourceLayers *subres, |
| uint32_t layer, |
| bool stencil_read) |
| { |
| VkImageAspectFlags aspect_mask = subres->aspectMask; |
| |
| /* always use the AS_R8G8B8A8 format for these */ |
| if (format == VK_FORMAT_D24_UNORM_S8_UINT || |
| format == VK_FORMAT_X8_D24_UNORM_PACK32) { |
| aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; |
| } |
| |
| tu_image_view_init(iview, &(VkImageViewCreateInfo) { |
| .image = tu_image_to_handle(image), |
| .viewType = VK_IMAGE_VIEW_TYPE_2D, |
| .format = format, |
| /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */ |
| .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R, |
| .subresourceRange = { |
| .aspectMask = aspect_mask, |
| .baseMipLevel = subres->mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = subres->baseArrayLayer + layer, |
| .layerCount = 1, |
| }, |
| }, false); |
| } |
| |
| static void |
| tu_image_view_copy(struct tu_image_view *iview, |
| struct tu_image *image, |
| VkFormat format, |
| const VkImageSubresourceLayers *subres, |
| uint32_t layer, |
| bool stencil_read) |
| { |
| format = copy_format(format, subres->aspectMask, false); |
| tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read); |
| } |
| |
| static void |
| tu_image_view_blit(struct tu_image_view *iview, |
| struct tu_image *image, |
| const VkImageSubresourceLayers *subres, |
| uint32_t layer) |
| { |
| tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false); |
| } |
| |
| static void |
| tu6_blit_image(struct tu_cmd_buffer *cmd, |
| struct tu_image *src_image, |
| struct tu_image *dst_image, |
| const VkImageBlit *info, |
| VkFilter filter) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| uint32_t layers; |
| |
| /* 2D blit can't do rotation mirroring from just coordinates */ |
| static const enum a6xx_rotation rotate[2][2] = { |
| {ROTATE_0, ROTATE_HFLIP}, |
| {ROTATE_VFLIP, ROTATE_180}, |
| }; |
| |
| bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) != |
| (info->dstOffsets[1].x < info->dstOffsets[0].x); |
| bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) != |
| (info->dstOffsets[1].y < info->dstOffsets[0].y); |
| bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) != |
| (info->dstOffsets[1].z < info->dstOffsets[0].z); |
| |
| if (mirror_z) { |
| tu_finishme("blit z mirror\n"); |
| return; |
| } |
| |
| if (info->srcOffsets[1].z - info->srcOffsets[0].z != |
| info->dstOffsets[1].z - info->dstOffsets[0].z) { |
| tu_finishme("blit z filter\n"); |
| return; |
| } |
| |
| layers = info->srcOffsets[1].z - info->srcOffsets[0].z; |
| if (info->dstSubresource.layerCount > 1) { |
| assert(layers <= 1); |
| layers = info->dstSubresource.layerCount; |
| } |
| |
| /* BC1_RGB_* formats need to have their last components overriden with 1 |
| * when sampling, which is normally handled with the texture descriptor |
| * swizzle. The 2d path can't handle that, so use the 3d path. |
| * |
| * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with |
| * the 2d path. |
| */ |
| |
| if (dst_image->samples > 1 || |
| src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || |
| src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || |
| filter == VK_FILTER_CUBIC_EXT) |
| ops = &r3d_ops; |
| |
| /* use the right format in setup() for D32_S8 |
| * TODO: this probably should use a helper |
| */ |
| VkFormat format = dst_image->vk_format; |
| if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) { |
| if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) |
| format = VK_FORMAT_D32_SFLOAT; |
| else if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) |
| format = VK_FORMAT_S8_UINT; |
| else |
| unreachable("unexpected D32_S8 aspect mask in blit_image"); |
| } |
| |
| ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, |
| rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc); |
| |
| if (ops == &r3d_ops) { |
| r3d_coords_raw(cs, (float[]) { |
| info->dstOffsets[0].x, info->dstOffsets[0].y, |
| info->srcOffsets[0].x, info->srcOffsets[0].y, |
| info->dstOffsets[1].x, info->dstOffsets[1].y, |
| info->srcOffsets[1].x, info->srcOffsets[1].y |
| }); |
| } else { |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x), |
| .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)), |
| A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1, |
| .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1)); |
| tu_cs_emit_regs(cs, |
| A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)), |
| A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1), |
| A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)), |
| A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1)); |
| } |
| |
| struct tu_image_view dst, src; |
| tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffsets[0].z); |
| tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->dst(cs, &dst, i); |
| ops->src(cmd, cs, &src, i, filter); |
| ops->run(cmd, cs); |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdBlitImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageBlit *pRegions, |
| VkFilter filter) |
| |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) { |
| /* can't blit both depth and stencil at once with D32_S8 |
| * TODO: more advanced 3D blit path to support it instead? |
| */ |
| if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT || |
| dst_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { |
| VkImageBlit region = pRegions[i]; |
| uint32_t b; |
| for_each_bit(b, pRegions[i].dstSubresource.aspectMask) { |
| region.srcSubresource.aspectMask = BIT(b); |
| region.dstSubresource.aspectMask = BIT(b); |
| tu6_blit_image(cmd, src_image, dst_image, ®ion, filter); |
| } |
| continue; |
| } |
| tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter); |
| } |
| } |
| |
| static void |
| copy_compressed(VkFormat format, |
| VkOffset3D *offset, |
| VkExtent3D *extent, |
| uint32_t *width, |
| uint32_t *height) |
| { |
| if (!vk_format_is_compressed(format)) |
| return; |
| |
| uint32_t block_width = vk_format_get_blockwidth(format); |
| uint32_t block_height = vk_format_get_blockheight(format); |
| |
| offset->x /= block_width; |
| offset->y /= block_height; |
| |
| if (extent) { |
| extent->width = DIV_ROUND_UP(extent->width, block_width); |
| extent->height = DIV_ROUND_UP(extent->height, block_height); |
| } |
| if (width) |
| *width = DIV_ROUND_UP(*width, block_width); |
| if (height) |
| *height = DIV_ROUND_UP(*height, block_height); |
| } |
| |
| static void |
| tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, |
| struct tu_buffer *src_buffer, |
| struct tu_image *dst_image, |
| const VkBufferImageCopy *info) |
| { |
| struct tu_cs *cs = &cmd->cs; |
| uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); |
| VkFormat src_format = |
| copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true); |
| const struct blit_ops *ops = &r2d_ops; |
| |
| /* special case for buffer to stencil */ |
| if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && |
| info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { |
| ops = &r3d_ops; |
| } |
| |
| /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format, |
| * which matters for UBWC. buffer_to_image/etc can fail because of this |
| */ |
| |
| VkOffset3D offset = info->imageOffset; |
| VkExtent3D extent = info->imageExtent; |
| uint32_t src_width = info->bufferRowLength ?: extent.width; |
| uint32_t src_height = info->bufferImageHeight ?: extent.height; |
| |
| copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height); |
| |
| uint32_t pitch = src_width * vk_format_get_blocksize(src_format); |
| uint32_t layer_size = src_height * pitch; |
| |
| ops->setup(cmd, cs, |
| copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false), |
| info->imageSubresource.aspectMask, ROTATE_0, false, dst_image->layout[0].ubwc); |
| |
| struct tu_image_view dst; |
| tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->dst(cs, &dst, i); |
| |
| uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i; |
| if ((src_va & 63) || (pitch & 63)) { |
| for (uint32_t y = 0; y < extent.height; y++) { |
| uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format); |
| ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch, |
| x + extent.width, 1); |
| ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x}, |
| &(VkExtent2D) {extent.width, 1}); |
| ops->run(cmd, cs); |
| src_va += pitch; |
| } |
| } else { |
| ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height); |
| coords(ops, cs, &offset, &(VkOffset3D){}, &extent); |
| ops->run(cmd, cs); |
| } |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); |
| |
| for (unsigned i = 0; i < regionCount; ++i) |
| tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i); |
| } |
| |
| static void |
| tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, |
| struct tu_image *src_image, |
| struct tu_buffer *dst_buffer, |
| const VkBufferImageCopy *info) |
| { |
| struct tu_cs *cs = &cmd->cs; |
| uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); |
| VkFormat dst_format = |
| copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true); |
| bool stencil_read = false; |
| |
| if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && |
| info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { |
| stencil_read = true; |
| } |
| |
| const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops; |
| VkOffset3D offset = info->imageOffset; |
| VkExtent3D extent = info->imageExtent; |
| uint32_t dst_width = info->bufferRowLength ?: extent.width; |
| uint32_t dst_height = info->bufferImageHeight ?: extent.height; |
| |
| copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height); |
| |
| uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format); |
| uint32_t layer_size = pitch * dst_height; |
| |
| ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); |
| |
| struct tu_image_view src; |
| tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| |
| uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i; |
| if ((dst_va & 63) || (pitch & 63)) { |
| for (uint32_t y = 0; y < extent.height; y++) { |
| uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format); |
| ops->dst_buffer(cs, dst_format, dst_va & ~63, 0); |
| ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y}, |
| &(VkExtent2D) {extent.width, 1}); |
| ops->run(cmd, cs); |
| dst_va += pitch; |
| } |
| } else { |
| ops->dst_buffer(cs, dst_format, dst_va, pitch); |
| coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent); |
| ops->run(cmd, cs); |
| } |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkBuffer dstBuffer, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); |
| |
| for (unsigned i = 0; i < regionCount; ++i) |
| tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i); |
| } |
| |
| /* Tiled formats don't support swapping, which means that we can't support |
| * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some |
| * formats like B5G5R5A1 have a separate linear-only format when sampling. |
| * Currently we fake support for tiled swapped formats and use the unswapped |
| * format instead, but this means that reinterpreting copies to and from |
| * swapped formats can't be performed correctly unless we can swizzle the |
| * components by reinterpreting the other image as the "correct" swapped |
| * format, i.e. only when the other image is linear. |
| */ |
| |
| static bool |
| is_swapped_format(VkFormat format) |
| { |
| struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR); |
| struct tu_native_format tiled = tu6_format_texture(format, TILE6_3); |
| return linear.fmt != tiled.fmt || linear.swap != tiled.swap; |
| } |
| |
| /* R8G8_* formats have a different tiling layout than other cpp=2 formats, and |
| * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice |
| * versa). This should mirror the logic in fdl6_layout. |
| */ |
| static bool |
| image_is_r8g8(struct tu_image *image) |
| { |
| return image->layout[0].cpp == 2 && |
| vk_format_get_nr_components(image->vk_format) == 2; |
| } |
| |
| static void |
| tu_copy_image_to_image(struct tu_cmd_buffer *cmd, |
| struct tu_image *src_image, |
| struct tu_image *dst_image, |
| const VkImageCopy *info) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| |
| if (dst_image->samples > 1) |
| ops = &r3d_ops; |
| |
| VkFormat format = VK_FORMAT_UNDEFINED; |
| VkOffset3D src_offset = info->srcOffset; |
| VkOffset3D dst_offset = info->dstOffset; |
| VkExtent3D extent = info->extent; |
| |
| /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between |
| * Images": |
| * |
| * When copying between compressed and uncompressed formats the extent |
| * members represent the texel dimensions of the source image and not |
| * the destination. When copying from a compressed image to an |
| * uncompressed image the image texel dimensions written to the |
| * uncompressed image will be source extent divided by the compressed |
| * texel block dimensions. When copying from an uncompressed image to a |
| * compressed image the image texel dimensions written to the compressed |
| * image will be the source extent multiplied by the compressed texel |
| * block dimensions. |
| * |
| * This means we only have to adjust the extent if the source image is |
| * compressed. |
| */ |
| copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL); |
| copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL); |
| |
| VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false); |
| VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false); |
| |
| bool use_staging_blit = false; |
| |
| if (src_format == dst_format) { |
| /* Images that share a format can always be copied directly because it's |
| * the same as a blit. |
| */ |
| format = src_format; |
| } else if (!src_image->layout[0].tile_mode) { |
| /* If an image is linear, we can always safely reinterpret it with the |
| * other image's format and then do a regular blit. |
| */ |
| format = dst_format; |
| } else if (!dst_image->layout[0].tile_mode) { |
| format = src_format; |
| } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) { |
| /* We can't currently copy r8g8 images to/from other cpp=2 images, |
| * due to the different tile layout. |
| */ |
| use_staging_blit = true; |
| } else if (is_swapped_format(src_format) || |
| is_swapped_format(dst_format)) { |
| /* If either format has a non-identity swap, then we can't copy |
| * to/from it. |
| */ |
| use_staging_blit = true; |
| } else if (!src_image->layout[0].ubwc) { |
| format = dst_format; |
| } else if (!dst_image->layout[0].ubwc) { |
| format = src_format; |
| } else { |
| /* Both formats use UBWC and so neither can be reinterpreted. |
| * TODO: We could do an in-place decompression of the dst instead. |
| */ |
| use_staging_blit = true; |
| } |
| |
| struct tu_image_view dst, src; |
| |
| if (use_staging_blit) { |
| tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false); |
| tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false); |
| |
| struct tu_image staging_image = { |
| .vk_format = src_format, |
| .type = src_image->type, |
| .tiling = VK_IMAGE_TILING_LINEAR, |
| .extent = extent, |
| .level_count = 1, |
| .layer_count = info->srcSubresource.layerCount, |
| .samples = src_image->samples, |
| .bo_offset = 0, |
| }; |
| |
| VkImageSubresourceLayers staging_subresource = { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .mipLevel = 0, |
| .baseArrayLayer = 0, |
| .layerCount = info->srcSubresource.layerCount, |
| }; |
| |
| VkOffset3D staging_offset = { 0 }; |
| |
| staging_image.layout[0].tile_mode = TILE6_LINEAR; |
| staging_image.layout[0].ubwc = false; |
| |
| fdl6_layout(&staging_image.layout[0], |
| vk_format_to_pipe_format(staging_image.vk_format), |
| staging_image.samples, |
| staging_image.extent.width, |
| staging_image.extent.height, |
| staging_image.extent.depth, |
| staging_image.level_count, |
| staging_image.layer_count, |
| staging_image.type == VK_IMAGE_TYPE_3D, |
| NULL); |
| |
| VkResult result = tu_get_scratch_bo(cmd->device, |
| staging_image.layout[0].size, |
| &staging_image.bo); |
| if (result != VK_SUCCESS) { |
| cmd->record_result = result; |
| return; |
| } |
| |
| struct tu_image_view staging; |
| tu_image_view_copy(&staging, &staging_image, src_format, |
| &staging_subresource, 0, false); |
| |
| ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); |
| coords(ops, cs, &staging_offset, &src_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &staging, i); |
| ops->run(cmd, cs); |
| } |
| |
| /* When executed by the user there has to be a pipeline barrier here, |
| * but since we're doing it manually we'll have to flush ourselves. |
| */ |
| tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); |
| tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); |
| |
| tu_image_view_copy(&staging, &staging_image, dst_format, |
| &staging_subresource, 0, false); |
| |
| ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, |
| ROTATE_0, false, dst_image->layout[0].ubwc); |
| coords(ops, cs, &dst_offset, &staging_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } else { |
| tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false); |
| tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false); |
| |
| ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, |
| ROTATE_0, false, dst_image->layout[0].ubwc); |
| coords(ops, cs, &dst_offset, &src_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdCopyImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage destImage, |
| VkImageLayout destImageLayout, |
| uint32_t regionCount, |
| const VkImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, destImage); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) |
| tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i); |
| } |
| |
| static void |
| copy_buffer(struct tu_cmd_buffer *cmd, |
| uint64_t dst_va, |
| uint64_t src_va, |
| uint64_t size, |
| uint32_t block_size) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM; |
| uint64_t blocks = size / block_size; |
| |
| ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); |
| |
| while (blocks) { |
| uint32_t src_x = (src_va & 63) / block_size; |
| uint32_t dst_x = (dst_va & 63) / block_size; |
| uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x); |
| |
| ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1); |
| ops->dst_buffer( cs, format, dst_va & ~63, 0); |
| ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1}); |
| ops->run(cmd, cs); |
| |
| src_va += width * block_size; |
| dst_va += width * block_size; |
| blocks -= width; |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkBuffer dstBuffer, |
| uint32_t regionCount, |
| const VkBufferCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); |
| TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); |
| |
| for (unsigned i = 0; i < regionCount; ++i) { |
| copy_buffer(cmd, |
| tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset, |
| tu_buffer_iova(src_buffer) + pRegions[i].srcOffset, |
| pRegions[i].size, 1); |
| } |
| } |
| |
| void |
| tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize dataSize, |
| const void *pData) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); |
| |
| struct tu_cs_memory tmp; |
| VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp); |
| if (result != VK_SUCCESS) { |
| cmd->record_result = result; |
| return; |
| } |
| |
| memcpy(tmp.map, pData, dataSize); |
| copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4); |
| } |
| |
| void |
| tu_CmdFillBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize fillSize, |
| uint32_t data) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| |
| if (fillSize == VK_WHOLE_SIZE) |
| fillSize = buffer->size - dstOffset; |
| |
| uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset; |
| uint32_t blocks = fillSize / 4; |
| |
| ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true, false); |
| ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); |
| |
| while (blocks) { |
| uint32_t dst_x = (dst_va & 63) / 4; |
| uint32_t width = MIN2(blocks, 0x4000 - dst_x); |
| |
| ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0); |
| ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1}); |
| ops->run(cmd, cs); |
| |
| dst_va += width * 4; |
| blocks -= width; |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdResolveImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageResolve *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| |
| ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, |
| ROTATE_0, false, dst_image->layout[0].ubwc); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) { |
| const VkImageResolve *info = &pRegions[i]; |
| uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount); |
| |
| assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount); |
| /* TODO: aspect masks possible ? */ |
| |
| coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent); |
| |
| struct tu_image_view dst, src; |
| tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z); |
| tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| #define for_each_layer(layer, layer_mask, layers) \ |
| for (uint32_t layer = 0; \ |
| layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : layers); \ |
| layer++) \ |
| if (!layer_mask || (layer_mask & BIT(layer))) |
| |
| void |
| tu_resolve_sysmem(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| struct tu_image_view *src, |
| struct tu_image_view *dst, |
| uint32_t layer_mask, |
| uint32_t layers, |
| const VkRect2D *rect) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| |
| assert(src->image->vk_format == dst->image->vk_format); |
| |
| ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, |
| ROTATE_0, false, dst->ubwc_enabled); |
| ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); |
| |
| for_each_layer(i, layer_mask, layers) { |
| ops->src(cmd, cs, src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, dst, i); |
| ops->run(cmd, cs); |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| static void |
| clear_image(struct tu_cmd_buffer *cmd, |
| struct tu_image *image, |
| const VkClearValue *clear_value, |
| const VkImageSubresourceRange *range, |
| VkImageAspectFlags aspect_mask) |
| { |
| uint32_t level_count = tu_get_levelCount(image, range); |
| uint32_t layer_count = tu_get_layerCount(image, range); |
| struct tu_cs *cs = &cmd->cs; |
| VkFormat format = image->vk_format; |
| if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) |
| format = copy_format(format, aspect_mask, false); |
| |
| if (image->type == VK_IMAGE_TYPE_3D) { |
| assert(layer_count == 1); |
| assert(range->baseArrayLayer == 0); |
| } |
| |
| const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops; |
| |
| ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc); |
| if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) |
| ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value); |
| else |
| ops->clear_value(cs, format, clear_value); |
| |
| for (unsigned j = 0; j < level_count; j++) { |
| if (image->type == VK_IMAGE_TYPE_3D) |
| layer_count = u_minify(image->extent.depth, range->baseMipLevel + j); |
| |
| ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) { |
| u_minify(image->extent.width, range->baseMipLevel + j), |
| u_minify(image->extent.height, range->baseMipLevel + j) |
| }); |
| |
| struct tu_image_view dst; |
| tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) { |
| .aspectMask = aspect_mask, |
| .mipLevel = range->baseMipLevel + j, |
| .baseArrayLayer = range->baseArrayLayer, |
| .layerCount = 1, |
| }, 0, false); |
| |
| for (uint32_t i = 0; i < layer_count; i++) { |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_CmdClearColorImage(VkCommandBuffer commandBuffer, |
| VkImage image_h, |
| VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, image, image_h); |
| |
| for (unsigned i = 0; i < rangeCount; i++) |
| clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT); |
| } |
| |
| void |
| tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, |
| VkImage image_h, |
| VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, image, image_h); |
| |
| for (unsigned i = 0; i < rangeCount; i++) { |
| const VkImageSubresourceRange *range = &pRanges[i]; |
| |
| if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { |
| /* can't clear both depth and stencil at once, split up the aspect mask */ |
| uint32_t b; |
| for_each_bit(b, range->aspectMask) |
| clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b)); |
| continue; |
| } |
| |
| clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask); |
| } |
| } |
| |
| static void |
| tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, |
| uint32_t attachment_count, |
| const VkClearAttachment *attachments, |
| uint32_t rect_count, |
| const VkClearRect *rects) |
| { |
| /* the shader path here is special, it avoids changing MRT/etc state */ |
| const struct tu_render_pass *pass = cmd->state.pass; |
| const struct tu_subpass *subpass = cmd->state.subpass; |
| const uint32_t mrt_count = subpass->color_count; |
| struct tu_cs *cs = &cmd->draw_cs; |
| uint32_t clear_value[MAX_RTS][4]; |
| float z_clear_val = 0.0f; |
| uint8_t s_clear_val = 0; |
| uint32_t clear_rts = 0, clear_components = 0, num_rts = 0, b; |
| bool z_clear = false; |
| bool s_clear = false; |
| bool layered_clear = false; |
| uint32_t max_samples = 1; |
| |
| for (uint32_t i = 0; i < attachment_count; i++) { |
| uint32_t a; |
| if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { |
| uint32_t c = attachments[i].colorAttachment; |
| a = subpass->color_attachments[c].attachment; |
| if (a == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| clear_rts |= 1 << c; |
| clear_components |= 0xf << (c * 4); |
| memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t)); |
| } else { |
| a = subpass->depth_stencil_attachment.attachment; |
| if (a == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| z_clear = true; |
| z_clear_val = attachments[i].clearValue.depthStencil.depth; |
| } |
| |
| if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| s_clear = true; |
| s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff; |
| } |
| } |
| |
| max_samples = MAX2(max_samples, pass->attachments[a].samples); |
| } |
| |
| /* disable all draw states so they don't interfere |
| * TODO: use and re-use draw states |
| * we have to disable draw states individually to preserve |
| * input attachment states, because a secondary command buffer |
| * won't be able to restore them |
| */ |
| tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2)); |
| for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) { |
| if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM || |
| i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM) |
| continue; |
| tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) | |
| CP_SET_DRAW_STATE__0_DISABLE); |
| tu_cs_emit_qw(cs, 0); |
| } |
| cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE; |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | |
| A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | |
| 0xfc000000); |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count); |
| for (uint32_t i = 0; i < mrt_count; i++) { |
| if (clear_rts & (1 << i)) |
| tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4)); |
| else |
| tu_cs_emit(cs, 0); |
| } |
| |
| for (uint32_t i = 0; i < rect_count; i++) { |
| if (rects[i].baseArrayLayer || rects[i].layerCount > 1) |
| layered_clear = true; |
| } |
| |
| /* a630 doesn't support multiview masks, which means that we can't use the |
| * normal multiview path without potentially recompiling a shader on-demand |
| * or using a more complicated variant that takes the mask as a const. Just |
| * use the layered path instead, since it shouldn't be much worse. |
| */ |
| if (subpass->multiview_mask) { |
| layered_clear = true; |
| } |
| |
| r3d_common(cmd, cs, false, num_rts, layered_clear); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components)); |
| tu_cs_emit_regs(cs, |
| A6XX_RB_RENDER_COMPONENTS(.dword = clear_components)); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_FS_OUTPUT_CNTL0(), |
| A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count)); |
| |
| tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff)); |
| for (uint32_t i = 0; i < mrt_count; i++) { |
| tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i, |
| .component_enable = COND(clear_rts & (1 << i), 0xf))); |
| } |
| |
| if (z_clear) { |
| tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); |
| tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); |
| } |
| |
| tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL( |
| .z_enable = z_clear, |
| .z_write_enable = z_clear, |
| .zfunc = FUNC_ALWAYS)); |
| tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL( |
| .stencil_enable = s_clear, |
| .func = FUNC_ALWAYS, |
| .zpass = STENCIL_REPLACE)); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff)); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff)); |
| tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val)); |
| |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | |
| CP_LOAD_STATE6_0_NUM_UNIT(num_rts)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); |
| tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); |
| for_each_bit(b, clear_rts) |
| tu_cs_emit_array(cs, clear_value[b], 4); |
| |
| for (uint32_t i = 0; i < rect_count; i++) { |
| /* This should be true because of this valid usage for |
| * vkCmdClearAttachments: |
| * |
| * "If the render pass instance this is recorded in uses multiview, |
| * then baseArrayLayer must be zero and layerCount must be one" |
| */ |
| assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0); |
| |
| for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) { |
| r3d_coords_raw(cs, (float[]) { |
| rects[i].rect.offset.x, rects[i].rect.offset.y, |
| z_clear_val, uif(rects[i].baseArrayLayer + layer), |
| rects[i].rect.offset.x + rects[i].rect.extent.width, |
| rects[i].rect.offset.y + rects[i].rect.extent.height, |
| z_clear_val, 1.0f, |
| }); |
| r3d_run(cmd, cs); |
| } |
| } |
| } |
| |
| static void |
| pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4]) |
| { |
| switch (format) { |
| case VK_FORMAT_X8_D24_UNORM_PACK32: |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) | |
| val->depthStencil.stencil << 24; |
| return; |
| case VK_FORMAT_D16_UNORM: |
| clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16); |
| return; |
| case VK_FORMAT_D32_SFLOAT: |
| clear_value[0] = fui(val->depthStencil.depth); |
| return; |
| case VK_FORMAT_S8_UINT: |
| clear_value[0] = val->depthStencil.stencil; |
| return; |
| default: |
| break; |
| } |
| |
| float tmp[4]; |
| memcpy(tmp, val->color.float32, 4 * sizeof(float)); |
| if (vk_format_is_srgb(format)) { |
| for (int i = 0; i < 4; i++) |
| tmp[i] = util_format_linear_to_srgb_float(tmp[i]); |
| } |
| |
| #define PACK_F(type) util_format_##type##_pack_rgba_float \ |
| ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1) |
| switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { |
| case 4: |
| PACK_F(r4g4b4a4_unorm); |
| break; |
| case 5: |
| if (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6) |
| PACK_F(r5g6b5_unorm); |
| else |
| PACK_F(r5g5b5a1_unorm); |
| break; |
| case 8: |
| if (vk_format_is_snorm(format)) |
| PACK_F(r8g8b8a8_snorm); |
| else if (vk_format_is_unorm(format)) |
| PACK_F(r8g8b8a8_unorm); |
| else |
| pack_int8(clear_value, val->color.uint32); |
| break; |
| case 10: |
| if (vk_format_is_int(format)) |
| pack_int10_2(clear_value, val->color.uint32); |
| else |
| PACK_F(r10g10b10a2_unorm); |
| break; |
| case 11: |
| clear_value[0] = float3_to_r11g11b10f(val->color.float32); |
| break; |
| case 16: |
| if (vk_format_is_snorm(format)) |
| PACK_F(r16g16b16a16_snorm); |
| else if (vk_format_is_unorm(format)) |
| PACK_F(r16g16b16a16_unorm); |
| else if (vk_format_is_float(format)) |
| PACK_F(r16g16b16a16_float); |
| else |
| pack_int16(clear_value, val->color.uint32); |
| break; |
| case 32: |
| memcpy(clear_value, val->color.float32, 4 * sizeof(float)); |
| break; |
| default: |
| unreachable("unexpected channel size"); |
| } |
| #undef PACK_F |
| } |
| |
| static void |
| clear_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat format, |
| uint8_t clear_mask, |
| uint32_t gmem_offset, |
| const VkClearValue *value) |
| { |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); |
| tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format))); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask)); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); |
| tu_cs_emit(cs, gmem_offset); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); |
| tu_cs_emit(cs, 0); |
| |
| uint32_t clear_vals[4] = {}; |
| pack_gmem_clear_value(value, format, clear_vals); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); |
| tu_cs_emit_array(cs, clear_vals, 4); |
| |
| tu6_emit_event_write(cmd, cs, BLIT); |
| } |
| |
| static void |
| tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t attachment, |
| VkImageAspectFlags mask, |
| const VkClearValue *value) |
| { |
| const struct tu_render_pass_attachment *att = |
| &cmd->state.pass->attachments[attachment]; |
| |
| if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { |
| if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) |
| clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value); |
| if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) |
| clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value); |
| return; |
| } |
| |
| clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value); |
| } |
| |
| static void |
| tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, |
| uint32_t attachment_count, |
| const VkClearAttachment *attachments, |
| uint32_t rect_count, |
| const VkClearRect *rects) |
| { |
| const struct tu_subpass *subpass = cmd->state.subpass; |
| struct tu_cs *cs = &cmd->draw_cs; |
| |
| /* TODO: swap the loops for smaller cmdstream */ |
| for (unsigned i = 0; i < rect_count; i++) { |
| unsigned x1 = rects[i].rect.offset.x; |
| unsigned y1 = rects[i].rect.offset.y; |
| unsigned x2 = x1 + rects[i].rect.extent.width - 1; |
| unsigned y2 = y1 + rects[i].rect.extent.height - 1; |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); |
| tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); |
| tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); |
| |
| for (unsigned j = 0; j < attachment_count; j++) { |
| uint32_t a; |
| if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) |
| a = subpass->color_attachments[attachments[j].colorAttachment].attachment; |
| else |
| a = subpass->depth_stencil_attachment.attachment; |
| |
| if (a == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask, |
| &attachments[j].clearValue); |
| } |
| } |
| } |
| |
| void |
| tu_CmdClearAttachments(VkCommandBuffer commandBuffer, |
| uint32_t attachmentCount, |
| const VkClearAttachment *pAttachments, |
| uint32_t rectCount, |
| const VkClearRect *pRects) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| struct tu_cs *cs = &cmd->draw_cs; |
| |
| /* sysmem path behaves like a draw, note we don't have a way of using different |
| * flushes for sysmem/gmem, so this needs to be outside of the cond_exec |
| */ |
| tu_emit_cache_flush_renderpass(cmd, cs); |
| |
| /* vkCmdClearAttachments is supposed to respect the predicate if active. |
| * The easiest way to do this is to always use the 3d path, which always |
| * works even with GMEM because it's just a simple draw using the existing |
| * attachment state. However it seems that IGNORE_VISIBILITY draws must be |
| * skipped in the binning pass, since otherwise they produce binning data |
| * which isn't consumed and leads to the wrong binning data being read, so |
| * condition on GMEM | SYSMEM. |
| */ |
| if (cmd->state.predication_active) { |
| tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM | |
| CP_COND_EXEC_0_RENDER_MODE_SYSMEM); |
| tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); |
| tu_cond_exec_end(cs); |
| return; |
| } |
| |
| tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM); |
| tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); |
| tu_cond_exec_end(cs); |
| |
| tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); |
| tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); |
| tu_cond_exec_end(cs); |
| |
| for (uint32_t j = 0; j < attachmentCount; j++) { |
| if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0) |
| continue; |
| cmd->state.lrz.valid = false; |
| cmd->state.dirty |= TU_CMD_DIRTY_LRZ; |
| } |
| } |
| |
| static void |
| clear_sysmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| VkFormat format, |
| VkImageAspectFlags clear_mask, |
| const VkRenderPassBeginInfo *info, |
| uint32_t a, |
| bool separate_stencil) |
| { |
| const struct tu_framebuffer *fb = cmd->state.framebuffer; |
| const struct tu_image_view *iview = fb->attachments[a].attachment; |
| const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views; |
| const struct blit_ops *ops = &r2d_ops; |
| if (cmd->state.pass->attachments[a].samples > 1) |
| ops = &r3d_ops; |
| |
| ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled); |
| ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent); |
| ops->clear_value(cs, format, &info->pClearValues[a]); |
| |
| for_each_layer(i, clear_views, fb->layers) { |
| if (separate_stencil) { |
| if (ops == &r3d_ops) |
| r3d_dst_stencil(cs, iview, i); |
| else |
| r2d_dst_stencil(cs, iview, i); |
| } else { |
| ops->dst(cs, iview, i); |
| } |
| ops->run(cmd, cs); |
| } |
| |
| ops->teardown(cmd, cs); |
| } |
| |
| void |
| tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t a, |
| const VkRenderPassBeginInfo *info) |
| { |
| const struct tu_render_pass_attachment *attachment = |
| &cmd->state.pass->attachments[a]; |
| |
| if (!attachment->clear_mask) |
| return; |
| |
| /* Wait for any flushes at the beginning of the renderpass to complete */ |
| tu_cs_emit_wfi(cs); |
| |
| if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { |
| if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, |
| info, a, false); |
| } |
| if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT, |
| info, a, true); |
| } |
| } else { |
| clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask, |
| info, a, false); |
| } |
| |
| /* The spec doesn't explicitly say, but presumably the initial renderpass |
| * clear is considered part of the renderpass, and therefore barriers |
| * aren't required inside the subpass/renderpass. Therefore we need to |
| * flush CCU color into CCU depth here, just like with |
| * vkCmdClearAttachments(). Note that because this only happens at the |
| * beginning of a renderpass, and renderpass writes are considered |
| * "incoherent", we shouldn't have to worry about syncing depth into color |
| * beforehand as depth should already be flushed. |
| */ |
| if (vk_format_is_depth_or_stencil(attachment->format)) { |
| tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); |
| tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH); |
| } else { |
| tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); |
| tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR); |
| } |
| } |
| |
| void |
| tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t a, |
| const VkRenderPassBeginInfo *info) |
| { |
| const struct tu_render_pass_attachment *attachment = |
| &cmd->state.pass->attachments[a]; |
| |
| if (!attachment->clear_mask) |
| return; |
| |
| tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples))); |
| |
| tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, |
| &info->pClearValues[a]); |
| } |
| |
| static void |
| tu_emit_blit(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const struct tu_image_view *iview, |
| const struct tu_render_pass_attachment *attachment, |
| bool resolve, |
| bool separate_stencil) |
| { |
| tu_cs_emit_regs(cs, |
| A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples))); |
| |
| tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO( |
| .unk0 = !resolve, |
| .gmem = !resolve, |
| /* "integer" bit disables msaa resolve averaging */ |
| .integer = vk_format_is_int(attachment->format))); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4); |
| if (separate_stencil) { |
| tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS); |
| tu_cs_emit_qw(cs, iview->stencil_base_addr); |
| tu_cs_emit(cs, iview->stencil_PITCH); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil)); |
| } else { |
| tu_cs_emit(cs, iview->RB_BLIT_DST_INFO); |
| tu_cs_image_ref_2d(cs, iview, 0, false); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3); |
| tu_cs_image_flag_ref(cs, iview, 0); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset)); |
| } |
| |
| tu6_emit_event_write(cmd, cs, BLIT); |
| } |
| |
| static bool |
| blit_can_resolve(VkFormat format) |
| { |
| const struct util_format_description *desc = vk_format_description(format); |
| |
| /* blit event can only do resolve for simple cases: |
| * averaging samples as unsigned integers or choosing only one sample |
| */ |
| if (vk_format_is_snorm(format) || vk_format_is_srgb(format)) |
| return false; |
| |
| /* can't do formats with larger channel sizes |
| * note: this includes all float formats |
| * note2: single channel integer formats seem OK |
| */ |
| if (desc->channel[0].size > 10) |
| return false; |
| |
| switch (format) { |
| /* for unknown reasons blit event can't msaa resolve these formats when tiled |
| * likely related to these formats having different layout from other cpp=2 formats |
| */ |
| case VK_FORMAT_R8G8_UNORM: |
| case VK_FORMAT_R8G8_UINT: |
| case VK_FORMAT_R8G8_SINT: |
| /* TODO: this one should be able to work? */ |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| return false; |
| default: |
| break; |
| } |
| |
| return true; |
| } |
| |
| void |
| tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t a, |
| bool force_load) |
| { |
| const struct tu_image_view *iview = |
| cmd->state.framebuffer->attachments[a].attachment; |
| const struct tu_render_pass_attachment *attachment = |
| &cmd->state.pass->attachments[a]; |
| |
| if (attachment->load || force_load) |
| tu_emit_blit(cmd, cs, iview, attachment, false, false); |
| |
| if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load)) |
| tu_emit_blit(cmd, cs, iview, attachment, false, true); |
| } |
| |
| static void |
| store_cp_blit(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| struct tu_image_view *iview, |
| uint32_t samples, |
| bool separate_stencil, |
| VkFormat format, |
| uint32_t gmem_offset, |
| uint32_t cpp) |
| { |
| r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, |
| iview->ubwc_enabled, true); |
| if (separate_stencil) |
| r2d_dst_stencil(cs, iview, 0); |
| else |
| r2d_dst(cs, iview, 0); |
| |
| tu_cs_emit_regs(cs, |
| A6XX_SP_PS_2D_SRC_INFO( |
| .color_format = tu6_format_texture(format, TILE6_2).fmt, |
| .tile_mode = TILE6_2, |
| .srgb = vk_format_is_srgb(format), |
| .samples = tu_msaa_samples(samples), |
| .samples_average = !vk_format_is_int(format), |
| .unk20 = 1, |
| .unk22 = 1), |
| /* note: src size does not matter when not scaling */ |
| A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff), |
| A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + gmem_offset), |
| A6XX_SP_PS_2D_SRC_HI(), |
| A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp)); |
| |
| /* sync GMEM writes with CACHE. */ |
| tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); |
| |
| /* Wait for CACHE_INVALIDATE to land */ |
| tu_cs_emit_wfi(cs); |
| |
| tu_cs_emit_pkt7(cs, CP_BLIT, 1); |
| tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); |
| |
| /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to |
| * sysmem, and we generally assume that GMEM renderpasses leave their |
| * results in sysmem, so we need to flush manually here. |
| */ |
| tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); |
| } |
| |
| void |
| tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t a, |
| uint32_t gmem_a) |
| { |
| const VkRect2D *render_area = &cmd->state.render_area; |
| struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a]; |
| struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment; |
| struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a]; |
| |
| if (!dst->store && !dst->store_stencil) |
| return; |
| |
| uint32_t x1 = render_area->offset.x; |
| uint32_t y1 = render_area->offset.y; |
| uint32_t x2 = x1 + render_area->extent.width; |
| uint32_t y2 = y1 + render_area->extent.height; |
| /* x2/y2 can be unaligned if equal to the size of the image, |
| * since it will write into padding space |
| * the one exception is linear levels which don't have the |
| * required y padding in the layout (except for the last level) |
| */ |
| bool need_y2_align = |
| y2 != iview->extent.height || iview->need_y2_align; |
| |
| bool unaligned = |
| x1 % GMEM_ALIGN_W || (x2 % GMEM_ALIGN_W && x2 != iview->extent.width) || |
| y1 % GMEM_ALIGN_H || (y2 % GMEM_ALIGN_H && need_y2_align); |
| |
| /* use fast path when render area is aligned, except for unsupported resolve cases */ |
| if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) { |
| if (dst->store) |
| tu_emit_blit(cmd, cs, iview, src, true, false); |
| if (dst->store_stencil) |
| tu_emit_blit(cmd, cs, iview, src, true, true); |
| return; |
| } |
| |
| if (dst->samples > 1) { |
| /* I guess we need to use shader path in this case? |
| * need a testcase which fails because of this |
| */ |
| tu_finishme("unaligned store of msaa attachment\n"); |
| return; |
| } |
| |
| r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); |
| |
| VkFormat format = src->format; |
| if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) |
| format = VK_FORMAT_D32_SFLOAT; |
| |
| if (dst->store) { |
| store_cp_blit(cmd, cs, iview, src->samples, false, format, |
| src->gmem_offset, src->cpp); |
| } |
| if (dst->store_stencil) { |
| store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, |
| src->gmem_offset_stencil, src->samples); |
| } |
| } |