| /* |
| * Copyright 2019-2020 Valve Corporation |
| * SPDX-License-Identifier: MIT |
| * |
| * Authors: |
| * Jonathan Marek <jonathan@marek.ca> |
| */ |
| |
| #include "tu_private.h" |
| |
| #include "tu_cs.h" |
| #include "vk_format.h" |
| |
| #include "util/format_r11g11b10f.h" |
| #include "util/format_rgb9e5.h" |
| #include "util/format_srgb.h" |
| #include "util/u_half.h" |
| |
/* Convert a float to an unsigned-normalized integer of the given bit width,
 * clamping the input to [0, 1] and rounding to nearest-even (the rounding
 * mode Vulkan requires for float->UNORM conversions).
 *
 * Uses an unsigned literal for the shift: "1 << bits" would be a signed
 * left-shift with undefined behavior for bits == 31.  Current callers pass
 * 8 or 24, which the assert documents.
 */
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   assert(bits > 0 && bits < 32);
   return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1u << bits) - 1));
}
| |
| /* r2d_ = BLIT_OP_SCALE operations */ |
| |
/* Map a hardware color format to the intermediate format (ifmt) the a6xx
 * 2D engine uses to process pixels of that format.  Must stay in sync with
 * the formats the rest of this file feeds to the 2D path.
 */
static enum a6xx_2d_ifmt
format_to_ifmt(enum a6xx_format fmt)
{
   switch (fmt) {
   /* 8-bit (and packed <=16-bit) normalized formats; D24S8 is included
    * because clear/blit accesses it through the AS_R8G8B8A8 view */
   case FMT6_A8_UNORM:
   case FMT6_8_UNORM:
   case FMT6_8_SNORM:
   case FMT6_8_8_UNORM:
   case FMT6_8_8_SNORM:
   case FMT6_8_8_8_8_UNORM:
   case FMT6_8_8_8_X8_UNORM:
   case FMT6_8_8_8_8_SNORM:
   case FMT6_4_4_4_4_UNORM:
   case FMT6_5_5_5_1_UNORM:
   case FMT6_5_6_5_UNORM:
   case FMT6_Z24_UNORM_S8_UINT:
   case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
      return R2D_UNORM8;

   /* 32-bit integer formats pass through as raw 32-bit values */
   case FMT6_32_UINT:
   case FMT6_32_SINT:
   case FMT6_32_32_UINT:
   case FMT6_32_32_SINT:
   case FMT6_32_32_32_32_UINT:
   case FMT6_32_32_32_32_SINT:
      return R2D_INT32;

   /* 16-bit integer formats (10_10_10_2_UINT fits in 16-bit channels) */
   case FMT6_16_UINT:
   case FMT6_16_SINT:
   case FMT6_16_16_UINT:
   case FMT6_16_16_SINT:
   case FMT6_16_16_16_16_UINT:
   case FMT6_16_16_16_16_SINT:
   case FMT6_10_10_10_2_UINT:
      return R2D_INT16;

   /* 8-bit integer formats */
   case FMT6_8_UINT:
   case FMT6_8_SINT:
   case FMT6_8_8_UINT:
   case FMT6_8_8_SINT:
   case FMT6_8_8_8_8_UINT:
   case FMT6_8_8_8_8_SINT:
      return R2D_INT8;

   /* 16-bit normalized and 32-bit float formats need full float32
    * precision in the intermediate representation */
   case FMT6_16_UNORM:
   case FMT6_16_SNORM:
   case FMT6_16_16_UNORM:
   case FMT6_16_16_SNORM:
   case FMT6_16_16_16_16_UNORM:
   case FMT6_16_16_16_16_SNORM:
   case FMT6_32_FLOAT:
   case FMT6_32_32_FLOAT:
   case FMT6_32_32_32_32_FLOAT:
      return R2D_FLOAT32;

   /* half-float formats, and 10_10_10_2 unorm which is handled via a
    * float16 intermediate (see r2d_setup_common's DEST override) */
   case FMT6_16_FLOAT:
   case FMT6_16_16_FLOAT:
   case FMT6_16_16_16_16_FLOAT:
   case FMT6_11_11_10_FLOAT:
   case FMT6_10_10_10_2_UNORM:
   case FMT6_10_10_10_2_UNORM_DEST:
      return R2D_FLOAT16;

   default:
      unreachable("bad format");
      return 0;
   }
}
| |
/* Emit destination and (optionally) source rectangle coordinates for a
 * 2D-engine operation.  The BR (bottom-right) registers are inclusive,
 * hence the "- 1".  Pass src == NULL for source-less operations (clears),
 * in which case only the destination rectangle is emitted.
 */
static void
r2d_coords(struct tu_cs *cs,
           const VkOffset2D *dst,
           const VkOffset2D *src,
           const VkExtent2D *extent)
{
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y),
                   A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));

   if (!src)
      return;

   tu_cs_emit_regs(cs,
                   A6XX_GRAS_2D_SRC_TL_X(.x = src->x),
                   A6XX_GRAS_2D_SRC_BR_X(.x = src->x + extent->width - 1),
                   A6XX_GRAS_2D_SRC_TL_Y(.y = src->y),
                   A6XX_GRAS_2D_SRC_BR_Y(.y = src->y + extent->height - 1));
}
| |
/* Emit the solid-color value for a 2D-engine clear into the
 * RB_2D_SRC_SOLID_C0..C3 registers, with each channel packed according to
 * the intermediate format (see format_to_ifmt) that the 2D engine uses for
 * the destination format.
 */
static void
r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   uint32_t clear_value[4] = {};

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      /* cleared as r8g8b8a8_unorm using special format: depth is split
       * across the rgb channels byte by byte, stencil goes in alpha */
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      clear_value[1] = clear_value[0] >> 8;
      clear_value[2] = clear_value[0] >> 16;
      clear_value[3] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      /* R2D_FLOAT32: raw float bits */
      clear_value[0] = fui(val->depthStencil.depth);
      break;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      /* cleared as UINT32: pack the shared-exponent value on the CPU */
      clear_value[0] = float3_to_rgb9e5(val->color.float32);
      break;
   default:
      assert(!vk_format_is_depth_or_stencil(format));
      const struct util_format_description *desc = vk_format_description(format);
      enum a6xx_2d_ifmt ifmt = format_to_ifmt(tu6_base_format(format));

      assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
                      format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));

      for (unsigned i = 0; i < desc->nr_channels; i++) {
         const struct util_format_channel_description *ch = &desc->channel[i];
         if (ifmt == R2D_UNORM8) {
            float linear = val->color.float32[i];
            /* NOTE(review): sRGB encoding is pre-applied on the CPU for the
             * rgb channels (alpha stays linear) — presumably because the 2D
             * engine stores these unorm8 values untransformed; confirm
             * against hardware docs. */
            if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
               linear = util_format_linear_to_srgb_float(val->color.float32[i]);

            if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
               clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
            else
               clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
         } else if (ifmt == R2D_FLOAT16) {
            clear_value[i] = util_float_to_half(val->color.float32[i]);
         } else {
            /* integer and float32 intermediates take the raw 32-bit value */
            assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
                   ifmt == R2D_INT16 || ifmt == R2D_INT8);
            clear_value[i] = val->color.uint32[i];
         }
      }
      break;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
   tu_cs_emit_array(cs, clear_value, 4);
}
| |
/* Emit the 2D-engine source image state from the image view's pre-baked
 * SP_PS_2D_SRC_* register values, selecting the given array layer and
 * enabling bilinear filtering for any filter other than NEAREST.
 */
static void
r2d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        VkFilter filter)
{
   uint32_t src_info = iview->SP_PS_2D_SRC_INFO;
   if (filter != VK_FILTER_NEAREST)
      src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER;

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
   tu_cs_emit(cs, src_info);
   tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
   tu_cs_image_ref_2d(cs, iview, layer, true);

   /* UBWC flags buffer reference (ignored by hw when UBWC is disabled) —
    * TODO confirm; emitted unconditionally here */
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}
| |
/* Emit 2D-engine source state for reading from a linear buffer at the
 * given GPU address, treated as a tightly described 2D region of
 * width x height texels with the given row pitch in bytes.
 */
static void
r2d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format),
                      /* unk20/unk22: set by the blob for this path; exact
                       * meaning unknown */
                      .unk20 = 1,
                      .unk22 = 1),
                   A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
                   A6XX_SP_PS_2D_SRC_LO((uint32_t) va),
                   A6XX_SP_PS_2D_SRC_HI(va >> 32),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
}
| |
/* Emit the 2D-engine destination image state for one array layer from the
 * image view's pre-baked RB_2D_DST_INFO.  The 2D engine cannot write
 * multisampled images, hence the assert.
 */
static void
r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   assert(iview->image->samples == 1);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_2D_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, layer, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}
| |
/* Emit 2D-engine destination state for writing to a linear buffer at the
 * given GPU address with the given row pitch in bytes.
 */
static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_RB_2D_DST_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format)),
                   A6XX_RB_2D_DST_LO((uint32_t) va),
                   A6XX_RB_2D_DST_HI(va >> 32),
                   A6XX_RB_2D_DST_SIZE(.pitch = pitch));
}
| |
/* Emit the format-dependent 2D-engine control state shared by clears and
 * blits: partial-write configuration for D24S8, the blit control word
 * (mirrored into RB and GRAS), and the source format register.
 *
 * clear:   solid-color fill instead of a copy
 * scissor: honor the 2D scissor (used by the GMEM clear path)
 */
static void
r2d_setup_common(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 VkImageAspectFlags aspect_mask,
                 enum a6xx_rotation rotation,
                 bool clear,
                 bool scissor)
{
   enum a6xx_format format = tu6_base_format(vk_format);
   enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
   uint32_t unknown_8c01 = 0;

   /* note: the only format with partial clearing is D24S8 */
   if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      /* preserve stencil channel (magic values from the blob; presumably a
       * channel write-mask encoding — TODO confirm) */
      if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         unknown_8c01 = 0x08000041;
      /* preserve depth channels */
      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         unknown_8c01 = 0x00084001;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(cs, unknown_8c01);

   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
         .scissor = scissor,
         .rotate = rotation,
         .solid_color = clear,
         .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
         .color_format = format,
         .mask = 0xf,
         .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
      ).value;

   /* the same control word goes into both the RB and GRAS copies */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   /* 10_10_10_2_UNORM destinations are fed through a half-float source
    * format (matches the R2D_FLOAT16 ifmt chosen in format_to_ifmt) */
   if (format == FMT6_10_10_10_2_UNORM_DEST)
      format = FMT6_16_16_16_16_FLOAT;

   tu_cs_emit_regs(cs, A6XX_SP_2D_SRC_FORMAT(
         .sint = vk_format_is_sint(vk_format),
         .uint = vk_format_is_uint(vk_format),
         .color_format = format,
         .srgb = vk_format_is_srgb(vk_format),
         .mask = 0xf));
}
| |
/* Full 2D-engine setup for a standalone (non-GMEM) operation: flush/align
 * caches for sysmem access, then emit the common format state with the
 * scissor disabled.
 */
static void
r2d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          VkImageAspectFlags aspect_mask,
          enum a6xx_rotation rotation,
          bool clear)
{
   tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);

   r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, false);
}
| |
/* Kick one 2D-engine blit with the previously emitted src/dst/coords
 * state.
 */
static void
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
}
| |
| /* r3d_ = shader path operations */ |
| |
/* Hand-assemble the tiny ir3 shaders used by the shader (r3d_) blit/clear
 * path and store them in the global BO: one vertex shader, the blit
 * fragment shader, and one clear fragment shader per possible MRT count
 * (0..MAX_RTS).
 */
void
tu_init_clear_blit_shaders(struct tu6_global *global)
{
/* helpers to build raw ir3 instruction words; fields not listed default
 * to zero */
#define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
#define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
#define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }

   static const instr_t vs_code[] = {
      /* Select between the two rect corners based on vertex id (r0.w):
       * r0.xyz = r0.w ? c1.xyz : c0.xyz
       * r1.xy = r0.w ? c1.zw : c0.zw
       * r0.w = 1.0f
       */
      CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0,
           .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
           .src2 = 3,
           .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
      CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4,
           .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
           .src2 = 3,
           .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}),
      MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ),
      { .cat0 = { .opc = OPC_END } },
   };

   static const instr_t fs_blit[] = {
      /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
       * blit path (its not clear what allows it to not have it)
       */
      CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 63 * 4, .src1_im = 1),
      { .cat0 = { .opc = OPC_END } },
   };

   memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code));
   memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit));

   /* clear shaders: one mov per render target, copying the clear color
    * from consts to the output registers */
   for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) {
      instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts];
      for (uint32_t i = 0; i < num_rts; i++) {
         /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
         *code++ = (instr_t) MOV(.repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4);
      }
      *code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
   }
}
| |
/* Emit the common 3D pipeline state for the shader path: synthetic
 * vertex/fragment shader variants (matching the hand-assembled shaders in
 * tu_init_clear_blit_shaders), VPC/varying setup, rasterizer state with
 * all transforms/clipping disabled, and wide-open scissors.
 *
 * blit:          true for the texturing FS, false for the clear FS
 * num_rts:       number of MRTs the clear FS writes (ignored for blits)
 * layered_clear: VS additionally writes VARYING_SLOT_LAYER
 */
static void
r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts,
           bool layered_clear)
{
   struct ir3_const_state dummy_const_state = {};
   struct ir3_shader dummy_shader = {};

   /* fake variant describing the hand-assembled VS: vertex-id input,
    * position output, plus texcoord varying for blits */
   struct ir3_shader_variant vs = {
      .type = MESA_SHADER_VERTEX,
      .instrlen = 1,
      .constlen = 4,
      .info.max_reg = 1,
      .inputs_count = 1,
      .inputs[0] = {
         .slot = SYSTEM_VALUE_VERTEX_ID,
         .regid = regid(0, 3),
         .sysval = true,
      },
      .outputs_count = blit ? 2 : 1,
      .outputs[0] = {
         .slot = VARYING_SLOT_POS,
         .regid = regid(0, 0),
      },
      .outputs[1] = {
         .slot = VARYING_SLOT_VAR0,
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
      .const_state = &dummy_const_state,
   };
   if (layered_clear) {
      /* second output carries the target layer instead of a texcoord */
      vs.outputs[1].slot = VARYING_SLOT_LAYER;
      vs.outputs[1].regid = regid(1, 1);
      vs.outputs_count = 2;
   }

   /* fake variant for the FS: either the sampling blit shader (one
    * prefetched texture fetch) or the num_rts-output clear shader */
   struct ir3_shader_variant fs = {
      .type = MESA_SHADER_FRAGMENT,
      .instrlen = 1, /* max of 9 instructions with num_rts = 8 */
      .constlen = align(num_rts, 4),
      .info.max_reg = MAX2(num_rts, 1) - 1,
      .total_in = blit ? 2 : 0,
      .num_samp = blit ? 1 : 0,
      .inputs_count = blit ? 2 : 0,
      .inputs[0] = {
         .slot = VARYING_SLOT_VAR0,
         .inloc = 0,
         .compmask = 3,
         .bary = true,
      },
      .inputs[1] = {
         .slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
         .regid = regid(0, 0),
         .sysval = 1,
      },
      .num_sampler_prefetch = blit ? 1 : 0,
      .sampler_prefetch[0] = {
         .src = 0,
         .wrmask = 0xf,
         .cmd = 4,
      },
      .shader = &dummy_shader,
      .const_state = &dummy_const_state,
   };

   /* invalidate all shader-related state */
   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));

   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs,
                      global_iova(cmd, shaders[blit ? GLOBAL_SH_FS_BLIT : (GLOBAL_SH_FS_CLEAR0 + num_rts)]));

   tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
   tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());

   tu6_emit_vpc(cs, &vs, NULL, NULL, NULL, &fs);

   /* REPL_MODE for varying with RECTLIST (2 vertices only) */
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));

   tu6_emit_fs_inputs(cs, &fs);

   /* coordinates are emitted pre-transformed, so bypass the whole
    * viewport transform / clip pipeline */
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_CL_CNTL(
                      .persp_division_disable = 1,
                      .vp_xform_disable = 1,
                      .vp_clip_code_ignore = 1,
                      .clip_disable = 1));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?

   /* scissors wide open (0x7fff is the register maximum) */
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));

   tu_cs_emit_regs(cs,
                   A6XX_VFD_INDEX_OFFSET(),
                   A6XX_VFD_INSTANCE_START_OFFSET());
}
| |
/* Upload the 8 floats of rect coordinates (dst.xy, src.xy for each of the
 * two corners) as VS constants c0-c1, consumed by the hand-assembled
 * vertex shader.
 */
static void
r3d_coords_raw(struct tu_cs *cs, const float *coords)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(2));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
}
| |
| static void |
| r3d_coords(struct tu_cs *cs, |
| const VkOffset2D *dst, |
| const VkOffset2D *src, |
| const VkExtent2D *extent) |
| { |
| int32_t src_x1 = src ? src->x : 0; |
| int32_t src_y1 = src ? src->y : 0; |
| r3d_coords_raw(cs, (float[]) { |
| dst->x, dst->y, |
| src_x1, src_y1, |
| dst->x + extent->width, dst->y + extent->height, |
| src_x1 + extent->width, src_y1 + extent->height, |
| }); |
| } |
| |
/* Upload the clear value as FS constants c0.xyzw for the shader-path
 * clear.  Depth/stencil values are converted into the float channels the
 * clear FS writes through the AS_R8G8B8A8 view; color values are passed
 * through untouched.
 */
static void
r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT: {
      /* cleared as r8g8b8a8_unorm using special format: one byte of the
       * 24-bit depth per rgb channel, stencil in alpha, each normalized
       * back to [0, 1] floats for the FS output */
      uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
   } break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      tu_cs_emit(cs, fui(val->depthStencil.depth));
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   case VK_FORMAT_S8_UINT:
      tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   default:
      /* as color formats use clear value as-is */
      assert(!vk_format_is_depth_or_stencil(format));
      tu_cs_emit_array(cs, val->color.uint32, 4);
      break;
   }
}
| |
| static void |
| r3d_src_common(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| const uint32_t *tex_const, |
| uint32_t offset_base, |
| uint32_t offset_ubwc, |
| VkFilter filter) |
| { |
| struct tu_cs_memory texture = { }; |
| VkResult result = tu_cs_alloc(&cmd->sub_cs, |
| 2, /* allocate space for a sampler too */ |
| A6XX_TEX_CONST_DWORDS, &texture); |
| assert(result == VK_SUCCESS); |
| |
| memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4); |
| |
| /* patch addresses for layer offset */ |
| *(uint64_t*) (texture.map + 4) += offset_base; |
| uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc; |
| texture.map[7] = ubwc_addr; |
| texture.map[8] = ubwc_addr >> 32; |
| |
| texture.map[A6XX_TEX_CONST_DWORDS + 0] = |
| A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) | |
| A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) | |
| A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) | |
| A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) | |
| A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) | |
| 0x60000; /* XXX used by blob, doesn't seem necessary */ |
| texture.map[A6XX_TEX_CONST_DWORDS + 1] = |
| 0x1 | /* XXX used by blob, doesn't seem necessary */ |
| A6XX_TEX_SAMP_1_UNNORM_COORDS | |
| A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR; |
| texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0; |
| texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0; |
| |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | |
| CP_LOAD_STATE6_0_NUM_UNIT(1)); |
| tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_SAMP_LO, 2); |
| tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); |
| |
| tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); |
| tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | |
| CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | |
| CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | |
| CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | |
| CP_LOAD_STATE6_0_NUM_UNIT(1)); |
| tu_cs_emit_qw(cs, texture.iova); |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2); |
| tu_cs_emit_qw(cs, texture.iova); |
| |
| tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1)); |
| } |
| |
/* Shader-path source hook: use the image view's pre-built descriptor,
 * offset to the requested array layer (and the matching UBWC layer).
 */
static void
r3d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        VkFilter filter)
{
   r3d_src_common(cmd, cs, iview->descriptor,
                  iview->layer_size * layer,
                  iview->ubwc_layer_size * layer,
                  filter);
}
| |
/* Shader-path source hook for a linear buffer: build a 2D texture
 * descriptor on the stack describing the buffer region and emit it via
 * r3d_src_common with nearest filtering.
 */
static void
r3d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   uint32_t desc[A6XX_TEX_CONST_DWORDS];

   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   desc[0] =
      COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
      A6XX_TEX_CONST_0_FMT(format.fmt) |
      A6XX_TEX_CONST_0_SWAP(format.swap) |
      A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
      // XXX to swizzle into .w for stencil buffer_to_image
      A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
      A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
      A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
   desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
   desc[2] =
      A6XX_TEX_CONST_2_PITCH(pitch) |
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
   desc[3] = 0;
   /* dwords 4-5: 64-bit base address; 6+: unused for linear buffers */
   desc[4] = va;
   desc[5] = va >> 32;
   for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
      desc[i] = 0;

   r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}
| |
/* Shader-path destination hook: bind one array layer of the image view as
 * MRT 0, including its UBWC flag buffer when enabled.
 */
static void
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
   tu_cs_emit(cs, iview->RB_MRT_BUF_INFO);
   tu_cs_image_ref(cs, iview, layer);
   tu_cs_emit(cs, 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
   tu_cs_image_flag_ref(cs, iview, layer);

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
| |
/* Shader-path destination hook for a linear buffer: bind the buffer as a
 * single-sampled MRT 0 with the given row pitch in bytes.
 */
static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu6_emit_msaa(cs, 1); /* TODO: move to setup */

   tu_cs_emit_regs(cs,
                   A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap),
                   A6XX_RB_MRT_PITCH(0, pitch),
                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
                   A6XX_RB_MRT_BASE_LO(0, (uint32_t) va),
                   A6XX_RB_MRT_BASE_HI(0, va >> 32),
                   A6XX_RB_MRT_BASE_GMEM(0, 0));

   /* no UBWC for linear buffer destinations */
   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
}
| |
| static uint8_t |
| aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask) |
| { |
| uint8_t mask = 0xf; |
| assert(aspect_mask); |
| /* note: the only format with partial writing is D24S8, |
| * clear/blit uses the _AS_R8G8B8A8 format to access it |
| */ |
| if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { |
| if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) |
| mask = 0x7; |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) |
| mask = 0x8; |
| } |
| return mask; |
| } |
| |
/* Full pipeline setup for a shader-path operation: shaders and common 3D
 * state via r3d_common, then output/blend/depth-stencil state for a
 * single MRT with everything except the plain color write disabled.
 * Skips the cache flush and window scissor when called inside a render
 * pass (cmd->state.pass), where that state is already managed.
 */
static void
r3d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          VkImageAspectFlags aspect_mask,
          enum a6xx_rotation rotation,
          bool clear)
{
   if (!cmd->state.pass) {
      tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
      tu6_emit_window_scissor(cs, 0, 0, 0x7fff, 0x7fff);
   }

   /* 0xc00000: magic bin-control value used for this bypass-style
    * rendering — presumably buffers-in-sysmem config; TODO confirm */
   tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
   tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));

   r3d_common(cmd, cs, !clear, clear ? 1 : 0, false);

   /* no depth/sample-mask outputs from the FS (0xfc = unused regid) */
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));

   /* blending and depth/stencil testing fully disabled */
   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());

   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
   tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));

   tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
                                          .color_format = tu6_base_format(vk_format),
                                          .color_sint = vk_format_is_sint(vk_format),
                                          .color_uint = vk_format_is_uint(vk_format)));

   /* component write mask handles partial D24S8 writes */
   tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
                                           .component_enable = aspect_write_mask(vk_format, aspect_mask)));
   tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
   tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
}
| |
/* Kick one shader-path operation: draw a 2-vertex RECTLIST covering the
 * rect emitted via r3d_coords*, ignoring visibility streams.
 */
static void
r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
   tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
                  CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
                  CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
   tu_cs_emit(cs, 1); /* instance count */
   tu_cs_emit(cs, 2); /* vertex count */
}
| |
| /* blit ops - common interface for 2d/shader paths */ |
| |
/* Virtual interface implemented by both the 2D-engine (r2d_) and shader
 * (r3d_) paths, so blit/clear/copy logic can be written once.  The usual
 * sequence is: setup -> (clear_value | src/src_buffer) -> dst/dst_buffer
 * -> coords -> run, with dst/src/run repeated per layer.
 */
struct blit_ops {
   /* set the destination (and optional source) rectangle */
   void (*coords)(struct tu_cs *cs,
                  const VkOffset2D *dst,
                  const VkOffset2D *src,
                  const VkExtent2D *extent);
   /* set the solid color used when setup was called with clear=true */
   void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
   /* bind one layer of an image view as the source */
   void (*src)(
           struct tu_cmd_buffer *cmd,
           struct tu_cs *cs,
           const struct tu_image_view *iview,
           uint32_t layer,
           VkFilter filter);
   /* bind a linear buffer region as the source */
   void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                      VkFormat vk_format,
                      uint64_t va, uint32_t pitch,
                      uint32_t width, uint32_t height);
   /* bind one layer of an image view as the destination */
   void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
   /* bind a linear buffer as the destination */
   void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
   /* emit format-dependent pipeline state; call before the hooks above */
   void (*setup)(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 VkImageAspectFlags aspect_mask,
                 enum a6xx_rotation rotation,
                 bool clear);
   /* execute one blit/clear with the currently bound state */
   void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
};
| |
/* 2D-engine (BLIT_OP_SCALE) implementation of the blit interface */
static const struct blit_ops r2d_ops = {
   .coords = r2d_coords,
   .clear_value = r2d_clear_value,
   .src = r2d_src,
   .src_buffer = r2d_src_buffer,
   .dst = r2d_dst,
   .dst_buffer = r2d_dst_buffer,
   .setup = r2d_setup,
   .run = r2d_run,
};
| |
/* shader-path (draw-based) implementation of the blit interface */
static const struct blit_ops r3d_ops = {
   .coords = r3d_coords,
   .clear_value = r3d_clear_value,
   .src = r3d_src,
   .src_buffer = r3d_src_buffer,
   .dst = r3d_dst,
   .dst_buffer = r3d_dst_buffer,
   .setup = r3d_setup,
   .run = r3d_run,
};
| |
| /* passthrough set coords from 3D extents */ |
| static void |
| coords(const struct blit_ops *ops, |
| struct tu_cs *cs, |
| const VkOffset3D *dst, |
| const VkOffset3D *src, |
| const VkExtent3D *extent) |
| { |
| ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent); |
| } |
| |
| static VkFormat |
| copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer) |
| { |
| if (vk_format_is_compressed(format)) { |
| switch (vk_format_get_blocksize(format)) { |
| case 1: return VK_FORMAT_R8_UINT; |
| case 2: return VK_FORMAT_R16_UINT; |
| case 4: return VK_FORMAT_R32_UINT; |
| case 8: return VK_FORMAT_R32G32_UINT; |
| case 16:return VK_FORMAT_R32G32B32A32_UINT; |
| default: |
| unreachable("unhandled format size"); |
| } |
| } |
| |
| switch (format) { |
| case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: |
| if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT) |
| return VK_FORMAT_R8G8_UNORM; |
| /* fallthrough */ |
| case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: |
| return VK_FORMAT_R8_UNORM; |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer) |
| return VK_FORMAT_R8_UNORM; |
| /* fallthrough */ |
| default: |
| return format; |
| case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
| return VK_FORMAT_R32_UINT; |
| } |
| } |
| |
/* Build a single-layer, single-level 2D image view over the given
 * subresource + layer, reinterpreted with the given format, for use as a
 * blit/copy source or destination.
 *
 * stencil_read: image_to_buffer from d24s8 with the stencil aspect —
 * swizzles the stencil (alpha) channel into .r so it writes out as r8.
 */
static void
tu_image_view_copy_blit(struct tu_image_view *iview,
                        struct tu_image *image,
                        VkFormat format,
                        const VkImageSubresourceLayers *subres,
                        uint32_t layer,
                        bool stencil_read)
{
   VkImageAspectFlags aspect_mask = subres->aspectMask;

   /* always use the AS_R8G8B8A8 format for these */
   if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
       format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   tu_image_view_init(iview, &(VkImageViewCreateInfo) {
      .image = tu_image_to_handle(image),
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = format,
      /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
      .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
      .subresourceRange = {
         .aspectMask = aspect_mask,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subres->baseArrayLayer + layer,
         .layerCount = 1,
      },
   });
}
| |
/* Build a copy view: like tu_image_view_copy_blit, but first remap the
 * format through copy_format (compressed -> uint blocks, planar -> plane
 * format, etc.).
 */
static void
tu_image_view_copy(struct tu_image_view *iview,
                   struct tu_image *image,
                   VkFormat format,
                   const VkImageSubresourceLayers *subres,
                   uint32_t layer,
                   bool stencil_read)
{
   format = copy_format(format, subres->aspectMask, false);
   tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read);
}
| |
/* Build a blit view over one layer of a subresource, using the image's
 * own format (blits, unlike copies, never reinterpret the format).
 */
static void
tu_image_view_blit(struct tu_image_view *iview,
                   struct tu_image *image,
                   const VkImageSubresourceLayers *subres,
                   uint32_t layer)
{
   tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false);
}
| |
/* Execute one VkImageBlit region.  Prefers the 2D engine; falls back to
 * the shader path for cases the 2D engine can't handle (MSAA
 * destinations, BC1_RGB swizzles, cubic filtering).  Z mirroring and Z
 * scaling are not implemented (tu_finishme).
 */
static void
tu6_blit_image(struct tu_cmd_buffer *cmd,
               struct tu_image *src_image,
               struct tu_image *dst_image,
               const VkImageBlit *info,
               VkFilter filter)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers;

   /* 2D blit can't do rotation mirroring from just coordinates */
   static const enum a6xx_rotation rotate[2][2] = {
      {ROTATE_0, ROTATE_HFLIP},
      {ROTATE_VFLIP, ROTATE_180},
   };

   /* a mirror happens when exactly one of src/dst has its corners in
    * descending order along an axis */
   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
                   (info->dstOffsets[1].z < info->dstOffsets[0].z);

   if (mirror_z) {
      tu_finishme("blit z mirror\n");
      return;
   }

   if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
       info->dstOffsets[1].z - info->dstOffsets[0].z) {
      tu_finishme("blit z filter\n");
      return;
   }

   /* iterate either over 3D depth slices or array layers — the Vulkan
    * spec doesn't allow both to be > 1 at once */
   layers = info->srcOffsets[1].z - info->srcOffsets[0].z;
   if (info->dstSubresource.layerCount > 1) {
      assert(layers <= 1);
      layers = info->dstSubresource.layerCount;
   }

   /* BC1_RGB_* formats need to have their last components overriden with 1
    * when sampling, which is normally handled with the texture descriptor
    * swizzle. The 2d path can't handle that, so use the 3d path.
    *
    * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
    * the 2d path.
    */

   if (dst_image->samples > 1 ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
       filter == VK_FILTER_CUBIC_EXT)
      ops = &r3d_ops;

   /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
    * figure out why (should be able to pass all tests with only shader path)
    */

   ops->setup(cmd, cs, dst_image->vk_format, info->dstSubresource.aspectMask,
              rotate[mirror_y][mirror_x], false);

   if (ops == &r3d_ops) {
      /* shader path handles mirroring implicitly via corner ordering */
      r3d_coords_raw(cs, (float[]) {
         info->dstOffsets[0].x, info->dstOffsets[0].y,
         info->srcOffsets[0].x, info->srcOffsets[0].y,
         info->dstOffsets[1].x, info->dstOffsets[1].y,
         info->srcOffsets[1].x, info->srcOffsets[1].y
      });
   } else {
      /* 2D path needs normalized (TL < BR) rects; mirroring is handled by
       * the rotate mode chosen above */
      tu_cs_emit_regs(cs,
                      A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
                                          .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
                      A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
                                          .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
      tu_cs_emit_regs(cs,
                      A6XX_GRAS_2D_SRC_TL_X(.x = MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
                      A6XX_GRAS_2D_SRC_BR_X(.x = MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
                      A6XX_GRAS_2D_SRC_TL_Y(.y = MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
                      A6XX_GRAS_2D_SRC_BR_Y(.y = MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
   }

   struct tu_image_view dst, src;
   tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffsets[0].z);
   tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);
      ops->src(cmd, cs, &src, i, filter);
      ops->run(cmd, cs);
   }
}
| |
| void |
| tu_CmdBlitImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageBlit *pRegions, |
| VkFilter filter) |
| |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| |
| tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) |
| tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter); |
| } |
| |
| static void |
| copy_compressed(VkFormat format, |
| VkOffset3D *offset, |
| VkExtent3D *extent, |
| uint32_t *width, |
| uint32_t *height) |
| { |
| if (!vk_format_is_compressed(format)) |
| return; |
| |
| uint32_t block_width = vk_format_get_blockwidth(format); |
| uint32_t block_height = vk_format_get_blockheight(format); |
| |
| offset->x /= block_width; |
| offset->y /= block_height; |
| |
| if (extent) { |
| extent->width = DIV_ROUND_UP(extent->width, block_width); |
| extent->height = DIV_ROUND_UP(extent->height, block_height); |
| } |
| if (width) |
| *width = DIV_ROUND_UP(*width, block_width); |
| if (height) |
| *height = DIV_ROUND_UP(*height, block_height); |
| } |
| |
/* Copy one VkBufferImageCopy region from a buffer into an image. The buffer
 * is treated as a linear source "image" with an explicit pitch. Uses the 2D
 * blitter, except for buffer-to-stencil uploads which need the 3D path.
 */
static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
                        struct tu_buffer *src_buffer,
                        struct tu_image *dst_image,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   /* For 3D images the depth is the slice count (layerCount is 1); for
    * array images it is the other way around.
    */
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat src_format =
      copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true);
   const struct blit_ops *ops = &r2d_ops;

   /* special case for buffer to stencil */
   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      ops = &r3d_ops;
   }

   /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
    * which matters for UBWC. buffer_to_image/etc can fail because of this
    */

   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   /* bufferRowLength/bufferImageHeight of 0 mean tightly packed. */
   uint32_t src_width = info->bufferRowLength ?: extent.width;
   uint32_t src_height = info->bufferImageHeight ?: extent.height;

   /* For compressed formats, convert texel coordinates/sizes to blocks. */
   copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height);

   uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
   uint32_t layer_size = src_height * pitch;

   ops->setup(cmd, cs,
              copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
              info->imageSubresource.aspectMask, ROTATE_0, false);

   struct tu_image_view dst;
   tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);

      uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
      if ((src_va & 63) || (pitch & 63)) {
         /* Source base or pitch is not 64-byte aligned (presumably a
          * blitter alignment requirement — TODO confirm against hw docs):
          * copy one scanline per blit, aligning the base down to 64 bytes
          * and compensating with an x offset in texels.
          */
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
            ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
                            x + extent.width, 1);
            ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            src_va += pitch;
         }
      } else {
         /* Aligned: copy the whole layer with a single blit. */
         ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height);
         coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
         ops->run(cmd, cs);
      }
   }
}
| |
| void |
| tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); |
| |
| tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (unsigned i = 0; i < regionCount; ++i) |
| tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i); |
| } |
| |
/* Copy one VkBufferImageCopy region from an image into a buffer — the
 * mirror of tu_copy_buffer_to_image(). Stencil-only reads from combined
 * D24S8 need the 3D path to extract the stencil component.
 */
static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
                        struct tu_image *src_image,
                        struct tu_buffer *dst_buffer,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   /* For 3D images the depth is the slice count (layerCount is 1). */
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat dst_format =
      copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true);
   bool stencil_read = false;

   if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      stencil_read = true;
   }

   const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   /* bufferRowLength/bufferImageHeight of 0 mean tightly packed. */
   uint32_t dst_width = info->bufferRowLength ?: extent.width;
   uint32_t dst_height = info->bufferImageHeight ?: extent.height;

   /* For compressed formats, convert texel coordinates/sizes to blocks. */
   copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height);

   uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
   uint32_t layer_size = pitch * dst_height;

   ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);

   struct tu_image_view src;
   tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);

   for (uint32_t i = 0; i < layers; i++) {
      ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);

      uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
      if ((dst_va & 63) || (pitch & 63)) {
         /* Destination base or pitch is not 64-byte aligned: write one
          * scanline per blit, aligning the base down to 64 bytes and
          * compensating with an x offset in texels.
          */
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
            ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
            ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            dst_va += pitch;
         }
      } else {
         /* Aligned: copy the whole layer with a single blit. */
         ops->dst_buffer(cs, dst_format, dst_va, pitch);
         coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
         ops->run(cmd, cs);
      }
   }
}
| |
| void |
| tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkBuffer dstBuffer, |
| uint32_t regionCount, |
| const VkBufferImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); |
| |
| tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (unsigned i = 0; i < regionCount; ++i) |
| tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i); |
| } |
| |
| /* Tiled formats don't support swapping, which means that we can't support |
| * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some |
| * formats like B5G5R5A1 have a separate linear-only format when sampling. |
| * Currently we fake support for tiled swapped formats and use the unswapped |
| * format instead, but this means that reinterpreting copies to and from |
| * swapped formats can't be performed correctly unless we can swizzle the |
| * components by reinterpreting the other image as the "correct" swapped |
| * format, i.e. only when the other image is linear. |
| */ |
| |
| static bool |
| is_swapped_format(VkFormat format) |
| { |
| struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR); |
| struct tu_native_format tiled = tu6_format_texture(format, TILE6_3); |
| return linear.fmt != tiled.fmt || linear.swap != tiled.swap; |
| } |
| |
| /* R8G8_* formats have a different tiling layout than other cpp=2 formats, and |
| * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice |
| * versa). This should mirror the logic in fdl6_layout. |
| */ |
| static bool |
| image_is_r8g8(struct tu_image *image) |
| { |
| return image->layout[0].cpp == 2 && |
| vk_format_get_nr_components(image->vk_format) == 2; |
| } |
| |
| static void |
| tu_copy_image_to_image(struct tu_cmd_buffer *cmd, |
| struct tu_image *src_image, |
| struct tu_image *dst_image, |
| const VkImageCopy *info) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| |
| if (dst_image->samples > 1) |
| ops = &r3d_ops; |
| |
| VkFormat format = VK_FORMAT_UNDEFINED; |
| VkOffset3D src_offset = info->srcOffset; |
| VkOffset3D dst_offset = info->dstOffset; |
| VkExtent3D extent = info->extent; |
| |
| /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between |
| * Images": |
| * |
| * When copying between compressed and uncompressed formats the extent |
| * members represent the texel dimensions of the source image and not |
| * the destination. When copying from a compressed image to an |
| * uncompressed image the image texel dimensions written to the |
| * uncompressed image will be source extent divided by the compressed |
| * texel block dimensions. When copying from an uncompressed image to a |
| * compressed image the image texel dimensions written to the compressed |
| * image will be the source extent multiplied by the compressed texel |
| * block dimensions. |
| * |
| * This means we only have to adjust the extent if the source image is |
| * compressed. |
| */ |
| copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL); |
| copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL); |
| |
| VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false); |
| VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false); |
| |
| bool use_staging_blit = false; |
| |
| if (src_format == dst_format) { |
| /* Images that share a format can always be copied directly because it's |
| * the same as a blit. |
| */ |
| format = src_format; |
| } else if (!src_image->layout[0].tile_mode) { |
| /* If an image is linear, we can always safely reinterpret it with the |
| * other image's format and then do a regular blit. |
| */ |
| format = dst_format; |
| } else if (!dst_image->layout[0].tile_mode) { |
| format = src_format; |
| } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) { |
| /* We can't currently copy r8g8 images to/from other cpp=2 images, |
| * due to the different tile layout. |
| */ |
| use_staging_blit = true; |
| } else if (is_swapped_format(src_format) || |
| is_swapped_format(dst_format)) { |
| /* If either format has a non-identity swap, then we can't copy |
| * to/from it. |
| */ |
| use_staging_blit = true; |
| } else if (!src_image->layout[0].ubwc) { |
| format = dst_format; |
| } else if (!dst_image->layout[0].ubwc) { |
| format = src_format; |
| } else { |
| /* Both formats use UBWC and so neither can be reinterpreted. |
| * TODO: We could do an in-place decompression of the dst instead. |
| */ |
| use_staging_blit = true; |
| } |
| |
| struct tu_image_view dst, src; |
| |
| if (use_staging_blit) { |
| tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false); |
| tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false); |
| |
| struct tu_image staging_image = { |
| .vk_format = src_format, |
| .type = src_image->type, |
| .tiling = VK_IMAGE_TILING_LINEAR, |
| .extent = extent, |
| .level_count = 1, |
| .layer_count = info->srcSubresource.layerCount, |
| .samples = src_image->samples, |
| .bo_offset = 0, |
| }; |
| |
| VkImageSubresourceLayers staging_subresource = { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .mipLevel = 0, |
| .baseArrayLayer = 0, |
| .layerCount = info->srcSubresource.layerCount, |
| }; |
| |
| VkOffset3D staging_offset = { 0 }; |
| |
| staging_image.layout[0].tile_mode = TILE6_LINEAR; |
| staging_image.layout[0].ubwc = false; |
| |
| fdl6_layout(&staging_image.layout[0], |
| vk_format_to_pipe_format(staging_image.vk_format), |
| staging_image.samples, |
| staging_image.extent.width, |
| staging_image.extent.height, |
| staging_image.extent.depth, |
| staging_image.level_count, |
| staging_image.layer_count, |
| staging_image.type == VK_IMAGE_TYPE_3D, |
| NULL); |
| |
| VkResult result = tu_get_scratch_bo(cmd->device, |
| staging_image.layout[0].size, |
| &staging_image.bo); |
| if (result != VK_SUCCESS) { |
| cmd->record_result = result; |
| return; |
| } |
| |
| tu_bo_list_add(&cmd->bo_list, staging_image.bo, |
| MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); |
| |
| struct tu_image_view staging; |
| tu_image_view_copy(&staging, &staging_image, src_format, |
| &staging_subresource, 0, false); |
| |
| ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); |
| coords(ops, cs, &staging_offset, &src_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &staging, i); |
| ops->run(cmd, cs); |
| } |
| |
| /* When executed by the user there has to be a pipeline barrier here, |
| * but since we're doing it manually we'll have to flush ourselves. |
| */ |
| tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); |
| tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); |
| |
| tu_image_view_copy(&staging, &staging_image, dst_format, |
| &staging_subresource, 0, false); |
| |
| ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, ROTATE_0, false); |
| coords(ops, cs, &dst_offset, &staging_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } else { |
| tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false); |
| tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false); |
| |
| ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, ROTATE_0, false); |
| coords(ops, cs, &dst_offset, &src_offset, &extent); |
| |
| for (uint32_t i = 0; i < info->extent.depth; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| } |
| |
| void |
| tu_CmdCopyImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage destImage, |
| VkImageLayout destImageLayout, |
| uint32_t regionCount, |
| const VkImageCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, destImage); |
| |
| tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) |
| tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i); |
| } |
| |
/* Copy 'size' bytes from src_va to dst_va with the 2D blitter, treating the
 * data as a one-texel-high image. block_size selects the texel format:
 * 4 -> R32_UINT (addresses/size must be dword-aligned), otherwise R8_UNORM.
 */
static void
copy_buffer(struct tu_cmd_buffer *cmd,
            uint64_t dst_va,
            uint64_t src_va,
            uint64_t size,
            uint32_t block_size)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
   uint64_t blocks = size / block_size;

   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);

   while (blocks) {
      /* Align both addresses down to 64 bytes for the blitter and turn the
       * remainders into x offsets in texels; 0x4000 bounds the width of a
       * single blit, so large copies are split across iterations.
       */
      uint32_t src_x = (src_va & 63) / block_size;
      uint32_t dst_x = (dst_va & 63) / block_size;
      uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x);

      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1);
      ops->dst_buffer(     cs, format, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      src_va += width * block_size;
      dst_va += width * block_size;
      blocks -= width;
   }
}
| |
| void |
| tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer srcBuffer, |
| VkBuffer dstBuffer, |
| uint32_t regionCount, |
| const VkBufferCopy *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); |
| TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer); |
| |
| tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (unsigned i = 0; i < regionCount; ++i) { |
| copy_buffer(cmd, |
| tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset, |
| tu_buffer_iova(src_buffer) + pRegions[i].srcOffset, |
| pRegions[i].size, 1); |
| } |
| } |
| |
| void |
| tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize dataSize, |
| const void *pData) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); |
| |
| tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE); |
| |
| struct tu_cs_memory tmp; |
| VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp); |
| if (result != VK_SUCCESS) { |
| cmd->record_result = result; |
| return; |
| } |
| |
| memcpy(tmp.map, pData, dataSize); |
| copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4); |
| } |
| |
/* vkCmdFillBuffer: fill a buffer range with a repeated 32-bit value via
 * R32_UINT clear blits. Per the Vulkan spec fillSize is a multiple of 4
 * (and VK_WHOLE_SIZE rounds down to one), so the /4 below loses nothing.
 */
void
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer dstBuffer,
                 VkDeviceSize dstOffset,
                 VkDeviceSize fillSize,
                 uint32_t data)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   if (fillSize == VK_WHOLE_SIZE)
      fillSize = buffer->size - dstOffset;

   uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
   uint32_t blocks = fillSize / 4;

   /* 'true' = clear mode: no src, value comes from clear_value below. */
   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true);
   ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});

   while (blocks) {
      /* Align the destination down to 64 bytes and clear at an x offset in
       * texels; 0x4000 bounds the width of one blit, so large fills are
       * split across iterations.
       */
      uint32_t dst_x = (dst_va & 63) / 4;
      uint32_t width = MIN2(blocks, 0x4000 - dst_x);

      ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      dst_va += width * 4;
      blocks -= width;
   }
}
| |
| void |
| tu_CmdResolveImage(VkCommandBuffer commandBuffer, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageResolve *pRegions) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, src_image, srcImage); |
| TU_FROM_HANDLE(tu_image, dst_image, dstImage); |
| const struct blit_ops *ops = &r2d_ops; |
| struct tu_cs *cs = &cmd->cs; |
| |
| tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); |
| |
| for (uint32_t i = 0; i < regionCount; ++i) { |
| const VkImageResolve *info = &pRegions[i]; |
| uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount); |
| |
| assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount); |
| /* TODO: aspect masks possible ? */ |
| |
| coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent); |
| |
| struct tu_image_view dst, src; |
| tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z); |
| tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, &dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| } |
| |
| void |
| tu_resolve_sysmem(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| struct tu_image_view *src, |
| struct tu_image_view *dst, |
| uint32_t layers, |
| const VkRect2D *rect) |
| { |
| const struct blit_ops *ops = &r2d_ops; |
| |
| tu_bo_list_add(&cmd->bo_list, src->image->bo, MSM_SUBMIT_BO_READ); |
| tu_bo_list_add(&cmd->bo_list, dst->image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| assert(src->image->vk_format == dst->image->vk_format); |
| |
| ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); |
| ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); |
| |
| for (uint32_t i = 0; i < layers; i++) { |
| ops->src(cmd, cs, src, i, VK_FILTER_NEAREST); |
| ops->dst(cs, dst, i); |
| ops->run(cmd, cs); |
| } |
| } |
| |
/* Clear every mip level and layer in 'range' of 'image' to 'clear_value',
 * using the 2D blitter (or the 3D path for multisampled images).
 */
static void
clear_image(struct tu_cmd_buffer *cmd,
            struct tu_image *image,
            const VkClearValue *clear_value,
            const VkImageSubresourceRange *range)
{
   uint32_t level_count = tu_get_levelCount(image, range);
   uint32_t layer_count = tu_get_layerCount(image, range);
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = image->vk_format;
   /* E5B9G9R9 is rendered as raw R32 bits; note clear_value below still
    * receives the original format so the value gets packed as rgb9e5.
    */
   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
      format = VK_FORMAT_R32_UINT;

   /* 3D images have a single "layer"; depth slices are handled per level
    * below.
    */
   if (image->type == VK_IMAGE_TYPE_3D) {
      assert(layer_count == 1);
      assert(range->baseArrayLayer == 0);
   }

   const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;

   ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true);
   ops->clear_value(cs, image->vk_format, clear_value);

   for (unsigned j = 0; j < level_count; j++) {
      /* For 3D images, clear every (minified) depth slice of this level. */
      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);

      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
                     u_minify(image->extent.width, range->baseMipLevel + j),
                     u_minify(image->extent.height, range->baseMipLevel + j)
                  });

      struct tu_image_view dst;
      tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
                                 .aspectMask = range->aspectMask,
                                 .mipLevel = range->baseMipLevel + j,
                                 .baseArrayLayer = range->baseArrayLayer,
                                 .layerCount = 1,
                              }, 0, false);

      for (uint32_t i = 0; i < layer_count; i++) {
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }
}
| |
| void |
| tu_CmdClearColorImage(VkCommandBuffer commandBuffer, |
| VkImage image_h, |
| VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, image, image_h); |
| |
| tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (unsigned i = 0; i < rangeCount; i++) |
| clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i); |
| } |
| |
| void |
| tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, |
| VkImage image_h, |
| VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| TU_FROM_HANDLE(tu_image, image, image_h); |
| |
| tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE); |
| |
| for (unsigned i = 0; i < rangeCount; i++) |
| clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i); |
| } |
| |
/* Implement vkCmdClearAttachments for sysmem rendering with the 2D blitter,
 * writing directly to the attachment images. Requires manual CCU cache
 * management, explained in the long comment inside the loop.
 */
static void
tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
                               uint32_t attachment_count,
                               const VkClearAttachment *attachments,
                               uint32_t rect_count,
                               const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   /* note: cannot use shader path here.. there is a special shader path
    * in tu_clear_sysmem_attachments()
    */
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->draw_cs;

   for (uint32_t j = 0; j < attachment_count; j++) {
      /* The vulkan spec, section 17.2 "Clearing Images Inside a Render
       * Pass Instance" says that:
       *
       *    Unlike other clear commands, vkCmdClearAttachments executes as
       *    a drawing command, rather than a transfer command, with writes
       *    performed by it executing in rasterization order. Clears to
       *    color attachments are executed as color attachment writes, by
       *    the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage.
       *    Clears to depth/stencil attachments are executed as depth
       *    writes and writes by the
       *    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT and
       *    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT stages.
       *
       * However, the 2d path here is executed the same way as a
       * transfer command, using the CCU color cache exclusively with
       * a special depth-as-color format for depth clears. This means that
       * we can't rely on the normal pipeline barrier mechanism here, and
       * have to manually flush whenever using a different cache domain
       * from what the 3d path would've used. This happens when we clear
       * depth/stencil, since normally depth attachments use CCU depth, but
       * we clear it using a special depth-as-color format. Since the clear
       * potentially uses a different attachment state we also need to
       * invalidate color beforehand and flush it afterwards.
       */

      uint32_t a;
      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      }

      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview =
         cmd->state.framebuffer->attachments[a].attachment;

      ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask, ROTATE_0, true);
      ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);

      /* Wait for the flushes we triggered manually to complete */
      tu_cs_emit_wfi(cs);

      /* One clear blit per rect per layer. */
      for (uint32_t i = 0; i < rect_count; i++) {
         ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
         for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
            ops->dst(cs, iview, rects[i].baseArrayLayer + layer);
            ops->run(cmd, cs);
         }
      }

      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      } else {
         /* sync color into depth */
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
      }
   }
}
| |
/* Implement vkCmdClearAttachments for sysmem rendering. Prefers the 2D
 * blitter path when possible; otherwise emits a specialized 3D draw whose
 * fragment outputs come from constants, leaving input-attachment draw
 * states intact.
 */
static void
tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
                            uint32_t attachment_count,
                            const VkClearAttachment *attachments,
                            uint32_t rect_count,
                            const VkClearRect *rects)
{
   /* the shader path here is special, it avoids changing MRT/etc state */
   const struct tu_render_pass *pass = cmd->state.pass;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const uint32_t mrt_count = subpass->color_count;
   struct tu_cs *cs = &cmd->draw_cs;
   uint32_t clear_value[MAX_RTS][4];
   float z_clear_val = 0.0f;
   uint8_t s_clear_val = 0;
   uint32_t clear_rts = 0, clear_components = 0, num_rts = 0, b;
   bool z_clear = false;
   bool s_clear = false;
   bool layered_clear = false;
   uint32_t max_samples = 1;

   /* Gather which render targets / depth / stencil to clear and with what
    * values; track the maximum sample count to decide which path to take.
    */
   for (uint32_t i = 0; i < attachment_count; i++) {
      uint32_t a;
      if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t c = attachments[i].colorAttachment;
         a = subpass->color_attachments[c].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         clear_rts |= 1 << c;
         clear_components |= 0xf << (c * 4);
         memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            z_clear = true;
            z_clear_val = attachments[i].clearValue.depthStencil.depth;
         }

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            s_clear = true;
            s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
         }
      }

      max_samples = MAX2(max_samples, pass->attachments[a].samples);
   }

   /* prefer to use 2D path for clears
    * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
    */
   if (max_samples == 1 && cmd->state.framebuffer) {
      tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments, rect_count, rects);
      return;
   }

   /* This clear path behaves like a draw, needs the same flush as tu_draw */
   tu_emit_cache_flush_renderpass(cmd, cs);

   /* disable all draw states so they don't interfere
    * TODO: use and re-use draw states for this path
    * we have to disable draw states individually to preserve
    * input attachment states, because a secondary command buffer
    * won't be able to restore them
    */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
   for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) {
      if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM ||
          i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM)
         continue;
      tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) |
                     CP_SET_DRAW_STATE__0_DISABLE);
      tu_cs_emit_qw(cs, 0);
   }
   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;

   /* FS writes no depth/sample mask; only the cleared MRTs get outputs. */
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));

   /* Map each cleared RT to a consecutive output register (4 regs each). */
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count);
   for (uint32_t i = 0; i < mrt_count; i++) {
      if (clear_rts & (1 << i))
         tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4));
      else
         tu_cs_emit(cs, 0);
   }

   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer || rects[i].layerCount > 1)
         layered_clear = true;
   }

   r3d_common(cmd, cs, false, num_rts, layered_clear);

   tu_cs_emit_regs(cs,
                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components));
   tu_cs_emit_regs(cs,
                   A6XX_RB_RENDER_COMPONENTS(.dword = clear_components));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));

   /* Blending off; write mask enabled only on the cleared MRTs. */
   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());
   for (uint32_t i = 0; i < mrt_count; i++) {
      tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
            .component_enable = COND(clear_rts & (1 << i), 0xf)));
   }

   /* Depth/stencil always pass and are written only when being cleared. */
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
         .z_enable = z_clear,
         .z_write_enable = z_clear,
         .zfunc = FUNC_ALWAYS));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
         .stencil_enable = s_clear,
         .func = FUNC_ALWAYS,
         .zpass = STENCIL_REPLACE));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));

   /* Upload the clear colors as FS constants, one vec4 per cleared RT. */
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   for_each_bit(b, clear_rts)
      tu_cs_emit_array(cs, clear_value[b], 4);

   /* One rect draw per rect/layer; z carries the depth clear value and the
    * layer index rides in the next coordinate (see r3d_coords_raw layout).
    */
   for (uint32_t i = 0; i < rect_count; i++) {
      for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
         r3d_coords_raw(cs, (float[]) {
            rects[i].rect.offset.x, rects[i].rect.offset.y,
            z_clear_val, uif(rects[i].baseArrayLayer + layer),
            rects[i].rect.offset.x + rects[i].rect.extent.width,
            rects[i].rect.offset.y + rects[i].rect.extent.height,
            z_clear_val, 1.0f,
         });
         r3d_run(cmd, cs);
      }
   }
}
| |
/* Pack a VkClearValue into the four raw 32-bit words consumed by the BLIT
 * event (RB_BLIT_CLEAR_COLOR_DW0..3) when clearing a GMEM attachment.
 * Depth/stencil formats are packed by hand; color formats go through the
 * generic util_format packing with a pre-applied swizzle.
 */
static void
pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4])
{
   enum pipe_format pformat = vk_format_to_pipe_format(format);

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      /* depth in the low 24 bits, stencil in the top byte */
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
                       val->depthStencil.stencil << 24;
      return;
   case VK_FORMAT_D16_UNORM:
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
      return;
   case VK_FORMAT_D32_SFLOAT:
      clear_value[0] = fui(val->depthStencil.depth);
      return;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      return;
   /* these formats use a different base format when tiled
    * the same format can be used for both because GMEM is always in WZYX order
    */
   case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
   case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
      pformat = PIPE_FORMAT_B5G5R5A1_UNORM;
      /* fallthrough: color formats continue to the generic packing below */
   default:
      break;
   }

   VkClearColorValue color;

   /**
    * GMEM is tiled and wants the components in WZYX order,
    * apply swizzle to the color before packing, to counteract
    * deswizzling applied by packing functions
    */
   pipe_swizzle_4f(color.float32, val->color.float32,
                   util_format_description(pformat)->swizzle);

   util_format_pack_rgba(pformat, clear_value, color.uint32, 1);
}
| |
/* Emit a BLIT event that fills one GMEM attachment with a clear value.
 * 'mask' selects which aspects (color/depth/stencil) are written; it is
 * translated into the hardware clear mask via aspect_write_mask().
 */
static void
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                              struct tu_cs *cs,
                              uint32_t attachment,
                              VkImageAspectFlags mask,
                              const VkClearValue *value)
{
   VkFormat vk_format = cmd->state.pass->attachments[attachment].format;


   /* destination format for the blit unit */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));

   /* .gmem = 1 targets the on-chip tile buffer rather than sysmem */
   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1,
                                         .clear_mask = aspect_write_mask(vk_format, mask)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);

   /* NOTE(review): purpose of 88D0 is unknown (register name is UNKNOWN);
    * zeroing it here matches the rest of the driver.
    */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   uint32_t clear_vals[4] = {};
   pack_gmem_clear_value(value, vk_format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit_array(cs, clear_vals, 4);

   /* kick off the actual clear */
   tu6_emit_event_write(cmd, cs, BLIT);
}
| |
| static void |
| tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, |
| uint32_t attachment_count, |
| const VkClearAttachment *attachments, |
| uint32_t rect_count, |
| const VkClearRect *rects) |
| { |
| const struct tu_subpass *subpass = cmd->state.subpass; |
| struct tu_cs *cs = &cmd->draw_cs; |
| |
| /* TODO: swap the loops for smaller cmdstream */ |
| for (unsigned i = 0; i < rect_count; i++) { |
| unsigned x1 = rects[i].rect.offset.x; |
| unsigned y1 = rects[i].rect.offset.y; |
| unsigned x2 = x1 + rects[i].rect.extent.width - 1; |
| unsigned y2 = y1 + rects[i].rect.extent.height - 1; |
| |
| tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); |
| tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); |
| tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); |
| |
| for (unsigned j = 0; j < attachment_count; j++) { |
| uint32_t a; |
| if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) |
| a = subpass->color_attachments[attachments[j].colorAttachment].attachment; |
| else |
| a = subpass->depth_stencil_attachment.attachment; |
| |
| if (a == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask, |
| &attachments[j].clearValue); |
| } |
| } |
| } |
| |
| void |
| tu_CmdClearAttachments(VkCommandBuffer commandBuffer, |
| uint32_t attachmentCount, |
| const VkClearAttachment *pAttachments, |
| uint32_t rectCount, |
| const VkClearRect *pRects) |
| { |
| TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); |
| struct tu_cs *cs = &cmd->draw_cs; |
| |
| tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM); |
| tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); |
| tu_cond_exec_end(cs); |
| |
| tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); |
| tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); |
| tu_cond_exec_end(cs); |
| } |
| |
/* Perform the LOAD_OP_CLEAR of attachment 'a' when rendering in sysmem
 * (non-GMEM) mode, using the 2D blitter (or the 3D path for MSAA).  Clears
 * every framebuffer layer, then flushes/invalidates the CCU so later
 * renderpass accesses see the cleared contents.
 */
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   /* nothing to do unless some aspect of this attachment is cleared */
   if (!attachment->clear_mask)
      return;

   /* the 2D blitter can't handle MSAA destinations; fall back to 3D draws */
   const struct blit_ops *ops = &r2d_ops;
   if (attachment->samples > 1)
      ops = &r3d_ops;

   ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0, true);
   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
   ops->clear_value(cs, attachment->format, &info->pClearValues[a]);

   /* Wait for any flushes at the beginning of the renderpass to complete */
   tu_cs_emit_wfi(cs);

   /* one blit per framebuffer layer */
   for (uint32_t i = 0; i < fb->layers; i++) {
      ops->dst(cs, iview, i);
      ops->run(cmd, cs);
   }

   /* The spec doesn't explicitly say, but presumably the initial renderpass
    * clear is considered part of the renderpass, and therefore barriers
    * aren't required inside the subpass/renderpass. Therefore we need to
    * flush CCU color into CCU depth here, just like with
    * vkCmdClearAttachments(). Note that because this only happens at the
    * beginning of a renderpass, and renderpass writes are considered
    * "incoherent", we shouldn't have to worry about syncing depth into color
    * beforehand as depth should already be flushed.
    */
   if (vk_format_is_depth_or_stencil(attachment->format)) {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
   } else {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
   }
}
| |
/* Perform the LOAD_OP_CLEAR of attachment 'a' when rendering in GMEM mode:
 * program the MSAA count for the blit unit and clear the tile-buffer copy
 * of the attachment.
 */
void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   /* nothing to do unless some aspect of this attachment is cleared */
   if (!attachment->clear_mask)
      return;

   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask,
                                 &info->pClearValues[a]);
}
| |
/* Emit a BLIT event that copies between sysmem and GMEM for one attachment:
 * a load (sysmem -> GMEM) when resolve=false, or a store/resolve
 * (GMEM -> sysmem) when resolve=true.
 */
static void
tu_emit_blit(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs,
             const struct tu_image_view *iview,
             const struct tu_render_pass_attachment *attachment,
             bool resolve)
{
   tu_cs_emit_regs(cs,
                   A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   /* .gmem selects the load direction; on store both bits are clear */
   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
      .unk0 = !resolve,
      .gmem = !resolve,
      /* "integer" bit disables msaa resolve averaging */
      .integer = vk_format_is_int(attachment->format)));

   /* sysmem side: destination info + address/pitch from the image view */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, 0, false);

   /* UBWC flag buffer reference for the destination */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
   tu_cs_image_flag_ref(cs, iview, 0);

   /* GMEM side: tile-buffer offset of the attachment */
   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));

   tu6_emit_event_write(cmd, cs, BLIT);
}
| |
| static bool |
| blit_can_resolve(VkFormat format) |
| { |
| const struct util_format_description *desc = vk_format_description(format); |
| |
| /* blit event can only do resolve for simple cases: |
| * averaging samples as unsigned integers or choosing only one sample |
| */ |
| if (vk_format_is_snorm(format) || vk_format_is_srgb(format)) |
| return false; |
| |
| /* can't do formats with larger channel sizes |
| * note: this includes all float formats |
| * note2: single channel integer formats seem OK |
| */ |
| if (desc->channel[0].size > 10) |
| return false; |
| |
| switch (format) { |
| /* for unknown reasons blit event can't msaa resolve these formats when tiled |
| * likely related to these formats having different layout from other cpp=2 formats |
| */ |
| case VK_FORMAT_R8G8_UNORM: |
| case VK_FORMAT_R8G8_UINT: |
| case VK_FORMAT_R8G8_SINT: |
| /* TODO: this one should be able to work? */ |
| case VK_FORMAT_D24_UNORM_S8_UINT: |
| return false; |
| default: |
| break; |
| } |
| |
| return true; |
| } |
| |
| void |
| tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, |
| struct tu_cs *cs, |
| uint32_t a, |
| bool force_load) |
| { |
| const struct tu_image_view *iview = |
| cmd->state.framebuffer->attachments[a].attachment; |
| const struct tu_render_pass_attachment *attachment = |
| &cmd->state.pass->attachments[a]; |
| |
| if (attachment->load || force_load) |
| tu_emit_blit(cmd, cs, iview, attachment, false); |
| } |
| |
/* Store (resolve) the GMEM contents of attachment 'gmem_a' to the sysmem
 * image of attachment 'a' at the end of a tile pass.  'a' and 'gmem_a'
 * differ when this is a resolve from an MSAA attachment.  Uses the fast
 * BLIT-event path when possible and otherwise falls back to a 2D blit
 * reading GMEM directly through its aperture.
 */
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const VkRect2D *render_area = &cmd->state.render_area;
   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
   struct tu_image_view *iview = fb->attachments[a].attachment;
   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];

   /* skip attachments with STORE_OP_DONT_CARE */
   if (!dst->store)
      return;

   uint32_t x1 = render_area->offset.x;
   uint32_t y1 = render_area->offset.y;
   uint32_t x2 = x1 + render_area->extent.width;
   uint32_t y2 = y1 + render_area->extent.height;
   /* x2/y2 can be unaligned if equal to the size of the image,
    * since it will write into padding space
    * the one exception is linear levels which don't have the
    * required y padding in the layout (except for the last level)
    */
   bool need_y2_align =
      y2 != iview->extent.height || iview->need_y2_align;

   bool unaligned =
      x1 % GMEM_ALIGN_W || (x2 % GMEM_ALIGN_W && x2 != iview->extent.width) ||
      y1 % GMEM_ALIGN_H || (y2 % GMEM_ALIGN_H && need_y2_align);

   /* use fast path when render area is aligned, except for unsupported resolve cases */
   if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
      tu_emit_blit(cmd, cs, iview, src, true);
      return;
   }

   if (dst->samples > 1) {
      /* I guess we need to use shader path in this case?
       * need a testcase which fails because of this
       */
      tu_finishme("unaligned store of msaa attachment\n");
      return;
   }

   /* slow path: 2D blit reading the GMEM aperture as a tiled source */
   r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, true);
   r2d_dst(cs, iview, 0);
   r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
                      .tile_mode = TILE6_2,
                      .srgb = vk_format_is_srgb(src->format),
                      .samples = tu_msaa_samples(src->samples),
                      .samples_average = !vk_format_is_int(src->format),
                      .unk20 = 1,
                      .unk22 = 1),
                   /* note: src size does not matter when not scaling */
                   A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
                   A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
                   A6XX_SP_PS_2D_SRC_HI(),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));

   /* sync GMEM writes with CACHE. */
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

   /* Wait for CACHE_INVALIDATE to land */
   tu_cs_emit_wfi(cs);

   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
    * sysmem, and we generally assume that GMEM renderpasses leave their
    * results in sysmem, so we need to flush manually here.
    */
   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}