| /* |
| * Copyright 2020 Advanced Micro Devices, Inc. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include "si_pipe.h" |
| #include "si_shader_internal.h" |
| #include "sid.h" |
| #include "util/u_memory.h" |
| |
| static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index) |
| { |
| assert(index <= 1); |
| |
| if (index == 1) |
| return LLVMBuildAShr(ctx->ac.builder, i32, LLVMConstInt(ctx->ac.i32, 16, 0), ""); |
| |
| return LLVMBuildSExt(ctx->ac.builder, LLVMBuildTrunc(ctx->ac.builder, i32, ctx->ac.i16, ""), |
| ctx->ac.i32, ""); |
| } |
| |
| static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4]) |
| { |
| const struct si_shader_info *info = &ctx->shader->selector->info; |
| unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]; |
| |
| if (vs_blit_property) { |
| LLVMValueRef vertex_id = ctx->abi.vertex_id; |
| LLVMValueRef sel_x1 = |
| LLVMBuildICmp(ctx->ac.builder, LLVMIntULE, vertex_id, ctx->ac.i32_1, ""); |
| /* Use LLVMIntNE, because we have 3 vertices and only |
| * the middle one should use y2. |
| */ |
| LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, vertex_id, ctx->ac.i32_1, ""); |
| |
| unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index; |
| if (input_index == 0) { |
| /* Position: */ |
| LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs); |
| LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1); |
| |
| LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0); |
| LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1); |
| LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0); |
| LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1); |
| |
| LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, ""); |
| LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, ""); |
| |
| out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, ""); |
| out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, ""); |
| out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2); |
| out[3] = ctx->ac.f32_1; |
| return; |
| } |
| |
| /* Color or texture coordinates: */ |
| assert(input_index == 1); |
| |
| if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { |
| for (int i = 0; i < 4; i++) { |
| out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i); |
| } |
| } else { |
| assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD); |
| LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3); |
| LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4); |
| LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5); |
| LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6); |
| |
| out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, ""); |
| out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, ""); |
| out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7); |
| out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8); |
| } |
| return; |
| } |
| |
| unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs; |
| union si_vs_fix_fetch fix_fetch; |
| LLVMValueRef vb_desc; |
| LLVMValueRef vertex_index; |
| LLVMValueRef tmp; |
| |
| if (input_index < num_vbos_in_user_sgprs) { |
| vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]); |
| } else { |
| unsigned index = input_index - num_vbos_in_user_sgprs; |
| vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->vertex_buffers), |
| LLVMConstInt(ctx->ac.i32, index, 0)); |
| } |
| |
| vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index); |
| |
| /* Use the open-coded implementation for all loads of doubles and |
| * of dword-sized data that needs fixups. We need to insert conversion |
| * code anyway, and the amd/common code does it for us. |
| * |
| * Note: On LLVM <= 8, we can only open-code formats with |
| * channel size >= 4 bytes. |
| */ |
| bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index); |
| fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits; |
| if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) || |
| (fix_fetch.u.log_size == 2)) { |
| tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size, |
| fix_fetch.u.num_channels_m1 + 1, fix_fetch.u.format, |
| fix_fetch.u.reverse, !opencode, vb_desc, vertex_index, |
| ctx->ac.i32_0, ctx->ac.i32_0, 0, true); |
| for (unsigned i = 0; i < 4; ++i) |
| out[i] = |
| LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), ""); |
| return; |
| } |
| |
| /* Do multiple loads for special formats. */ |
| unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]); |
| LLVMValueRef fetches[4]; |
| unsigned num_fetches; |
| unsigned fetch_stride; |
| unsigned channels_per_fetch; |
| |
| if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2) { |
| num_fetches = MIN2(required_channels, 3); |
| fetch_stride = 1 << fix_fetch.u.log_size; |
| channels_per_fetch = 1; |
| } else { |
| num_fetches = 1; |
| fetch_stride = 0; |
| channels_per_fetch = required_channels; |
| } |
| |
| for (unsigned i = 0; i < num_fetches; ++i) { |
| LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0); |
| fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset, |
| channels_per_fetch, 0, true, false); |
| } |
| |
| if (num_fetches == 1 && channels_per_fetch > 1) { |
| LLVMValueRef fetch = fetches[0]; |
| for (unsigned i = 0; i < channels_per_fetch; ++i) { |
| tmp = LLVMConstInt(ctx->ac.i32, i, false); |
| fetches[i] = LLVMBuildExtractElement(ctx->ac.builder, fetch, tmp, ""); |
| } |
| num_fetches = channels_per_fetch; |
| channels_per_fetch = 1; |
| } |
| |
| for (unsigned i = num_fetches; i < 4; ++i) |
| fetches[i] = LLVMGetUndef(ctx->ac.f32); |
| |
| if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) { |
| if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT) |
| fetches[3] = ctx->ac.i32_1; |
| else |
| fetches[3] = ctx->ac.f32_1; |
| } else if (fix_fetch.u.log_size == 3 && |
| (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM || |
| fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED || |
| fix_fetch.u.format == AC_FETCH_FORMAT_SINT) && |
| required_channels == 4) { |
| /* For 2_10_10_10, the hardware returns an unsigned value; |
| * convert it to a signed one. |
| */ |
| LLVMValueRef tmp = fetches[3]; |
| LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0); |
| |
| /* First, recover the sign-extended signed integer value. */ |
| if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) |
| tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, ""); |
| else |
| tmp = ac_to_integer(&ctx->ac, tmp); |
| |
| /* For the integer-like cases, do a natural sign extension. |
| * |
| * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 |
| * and happen to contain 0, 1, 2, 3 as the two LSBs of the |
| * exponent. |
| */ |
| tmp = LLVMBuildShl( |
| ctx->ac.builder, tmp, |
| fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, ""); |
| tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, ""); |
| |
| /* Convert back to the right type. */ |
| if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) { |
| LLVMValueRef clamp; |
| LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0); |
| tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, ""); |
| clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, ""); |
| tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, ""); |
| } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) { |
| tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, ""); |
| } |
| |
| fetches[3] = tmp; |
| } |
| |
| for (unsigned i = 0; i < 4; ++i) |
| out[i] = ac_to_float(&ctx->ac, fetches[i]); |
| } |
| |
| static void declare_input_vs(struct si_shader_context *ctx, unsigned input_index) |
| { |
| LLVMValueRef input[4]; |
| |
| load_input_vs(ctx, input_index / 4, input); |
| |
| for (unsigned chan = 0; chan < 4; chan++) { |
| ctx->inputs[input_index + chan] = |
| LLVMBuildBitCast(ctx->ac.builder, input[chan], ctx->ac.i32, ""); |
| } |
| } |
| |
| void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *nir) |
| { |
| uint64_t processed_inputs = 0; |
| |
| nir_foreach_shader_in_variable (variable, nir) { |
| unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); |
| unsigned input_idx = variable->data.driver_location; |
| unsigned loc = variable->data.location; |
| |
| for (unsigned i = 0; i < attrib_count; i++) { |
| /* Packed components share the same location so skip |
| * them if we have already processed the location. |
| */ |
| if (processed_inputs & ((uint64_t)1 << (loc + i))) { |
| input_idx += 4; |
| continue; |
| } |
| |
| declare_input_vs(ctx, input_idx); |
| if (glsl_type_is_dual_slot(variable->type)) { |
| input_idx += 4; |
| declare_input_vs(ctx, input_idx); |
| } |
| |
| processed_inputs |= ((uint64_t)1 << (loc + i)); |
| input_idx += 4; |
| } |
| } |
| } |
| |
| void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef const *so_buffers, |
| LLVMValueRef const *so_write_offsets, |
| struct pipe_stream_output *stream_out, |
| struct si_shader_output_values *shader_out) |
| { |
| unsigned buf_idx = stream_out->output_buffer; |
| unsigned start = stream_out->start_component; |
| unsigned num_comps = stream_out->num_components; |
| LLVMValueRef out[4]; |
| |
| assert(num_comps && num_comps <= 4); |
| if (!num_comps || num_comps > 4) |
| return; |
| |
| /* Load the output as int. */ |
| for (int j = 0; j < num_comps; j++) { |
| assert(stream_out->stream == shader_out->vertex_stream[start + j]); |
| |
| out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]); |
| } |
| |
| /* Pack the output. */ |
| LLVMValueRef vdata = NULL; |
| |
| switch (num_comps) { |
| case 1: /* as i32 */ |
| vdata = out[0]; |
| break; |
| case 2: /* as v2i32 */ |
| case 3: /* as v3i32 */ |
| if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) { |
| vdata = ac_build_gather_values(&ctx->ac, out, num_comps); |
| break; |
| } |
| /* as v4i32 (aligned to 4) */ |
| out[3] = LLVMGetUndef(ctx->ac.i32); |
| /* fall through */ |
| case 4: /* as v4i32 */ |
| vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps)); |
| break; |
| } |
| |
| ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps, |
| so_write_offsets[buf_idx], ctx->ac.i32_0, stream_out->dst_offset * 4, |
| ac_glc | ac_slc); |
| } |
| |
| /** |
| * Write streamout data to buffers for vertex stream @p stream (different |
| * vertex streams can occur for GS copy shaders). |
| */ |
| void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, |
| unsigned noutput, unsigned stream) |
| { |
| struct si_shader_selector *sel = ctx->shader->selector; |
| struct pipe_stream_output_info *so = &sel->so; |
| LLVMBuilderRef builder = ctx->ac.builder; |
| int i; |
| |
| /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ |
| LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->streamout_config, 16, 7); |
| |
| LLVMValueRef tid = ac_get_thread_id(&ctx->ac); |
| |
| /* can_emit = tid < so_vtx_count; */ |
| LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); |
| |
| /* Emit the streamout code conditionally. This actually avoids |
| * out-of-bounds buffer access. The hw tells us via the SGPR |
| * (so_vtx_count) which threads are allowed to emit streamout data. */ |
| ac_build_ifcc(&ctx->ac, can_emit, 6501); |
| { |
| /* The buffer offset is computed as follows: |
| * ByteOffset = streamout_offset[buffer_id]*4 + |
| * (streamout_write_index + thread_id)*stride[buffer_id] + |
| * attrib_offset |
| */ |
| |
| LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->streamout_write_index); |
| |
| /* Compute (streamout_write_index + thread_id). */ |
| so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); |
| |
| /* Load the descriptor and compute the write offset for each |
| * enabled buffer. */ |
| LLVMValueRef so_write_offset[4] = {}; |
| LLVMValueRef so_buffers[4]; |
| LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); |
| |
| for (i = 0; i < 4; i++) { |
| if (!so->stride[i]) |
| continue; |
| |
| LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_VS_STREAMOUT_BUF0 + i, 0); |
| |
| so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); |
| |
| LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->streamout_offset[i]); |
| so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); |
| |
| so_write_offset[i] = ac_build_imad( |
| &ctx->ac, so_write_index, LLVMConstInt(ctx->ac.i32, so->stride[i] * 4, 0), so_offset); |
| } |
| |
| /* Write streamout data. */ |
| for (i = 0; i < so->num_outputs; i++) { |
| unsigned reg = so->output[i].register_index; |
| |
| if (reg >= noutput) |
| continue; |
| |
| if (stream != so->output[i].stream) |
| continue; |
| |
| si_llvm_streamout_store_output(ctx, so_buffers, so_write_offset, &so->output[i], |
| &outputs[reg]); |
| } |
| } |
| ac_build_endif(&ctx->ac, 6501); |
| } |
| |
| static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_export_args *pos, |
| LLVMValueRef *out_elts) |
| { |
| unsigned reg_index; |
| unsigned chan; |
| unsigned const_chan; |
| LLVMValueRef base_elt; |
| LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); |
| LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0); |
| LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index); |
| |
| for (reg_index = 0; reg_index < 2; reg_index++) { |
| struct ac_export_args *args = &pos[2 + reg_index]; |
| |
| args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMConstReal(ctx->ac.f32, 0.0f); |
| |
| /* Compute dot products of position and user clip plane vectors */ |
| for (chan = 0; chan < 4; chan++) { |
| for (const_chan = 0; const_chan < 4; const_chan++) { |
| LLVMValueRef addr = |
| LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0); |
| base_elt = si_buffer_load_const(ctx, const_resource, addr); |
| args->out[chan] = |
| ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], args->out[chan]); |
| } |
| } |
| |
| args->enabled_channels = 0xf; |
| args->valid_mask = 0; |
| args->done = 0; |
| args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index; |
| args->compr = 0; |
| } |
| } |
| |
| /* Initialize arguments for the shader export intrinsic */ |
| static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, LLVMValueRef *values, |
| unsigned target, struct ac_export_args *args) |
| { |
| args->enabled_channels = 0xf; /* writemask - default is 0xf */ |
| args->valid_mask = 0; /* Specify whether the EXEC mask represents the valid mask */ |
| args->done = 0; /* Specify whether this is the last export */ |
| args->target = target; /* Specify the target we are exporting */ |
| args->compr = false; |
| |
| memcpy(&args->out[0], values, sizeof(values[0]) * 4); |
| } |
| |
| static void si_export_param(struct si_shader_context *ctx, unsigned index, LLVMValueRef *values) |
| { |
| struct ac_export_args args; |
| |
| si_llvm_init_vs_export_args(ctx, values, V_008DFC_SQ_EXP_PARAM + index, &args); |
| ac_build_export(&ctx->ac, &args); |
| } |
| |
| static void si_build_param_exports(struct si_shader_context *ctx, |
| struct si_shader_output_values *outputs, unsigned noutput) |
| { |
| struct si_shader *shader = ctx->shader; |
| unsigned param_count = 0; |
| |
| for (unsigned i = 0; i < noutput; i++) { |
| unsigned semantic_name = outputs[i].semantic_name; |
| unsigned semantic_index = outputs[i].semantic_index; |
| |
| if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 && |
| outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0) |
| continue; |
| |
| switch (semantic_name) { |
| case TGSI_SEMANTIC_LAYER: |
| case TGSI_SEMANTIC_VIEWPORT_INDEX: |
| case TGSI_SEMANTIC_CLIPDIST: |
| case TGSI_SEMANTIC_COLOR: |
| case TGSI_SEMANTIC_BCOLOR: |
| case TGSI_SEMANTIC_PRIMID: |
| case TGSI_SEMANTIC_FOG: |
| case TGSI_SEMANTIC_TEXCOORD: |
| case TGSI_SEMANTIC_GENERIC: |
| break; |
| default: |
| continue; |
| } |
| |
| if ((semantic_name != TGSI_SEMANTIC_GENERIC || semantic_index < SI_MAX_IO_GENERIC) && |
| shader->key.opt.kill_outputs & |
| (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index, true))) |
| continue; |
| |
| si_export_param(ctx, param_count, outputs[i].values); |
| |
| assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); |
| shader->info.vs_output_param_offset[i] = param_count++; |
| } |
| |
| shader->info.nr_param_exports = param_count; |
| } |
| |
| /** |
| * Vertex color clamping. |
| * |
| * This uses a state constant loaded in a user data SGPR and |
| * an IF statement is added that clamps all colors if the constant |
| * is true. |
| */ |
| static void si_vertex_color_clamping(struct si_shader_context *ctx, |
| struct si_shader_output_values *outputs, unsigned noutput) |
| { |
| LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4]; |
| bool has_colors = false; |
| |
| /* Store original colors to alloca variables. */ |
| for (unsigned i = 0; i < noutput; i++) { |
| if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR && |
| outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR) |
| continue; |
| |
| for (unsigned j = 0; j < 4; j++) { |
| addr[i][j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); |
| LLVMBuildStore(ctx->ac.builder, outputs[i].values[j], addr[i][j]); |
| } |
| has_colors = true; |
| } |
| |
| if (!has_colors) |
| return; |
| |
| /* The state is in the first bit of the user SGPR. */ |
| LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits); |
| cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); |
| |
| ac_build_ifcc(&ctx->ac, cond, 6502); |
| |
| /* Store clamped colors to alloca variables within the conditional block. */ |
| for (unsigned i = 0; i < noutput; i++) { |
| if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR && |
| outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR) |
| continue; |
| |
| for (unsigned j = 0; j < 4; j++) { |
| LLVMBuildStore(ctx->ac.builder, ac_build_clamp(&ctx->ac, outputs[i].values[j]), |
| addr[i][j]); |
| } |
| } |
| ac_build_endif(&ctx->ac, 6502); |
| |
| /* Load clamped colors */ |
| for (unsigned i = 0; i < noutput; i++) { |
| if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR && |
| outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR) |
| continue; |
| |
| for (unsigned j = 0; j < 4; j++) { |
| outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addr[i][j], ""); |
| } |
| } |
| } |
| |
| /* Generate export instructions for hardware VS shader stage or NGG GS stage |
| * (position and parameter data only). |
| */ |
| void si_llvm_build_vs_exports(struct si_shader_context *ctx, |
| struct si_shader_output_values *outputs, unsigned noutput) |
| { |
| struct si_shader *shader = ctx->shader; |
| struct ac_export_args pos_args[4] = {}; |
| LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, |
| viewport_index_value = NULL; |
| unsigned pos_idx; |
| int i; |
| |
| si_vertex_color_clamping(ctx, outputs, noutput); |
| |
| /* Build position exports. */ |
| for (i = 0; i < noutput; i++) { |
| switch (outputs[i].semantic_name) { |
| case TGSI_SEMANTIC_POSITION: |
| si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]); |
| break; |
| case TGSI_SEMANTIC_PSIZE: |
| psize_value = outputs[i].values[0]; |
| break; |
| case TGSI_SEMANTIC_LAYER: |
| layer_value = outputs[i].values[0]; |
| break; |
| case TGSI_SEMANTIC_VIEWPORT_INDEX: |
| viewport_index_value = outputs[i].values[0]; |
| break; |
| case TGSI_SEMANTIC_EDGEFLAG: |
| edgeflag_value = outputs[i].values[0]; |
| break; |
| case TGSI_SEMANTIC_CLIPDIST: |
| if (!shader->key.opt.clip_disable) { |
| unsigned index = 2 + outputs[i].semantic_index; |
| si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index, |
| &pos_args[index]); |
| } |
| break; |
| case TGSI_SEMANTIC_CLIPVERTEX: |
| if (!shader->key.opt.clip_disable) { |
| si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values); |
| } |
| break; |
| } |
| } |
| |
| /* We need to add the position output manually if it's missing. */ |
| if (!pos_args[0].out[0]) { |
| pos_args[0].enabled_channels = 0xf; /* writemask */ |
| pos_args[0].valid_mask = 0; /* EXEC mask */ |
| pos_args[0].done = 0; /* last export? */ |
| pos_args[0].target = V_008DFC_SQ_EXP_POS; |
| pos_args[0].compr = 0; /* COMPR flag */ |
| pos_args[0].out[0] = ctx->ac.f32_0; /* X */ |
| pos_args[0].out[1] = ctx->ac.f32_0; /* Y */ |
| pos_args[0].out[2] = ctx->ac.f32_0; /* Z */ |
| pos_args[0].out[3] = ctx->ac.f32_1; /* W */ |
| } |
| |
| bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg; |
| |
| /* Write the misc vector (point size, edgeflag, layer, viewport). */ |
| if (shader->selector->info.writes_psize || pos_writes_edgeflag || |
| shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { |
| pos_args[1].enabled_channels = shader->selector->info.writes_psize | |
| (pos_writes_edgeflag << 1) | |
| (shader->selector->info.writes_layer << 2); |
| |
| pos_args[1].valid_mask = 0; /* EXEC mask */ |
| pos_args[1].done = 0; /* last export? */ |
| pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; |
| pos_args[1].compr = 0; /* COMPR flag */ |
| pos_args[1].out[0] = ctx->ac.f32_0; /* X */ |
| pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ |
| pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ |
| pos_args[1].out[3] = ctx->ac.f32_0; /* W */ |
| |
| if (shader->selector->info.writes_psize) |
| pos_args[1].out[0] = psize_value; |
| |
| if (pos_writes_edgeflag) { |
| /* The output is a float, but the hw expects an integer |
| * with the first bit containing the edge flag. */ |
| edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder, edgeflag_value, ctx->ac.i32, ""); |
| edgeflag_value = ac_build_umin(&ctx->ac, edgeflag_value, ctx->ac.i32_1); |
| |
| /* The LLVM intrinsic expects a float. */ |
| pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value); |
| } |
| |
| if (ctx->screen->info.chip_class >= GFX9) { |
| /* GFX9 has the layer in out.z[10:0] and the viewport |
| * index in out.z[19:16]. |
| */ |
| if (shader->selector->info.writes_layer) |
| pos_args[1].out[2] = layer_value; |
| |
| if (shader->selector->info.writes_viewport_index) { |
| LLVMValueRef v = viewport_index_value; |
| |
| v = ac_to_integer(&ctx->ac, v); |
| v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, 0), ""); |
| v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); |
| pos_args[1].out[2] = ac_to_float(&ctx->ac, v); |
| pos_args[1].enabled_channels |= 1 << 2; |
| } |
| } else { |
| if (shader->selector->info.writes_layer) |
| pos_args[1].out[2] = layer_value; |
| |
| if (shader->selector->info.writes_viewport_index) { |
| pos_args[1].out[3] = viewport_index_value; |
| pos_args[1].enabled_channels |= 1 << 3; |
| } |
| } |
| } |
| |
| for (i = 0; i < 4; i++) |
| if (pos_args[i].out[0]) |
| shader->info.nr_pos_exports++; |
| |
| /* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang. |
| * Setting valid_mask=1 prevents it and has no other effect. |
| */ |
| if (ctx->screen->info.chip_class == GFX10) |
| pos_args[0].valid_mask = 1; |
| |
| pos_idx = 0; |
| for (i = 0; i < 4; i++) { |
| if (!pos_args[i].out[0]) |
| continue; |
| |
| /* Specify the target we are exporting */ |
| pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; |
| |
| if (pos_idx == shader->info.nr_pos_exports) |
| /* Specify that this is the last export */ |
| pos_args[i].done = 1; |
| |
| ac_build_export(&ctx->ac, &pos_args[i]); |
| } |
| |
| /* Build parameter exports. */ |
| si_build_param_exports(ctx, outputs, noutput); |
| } |
| |
| void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader_info *info = &ctx->shader->selector->info; |
| struct si_shader_output_values *outputs = NULL; |
| int i, j; |
| |
| assert(!ctx->shader->is_gs_copy_shader); |
| assert(info->num_outputs <= max_outputs); |
| |
| outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0])); |
| |
| for (i = 0; i < info->num_outputs; i++) { |
| outputs[i].semantic_name = info->output_semantic_name[i]; |
| outputs[i].semantic_index = info->output_semantic_index[i]; |
| |
| for (j = 0; j < 4; j++) { |
| outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); |
| outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3; |
| } |
| } |
| |
| if (!ctx->screen->use_ngg_streamout && ctx->shader->selector->so.num_outputs) |
| si_llvm_emit_streamout(ctx, outputs, i, 0); |
| |
| /* Export PrimitiveID. */ |
| if (ctx->shader->key.mono.u.vs_export_prim_id) { |
| outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID; |
| outputs[i].semantic_index = 0; |
| outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0)); |
| for (j = 1; j < 4; j++) |
| outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0); |
| |
| memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream)); |
| i++; |
| } |
| |
| si_llvm_build_vs_exports(ctx, outputs, i); |
| FREE(outputs); |
| } |
| |
| static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, |
| LLVMValueRef *addrs) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader_info *info = &ctx->shader->selector->info; |
| LLVMValueRef pos[4] = {}; |
| |
| assert(info->num_outputs <= max_outputs); |
| |
| for (unsigned i = 0; i < info->num_outputs; i++) { |
| if (info->output_semantic_name[i] != TGSI_SEMANTIC_POSITION) |
| continue; |
| |
| for (unsigned chan = 0; chan < 4; chan++) |
| pos[chan] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); |
| break; |
| } |
| assert(pos[0] != NULL); |
| |
| /* Return the position output. */ |
| LLVMValueRef ret = ctx->return_value; |
| for (unsigned chan = 0; chan < 4; chan++) |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, pos[chan], chan, ""); |
| ctx->return_value = ret; |
| } |
| |
| /** |
| * Build the vertex shader prolog function. |
| * |
| * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values). |
| * All inputs are returned unmodified. The vertex load indices are |
| * stored after them, which will be used by the API VS for fetching inputs. |
| * |
| * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are: |
| * input_v0, |
| * input_v1, |
| * input_v2, |
| * input_v3, |
| * (VertexID + BaseVertex), |
| * (InstanceID + StartInstance), |
| * (InstanceID / 2 + StartInstance) |
| */ |
| void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key) |
| { |
| LLVMTypeRef *returns; |
| LLVMValueRef ret, func; |
| int num_returns, i; |
| unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs; |
| unsigned num_input_vgprs = |
| key->vs_prolog.num_merged_next_stage_vgprs + 4 + (key->vs_prolog.has_ngg_cull_inputs ? 1 : 0); |
| struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs]; |
| struct ac_arg input_vgpr_param[10]; |
| LLVMValueRef input_vgprs[10]; |
| unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs; |
| unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0; |
| |
| memset(&ctx->args, 0, sizeof(ctx->args)); |
| |
| /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ |
| returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef)); |
| num_returns = 0; |
| |
| /* Declare input and output SGPRs. */ |
| for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]); |
| returns[num_returns++] = ctx->ac.i32; |
| } |
| |
| struct ac_arg merged_wave_info = input_sgpr_param[3]; |
| |
| /* Preloaded VGPRs (outputs must be floats) */ |
| for (i = 0; i < num_input_vgprs; i++) { |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]); |
| returns[num_returns++] = ctx->ac.f32; |
| } |
| |
| /* Vertex load indices. */ |
| for (i = 0; i < key->vs_prolog.num_inputs; i++) |
| returns[num_returns++] = ctx->ac.f32; |
| |
| /* Create the function. */ |
| si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0); |
| func = ctx->main_fn; |
| |
| for (i = 0; i < num_input_vgprs; i++) { |
| input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]); |
| } |
| |
| if (key->vs_prolog.num_merged_next_stage_vgprs) { |
| if (!key->vs_prolog.is_monolithic) |
| si_init_exec_from_input(ctx, merged_wave_info, 0); |
| |
| if (key->vs_prolog.as_ls && ctx->screen->info.has_ls_vgpr_init_bug) { |
| /* If there are no HS threads, SPI loads the LS VGPRs |
| * starting at VGPR 0. Shift them back to where they |
| * belong. |
| */ |
| LLVMValueRef has_hs_threads = |
| LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, |
| si_unpack_param(ctx, input_sgpr_param[3], 8, 8), ctx->ac.i32_0, ""); |
| |
| for (i = 4; i > 0; --i) { |
| input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads, |
| input_vgprs[i + 1], input_vgprs[i - 1], ""); |
| } |
| } |
| } |
| |
| if (key->vs_prolog.gs_fast_launch_tri_list || key->vs_prolog.gs_fast_launch_tri_strip) { |
| LLVMValueRef wave_id, thread_id_in_tg; |
| |
| wave_id = si_unpack_param(ctx, input_sgpr_param[3], 24, 4); |
| thread_id_in_tg = |
| ac_build_imad(&ctx->ac, wave_id, LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), |
| ac_get_thread_id(&ctx->ac)); |
| |
| /* The GS fast launch initializes all VGPRs to the value of |
| * the first thread, so we have to add the thread ID. |
| * |
| * Only these are initialized by the hw: |
| * VGPR2: Base Primitive ID |
| * VGPR5: Base Vertex ID |
| * VGPR6: Instance ID |
| */ |
| |
| /* Put the vertex thread IDs into VGPRs as-is instead of packing them. |
| * The NGG cull shader will read them from there. |
| */ |
| if (key->vs_prolog.gs_fast_launch_tri_list) { |
| input_vgprs[0] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx01_offset */ |
| LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 0 */ |
| LLVMConstInt(ctx->ac.i32, 0, 0)); |
| input_vgprs[1] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx23_offset */ |
| LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 1 */ |
| LLVMConstInt(ctx->ac.i32, 1, 0)); |
| input_vgprs[4] = ac_build_imad(&ctx->ac, thread_id_in_tg, /* gs_vtx45_offset */ |
| LLVMConstInt(ctx->ac.i32, 3, 0), /* Vertex 2 */ |
| LLVMConstInt(ctx->ac.i32, 2, 0)); |
| } else { |
| assert(key->vs_prolog.gs_fast_launch_tri_strip); |
| LLVMBuilderRef builder = ctx->ac.builder; |
| /* Triangle indices: */ |
| LLVMValueRef index[3] = { |
| thread_id_in_tg, |
| LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 1, 0), ""), |
| LLVMBuildAdd(builder, thread_id_in_tg, LLVMConstInt(ctx->ac.i32, 2, 0), ""), |
| }; |
| LLVMValueRef is_odd = LLVMBuildTrunc(ctx->ac.builder, thread_id_in_tg, ctx->ac.i1, ""); |
| LLVMValueRef flatshade_first = LLVMBuildICmp( |
| builder, LLVMIntEQ, si_unpack_param(ctx, ctx->vs_state_bits, 4, 2), ctx->ac.i32_0, ""); |
| |
| ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, index); |
| input_vgprs[0] = index[0]; |
| input_vgprs[1] = index[1]; |
| input_vgprs[4] = index[2]; |
| } |
| |
| /* Triangles always have all edge flags set initially. */ |
| input_vgprs[3] = LLVMConstInt(ctx->ac.i32, 0x7 << 8, 0); |
| |
| input_vgprs[2] = |
| LLVMBuildAdd(ctx->ac.builder, input_vgprs[2], thread_id_in_tg, ""); /* PrimID */ |
| input_vgprs[5] = |
| LLVMBuildAdd(ctx->ac.builder, input_vgprs[5], thread_id_in_tg, ""); /* VertexID */ |
| input_vgprs[8] = input_vgprs[6]; /* InstanceID */ |
| } |
| |
| unsigned vertex_id_vgpr = first_vs_vgpr; |
| unsigned instance_id_vgpr = ctx->screen->info.chip_class >= GFX10 |
| ? first_vs_vgpr + 3 |
| : first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); |
| |
| ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr]; |
| ctx->abi.instance_id = input_vgprs[instance_id_vgpr]; |
| |
| /* InstanceID = VertexID >> 16; |
| * VertexID = VertexID & 0xffff; |
| */ |
| if (key->vs_prolog.states.unpack_instance_id_from_vertex_id) { |
| ctx->abi.instance_id = |
| LLVMBuildLShr(ctx->ac.builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 16, 0), ""); |
| ctx->abi.vertex_id = LLVMBuildAnd(ctx->ac.builder, ctx->abi.vertex_id, |
| LLVMConstInt(ctx->ac.i32, 0xffff, 0), ""); |
| } |
| |
| /* Copy inputs to outputs. This should be no-op, as the registers match, |
| * but it will prevent the compiler from overwriting them unintentionally. |
| */ |
| ret = ctx->return_value; |
| for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { |
| LLVMValueRef p = LLVMGetParam(func, i); |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, ""); |
| } |
| for (i = 0; i < num_input_vgprs; i++) { |
| LLVMValueRef p = input_vgprs[i]; |
| |
| if (i == vertex_id_vgpr) |
| p = ctx->abi.vertex_id; |
| else if (i == instance_id_vgpr) |
| p = ctx->abi.instance_id; |
| |
| p = ac_to_float(&ctx->ac, p); |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, key->vs_prolog.num_input_sgprs + i, ""); |
| } |
| |
| /* Compute vertex load indices from instance divisors. */ |
| LLVMValueRef instance_divisor_constbuf = NULL; |
| |
| if (key->vs_prolog.states.instance_divisor_is_fetched) { |
| LLVMValueRef list = si_prolog_get_rw_buffers(ctx); |
| LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0); |
| instance_divisor_constbuf = ac_build_load_to_sgpr(&ctx->ac, list, buf_index); |
| } |
| |
| for (i = 0; i < key->vs_prolog.num_inputs; i++) { |
| bool divisor_is_one = key->vs_prolog.states.instance_divisor_is_one & (1u << i); |
| bool divisor_is_fetched = key->vs_prolog.states.instance_divisor_is_fetched & (1u << i); |
| LLVMValueRef index = NULL; |
| |
| if (divisor_is_one) { |
| index = ctx->abi.instance_id; |
| } else if (divisor_is_fetched) { |
| LLVMValueRef udiv_factors[4]; |
| |
| for (unsigned j = 0; j < 4; j++) { |
| udiv_factors[j] = si_buffer_load_const(ctx, instance_divisor_constbuf, |
| LLVMConstInt(ctx->ac.i32, i * 16 + j * 4, 0)); |
| udiv_factors[j] = ac_to_integer(&ctx->ac, udiv_factors[j]); |
| } |
| /* The faster NUW version doesn't work when InstanceID == UINT_MAX. |
| * Such InstanceID might not be achievable in a reasonable time though. |
| */ |
| index = ac_build_fast_udiv_nuw(&ctx->ac, ctx->abi.instance_id, udiv_factors[0], |
| udiv_factors[1], udiv_factors[2], udiv_factors[3]); |
| } |
| |
| if (divisor_is_one || divisor_is_fetched) { |
| /* Add StartInstance. */ |
| index = |
| LLVMBuildAdd(ctx->ac.builder, index, |
| LLVMGetParam(ctx->main_fn, user_sgpr_base + SI_SGPR_START_INSTANCE), ""); |
| } else { |
| /* VertexID + BaseVertex */ |
| index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, |
| LLVMGetParam(func, user_sgpr_base + SI_SGPR_BASE_VERTEX), ""); |
| } |
| |
| index = ac_to_float(&ctx->ac, index); |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args.arg_count + i, ""); |
| } |
| |
| si_llvm_build_ret(ctx, ret); |
| } |
| |
| static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| |
| /* For non-indexed draws, the base vertex set by the driver |
| * (for direct draws) or the CP (for indirect draws) is the |
| * first vertex ID, but GLSL expects 0 to be returned. |
| */ |
| LLVMValueRef vs_state = ac_get_arg(&ctx->ac, ctx->vs_state_bits); |
| LLVMValueRef indexed; |
| |
| indexed = LLVMBuildLShr(ctx->ac.builder, vs_state, ctx->ac.i32_1, ""); |
| indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, ""); |
| |
| return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex), |
| ctx->ac.i32_0, ""); |
| } |
| |
| void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader) |
| { |
| struct si_shader *shader = ctx->shader; |
| |
| if (shader->key.as_ls) |
| ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue; |
| else if (shader->key.as_es) |
| ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; |
| else if (shader->key.opt.vs_as_prim_discard_cs) |
| ctx->abi.emit_outputs = si_llvm_emit_prim_discard_cs_epilogue; |
| else if (ngg_cull_shader) |
| ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue; |
| else if (shader->key.as_ngg) |
| ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; |
| else |
| ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; |
| |
| ctx->abi.load_base_vertex = get_base_vertex; |
| } |