| /************************************************************************** |
| * |
| * Copyright 2009 VMware, Inc. |
| * Copyright 2007-2008 VMware, Inc. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| /** |
| * @file |
| * TGSI to LLVM IR translation -- SoA. |
| * |
| * @author Jose Fonseca <jfonseca@vmware.com> |
| * |
| * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, |
| * Brian Paul, and others. |
| */ |
| |
| #include "pipe/p_config.h" |
| #include "pipe/p_shader_tokens.h" |
| #include "util/u_debug.h" |
| #include "util/u_math.h" |
| #include "util/u_memory.h" |
| #include "tgsi/tgsi_dump.h" |
| #include "tgsi/tgsi_exec.h" |
| #include "tgsi/tgsi_info.h" |
| #include "tgsi/tgsi_parse.h" |
| #include "tgsi/tgsi_util.h" |
| #include "tgsi/tgsi_scan.h" |
| #include "tgsi/tgsi_strings.h" |
| #include "lp_bld_tgsi_action.h" |
| #include "lp_bld_type.h" |
| #include "lp_bld_const.h" |
| #include "lp_bld_arit.h" |
| #include "lp_bld_bitarit.h" |
| #include "lp_bld_gather.h" |
| #include "lp_bld_init.h" |
| #include "lp_bld_logic.h" |
| #include "lp_bld_swizzle.h" |
| #include "lp_bld_flow.h" |
| #include "lp_bld_quad.h" |
| #include "lp_bld_tgsi.h" |
| #include "lp_bld_limits.h" |
| #include "lp_bld_debug.h" |
| #include "lp_bld_printf.h" |
| #include "lp_bld_sample.h" |
| #include "lp_bld_struct.h" |
| |
| /* SM 4.0 says that subroutines can nest 32 deep and |
| * we need one more for our main function */ |
| #define LP_MAX_NUM_FUNCS 33 |
| |
| #define DUMP_GS_EMITS 0 |
| |
| /* |
| * If non-zero, the generated LLVM IR will print intermediate results on every TGSI |
| * instruction. |
| * |
| * TODO: |
| * - take execution masks into consideration |
| * - debug control-flow instructions |
| */ |
| #define DEBUG_EXECUTION 0 |
| |
| |
| /* |
| * Emit code to print a register value. |
| */ |
| static void |
| emit_dump_reg(struct gallivm_state *gallivm, |
| unsigned file, |
| unsigned index, |
| unsigned chan, |
| LLVMValueRef value) |
| { |
| char buf[32]; |
| |
| util_snprintf(buf, sizeof buf, " %s[%u].%c = ", |
| tgsi_file_name(file), |
| index, "xyzw"[chan]); |
| |
| lp_build_print_value(gallivm, buf, value); |
| } |
| |
| /* |
| * Return the context for the current function. |
| * (always 'main', if the shader doesn't make any function calls) |
| */ |
| static inline struct function_ctx * |
| func_ctx(struct lp_exec_mask *mask) |
| { |
| assert(mask->function_stack_size > 0); |
| assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); |
| return &mask->function_stack[mask->function_stack_size - 1]; |
| } |
| |
| /* |
| * Returns true if we're inside a loop. |
| * The check is global: it returns true even if the current function |
| * contains no loop, as long as some function further up the call stack |
| * was inside a loop when it called this one. |
| */ |
| static inline boolean |
| mask_has_loop(struct lp_exec_mask *mask) |
| { |
| int i; |
| for (i = mask->function_stack_size - 1; i >= 0; --i) { |
| const struct function_ctx *ctx = &mask->function_stack[i]; |
| if (ctx->loop_stack_size > 0) |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| /* |
| * Returns true if we're inside a switch statement. |
| * The check is global: it returns true even if the current function |
| * contains no switch, as long as some function further up the call stack |
| * was inside a switch when it called this one. |
| */ |
| static inline boolean |
| mask_has_switch(struct lp_exec_mask *mask) |
| { |
| int i; |
| for (i = mask->function_stack_size - 1; i >= 0; --i) { |
| const struct function_ctx *ctx = &mask->function_stack[i]; |
| if (ctx->switch_stack_size > 0) |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| /* |
| * Returns true if we're inside a conditional. |
| * The check is global: it returns true even if the current function |
| * contains no conditional, as long as some function further up the call |
| * stack was inside a conditional when it called this one. |
| */ |
| static inline boolean |
| mask_has_cond(struct lp_exec_mask *mask) |
| { |
| int i; |
| for (i = mask->function_stack_size - 1; i >= 0; --i) { |
| const struct function_ctx *ctx = &mask->function_stack[i]; |
| if (ctx->cond_stack_size > 0) |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| |
| /* |
| * Initialize a function context at the specified index. |
| */ |
| static void |
| lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx) |
| { |
| LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context); |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = &mask->function_stack[function_idx]; |
| |
| ctx->cond_stack_size = 0; |
| ctx->loop_stack_size = 0; |
| ctx->switch_stack_size = 0; |
| |
| if (function_idx == 0) { |
| ctx->ret_mask = mask->ret_mask; |
| } |
| |
| ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm, |
| int_type, "looplimiter"); |
| LLVMBuildStore( |
| builder, |
| LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false), |
| ctx->loop_limiter); |
| } |
| |
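| /* |
| * Initialize the execution mask state: all masks start out all-ones |
| * (every channel active) and the function stack holds just the main |
| * function. |
| */ |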
| static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) |
| { |
| mask->bld = bld; |
| mask->has_mask = FALSE; |
| mask->ret_in_main = FALSE; |
| /* For the main function */ |
| mask->function_stack_size = 1; |
| |
| mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); |
| mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = |
| mask->cond_mask = mask->switch_mask = |
| LLVMConstAllOnes(mask->int_vec_type); |
| |
| mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS, |
| sizeof(mask->function_stack[0])); |
| lp_exec_mask_function_init(mask, 0); |
| } |
| |
| static void |
| lp_exec_mask_fini(struct lp_exec_mask *mask) |
| { |
| FREE(mask->function_stack); |
| } |
| |
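| /* |
| * Recompute mask->exec_mask from the active partial masks: |
| * exec_mask = cond_mask [& cont_mask & break_mask] [& switch_mask] [& ret_mask] |
| * where the bracketed terms are only applied if the corresponding construct |
| * (loop, switch, return) is active anywhere on the call stack. |
| */ |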
| static void lp_exec_mask_update(struct lp_exec_mask *mask) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| boolean has_loop_mask = mask_has_loop(mask); |
| boolean has_cond_mask = mask_has_cond(mask); |
| boolean has_switch_mask = mask_has_switch(mask); |
| boolean has_ret_mask = mask->function_stack_size > 1 || |
| mask->ret_in_main; |
| |
| if (has_loop_mask) { |
| /* For loops we need to update the entire mask at runtime. */ |
| LLVMValueRef tmp; |
| assert(mask->break_mask); |
| tmp = LLVMBuildAnd(builder, |
| mask->cont_mask, |
| mask->break_mask, |
| "maskcb"); |
| mask->exec_mask = LLVMBuildAnd(builder, |
| mask->cond_mask, |
| tmp, |
| "maskfull"); |
| } else |
| mask->exec_mask = mask->cond_mask; |
| |
| if (has_switch_mask) { |
| mask->exec_mask = LLVMBuildAnd(builder, |
| mask->exec_mask, |
| mask->switch_mask, |
| "switchmask"); |
| } |
| |
| if (has_ret_mask) { |
| mask->exec_mask = LLVMBuildAnd(builder, |
| mask->exec_mask, |
| mask->ret_mask, |
| "callmask"); |
| } |
| |
| mask->has_mask = (has_cond_mask || |
| has_loop_mask || |
| has_switch_mask || |
| has_ret_mask); |
| } |
| |
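| /* |
| * Push the current condition mask and AND in the new condition (IF). |
| */ |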
| static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, |
| LLVMValueRef val) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) { |
| ctx->cond_stack_size++; |
| return; |
| } |
| if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) { |
| assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); |
| } |
| ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask; |
| assert(LLVMTypeOf(val) == mask->int_vec_type); |
| mask->cond_mask = LLVMBuildAnd(builder, |
| mask->cond_mask, |
| val, |
| ""); |
| lp_exec_mask_update(mask); |
| } |
| |
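| /* |
| * Invert the current condition relative to the mask saved on the stack (ELSE). |
| */ |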
| static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| LLVMValueRef prev_mask; |
| LLVMValueRef inv_mask; |
| |
| assert(ctx->cond_stack_size); |
| if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) |
| return; |
| prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1]; |
| if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) { |
| assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); |
| } |
| |
| inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); |
| |
| mask->cond_mask = LLVMBuildAnd(builder, |
| inv_mask, |
| prev_mask, ""); |
| lp_exec_mask_update(mask); |
| } |
| |
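| /* |
| * Restore the condition mask that was active before the matching IF (ENDIF). |
| */ |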
| static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) |
| { |
| struct function_ctx *ctx = func_ctx(mask); |
| assert(ctx->cond_stack_size); |
| --ctx->cond_stack_size; |
| if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) |
| return; |
| mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size]; |
| lp_exec_mask_update(mask); |
| } |
| |
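| /* |
| * Begin a loop (BGNLOOP): save the enclosing loop state, allocate the break |
| * mask variable and start a new basic block for the loop body. |
| */ |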
| static void lp_exec_bgnloop(struct lp_exec_mask *mask) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) { |
| ++ctx->loop_stack_size; |
| return; |
| } |
| |
| ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] = |
| ctx->break_type; |
| ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP; |
| |
| ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block; |
| ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask; |
| ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask; |
| ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var; |
| ++ctx->loop_stack_size; |
| |
| ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); |
| LLVMBuildStore(builder, mask->break_mask, ctx->break_var); |
| |
| ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); |
| |
| LLVMBuildBr(builder, ctx->loop_block); |
| LLVMPositionBuilderAtEnd(builder, ctx->loop_block); |
| |
| mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, ""); |
| |
| lp_exec_mask_update(mask); |
| } |
| |
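| /* |
| * Execute a BRK: disable the active channels in either the break mask (when |
| * breaking out of a loop) or the switch mask (when breaking out of a switch). |
| */ |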
| static void lp_exec_break(struct lp_exec_mask *mask, |
| struct lp_build_tgsi_context * bld_base) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) { |
| LLVMValueRef exec_mask = LLVMBuildNot(builder, |
| mask->exec_mask, |
| "break"); |
| |
| mask->break_mask = LLVMBuildAnd(builder, |
| mask->break_mask, |
| exec_mask, "break_full"); |
| } |
| else { |
| unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode; |
| boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH || |
| opcode == TGSI_OPCODE_CASE); |
| |
| |
| if (ctx->switch_in_default) { |
| /* |
| * Stop default execution, but only if this is an unconditional break. |
| * (The condition here is not perfect since dead code after the break |
| * is allowed, but it should be sufficient since false negatives just |
| * mean unoptimized code - so we don't have to pre-evaluate that.) |
| */ |
| if (break_always && ctx->switch_pc) { |
| bld_base->pc = ctx->switch_pc; |
| return; |
| } |
| } |
| |
| if (break_always) { |
| mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type); |
| } |
| else { |
| LLVMValueRef exec_mask = LLVMBuildNot(builder, |
| mask->exec_mask, |
| "break"); |
| mask->switch_mask = LLVMBuildAnd(builder, |
| mask->switch_mask, |
| exec_mask, "break_switch"); |
| } |
| } |
| |
| lp_exec_mask_update(mask); |
| } |
| |
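| /* |
| * Execute a CONT: disable the active channels in the continue mask for the |
| * rest of the current loop iteration. |
| */ |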
| static void lp_exec_continue(struct lp_exec_mask *mask) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| LLVMValueRef exec_mask = LLVMBuildNot(builder, |
| mask->exec_mask, |
| ""); |
| |
| mask->cont_mask = LLVMBuildAnd(builder, |
| mask->cont_mask, |
| exec_mask, ""); |
| |
| lp_exec_mask_update(mask); |
| } |
| |
| |
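| /* |
| * End a loop (ENDLOOP): decrement the iteration limiter and branch back to |
| * the loop header while any channel is still active and the limiter hasn't |
| * expired, then restore the enclosing loop state. |
| */ |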
| static void lp_exec_endloop(struct gallivm_state *gallivm, |
| struct lp_exec_mask *mask) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| LLVMBasicBlockRef endloop; |
| LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context); |
| LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, |
| mask->bld->type.width * |
| mask->bld->type.length); |
| LLVMValueRef i1cond, i2cond, icond, limiter; |
| |
| assert(mask->break_mask); |
| |
| |
| assert(ctx->loop_stack_size); |
| if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) { |
| --ctx->loop_stack_size; |
| return; |
| } |
| |
| /* |
| * Restore the cont_mask, but don't pop |
| */ |
| mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask; |
| lp_exec_mask_update(mask); |
| |
| /* |
| * Unlike the continue mask, the break_mask must be preserved across loop |
| * iterations |
| */ |
| LLVMBuildStore(builder, mask->break_mask, ctx->break_var); |
| |
| /* Decrement the loop limiter */ |
| limiter = LLVMBuildLoad(builder, ctx->loop_limiter, ""); |
| |
| limiter = LLVMBuildSub( |
| builder, |
| limiter, |
| LLVMConstInt(int_type, 1, false), |
| ""); |
| |
| LLVMBuildStore(builder, limiter, ctx->loop_limiter); |
| |
| /* i1cond = (mask != 0) */ |
| i1cond = LLVMBuildICmp( |
| builder, |
| LLVMIntNE, |
| LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), |
| LLVMConstNull(reg_type), "i1cond"); |
| |
| /* i2cond = (looplimiter > 0) */ |
| i2cond = LLVMBuildICmp( |
| builder, |
| LLVMIntSGT, |
| limiter, |
| LLVMConstNull(int_type), "i2cond"); |
| |
| /* if( i1cond && i2cond ) */ |
| icond = LLVMBuildAnd(builder, i1cond, i2cond, ""); |
| |
| endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); |
| |
| LLVMBuildCondBr(builder, |
| icond, ctx->loop_block, endloop); |
| |
| LLVMPositionBuilderAtEnd(builder, endloop); |
| |
| assert(ctx->loop_stack_size); |
| --ctx->loop_stack_size; |
| mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask; |
| mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask; |
| ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block; |
| ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var; |
| ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + |
| ctx->switch_stack_size]; |
| |
| lp_exec_mask_update(mask); |
| } |
| |
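| /* |
| * Begin a switch statement (SWITCH): save the enclosing switch state and |
| * start with an all-zero switch mask (no case matched yet). |
| */ |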
| static void lp_exec_switch(struct lp_exec_mask *mask, |
| LLVMValueRef switchval) |
| { |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING || |
| ctx->loop_stack_size > LP_MAX_TGSI_NESTING) { |
| ctx->switch_stack_size++; |
| return; |
| } |
| |
| ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] = |
| ctx->break_type; |
| ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH; |
| |
| ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask; |
| ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val; |
| ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default; |
| ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default; |
| ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc; |
| ctx->switch_stack_size++; |
| |
| mask->switch_mask = LLVMConstNull(mask->int_vec_type); |
| ctx->switch_val = switchval; |
| ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type); |
| ctx->switch_in_default = false; |
| ctx->switch_pc = 0; |
| |
| lp_exec_mask_update(mask); |
| } |
| |
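| /* |
| * End a switch statement (ENDSWITCH): run a deferred default section if |
| * there is one, then restore the enclosing switch state. |
| */ |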
| static void lp_exec_endswitch(struct lp_exec_mask *mask, |
| struct lp_build_tgsi_context * bld_base) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { |
| ctx->switch_stack_size--; |
| return; |
| } |
| |
| /* Check if there's a deferred default; if so, do it now. */ |
| if (ctx->switch_pc && !ctx->switch_in_default) { |
| LLVMValueRef prevmask, defaultmask; |
| unsigned tmp_pc; |
| prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; |
| defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask"); |
| mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask"); |
| ctx->switch_in_default = true; |
| |
| lp_exec_mask_update(mask); |
| |
| assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode == |
| TGSI_OPCODE_DEFAULT); |
| |
| tmp_pc = bld_base->pc; |
| bld_base->pc = ctx->switch_pc; |
| /* |
| * Re-purpose switch_pc to point back here, since we stop execution of |
| * the deferred default after the next break. |
| */ |
| ctx->switch_pc = tmp_pc - 1; |
| |
| return; |
| } |
| |
| else if (ctx->switch_pc && ctx->switch_in_default) { |
| assert(bld_base->pc == ctx->switch_pc + 1); |
| } |
| |
| ctx->switch_stack_size--; |
| mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask; |
| ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val; |
| ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default; |
| ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default; |
| ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc; |
| |
| ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size]; |
| |
| lp_exec_mask_update(mask); |
| } |
| |
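| /* |
| * Execute a CASE: enable the channels whose switch value equals caseval, in |
| * addition to channels falling through from the previous case. |
| */ |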
| static void lp_exec_case(struct lp_exec_mask *mask, |
| LLVMValueRef caseval) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| LLVMValueRef casemask, prevmask; |
| |
| if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { |
| return; |
| } |
| |
| /* Skipping the case mask evaluation here is NOT optional (at least not in all cases). */ |
| if (!ctx->switch_in_default) { |
| prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; |
| casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val); |
| ctx->switch_mask_default = LLVMBuildOr(builder, casemask, |
| ctx->switch_mask_default, "sw_default_mask"); |
| casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, ""); |
| mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask"); |
| |
| lp_exec_mask_update(mask); |
| } |
| } |
| |
| /* |
| * Analyse the default statement in a switch. |
| * \return true if default is the last statement, false otherwise |
| * \param default_pc_start receives the pc of the instruction to jump to |
| * if default wasn't last but there's no |
| * fallthrough into default. |
| */ |
| static boolean default_analyse_is_last(struct lp_exec_mask *mask, |
| struct lp_build_tgsi_context * bld_base, |
| int *default_pc_start) |
| { |
| unsigned pc = bld_base->pc; |
| struct function_ctx *ctx = func_ctx(mask); |
| int curr_switch_stack = ctx->switch_stack_size; |
| |
| if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { |
| return false; |
| } |
| |
| /* skip over case statements which are together with default */ |
| while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) { |
| pc++; |
| } |
| |
| while (pc != ~0u && pc < bld_base->num_instructions) { |
| unsigned opcode = bld_base->instructions[pc].Instruction.Opcode; |
| switch (opcode) { |
| case TGSI_OPCODE_CASE: |
| if (curr_switch_stack == ctx->switch_stack_size) { |
| *default_pc_start = pc - 1; |
| return false; |
| } |
| break; |
| case TGSI_OPCODE_SWITCH: |
| curr_switch_stack++; |
| break; |
| case TGSI_OPCODE_ENDSWITCH: |
| if (curr_switch_stack == ctx->switch_stack_size) { |
| *default_pc_start = pc - 1; |
| return true; |
| } |
| curr_switch_stack--; |
| break; |
| } |
| pc++; |
| } |
| /* should never arrive here */ |
| assert(0); |
| return true; |
| } |
| |
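| /* |
| * Execute a DEFAULT: enable the channels which didn't match any case (plus |
| * fallthrough channels). See the comments below for how default sections |
| * that aren't last are handled. |
| */ |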
| static void lp_exec_default(struct lp_exec_mask *mask, |
| struct lp_build_tgsi_context * bld_base) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| |
| int default_exec_pc; |
| boolean default_is_last; |
| |
| if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { |
| return; |
| } |
| |
| /* |
| * This is a messy opcode, because it may not always be at the end and |
| * there can be fallthrough in and out of it. |
| */ |
| |
| default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc); |
| /* |
| * If it is the last statement in the switch (note that case statements |
| * appearing "at the same time" as default don't change that) everything is |
| * just fine: update the switch mask and go on. This means we can handle |
| * default with fallthrough INTO it without overhead, if it is last. |
| */ |
| if (default_is_last) { |
| LLVMValueRef prevmask, defaultmask; |
| prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; |
| defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask"); |
| defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, ""); |
| mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask"); |
| ctx->switch_in_default = true; |
| |
| lp_exec_mask_update(mask); |
| } |
| else { |
| /* |
| * Technically, a "case" immediately before default isn't really a |
| * fallthrough, however we still have to count it as such since we |
| * have already updated the masks. |
| * If that happens in practice we could add a switch optimizer pass |
| * which just gets rid of all case statements appearing together with |
| * default (or do the switch analysis at switch start time instead). |
| */ |
| unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode; |
| boolean ft_into = (opcode != TGSI_OPCODE_BRK && |
| opcode != TGSI_OPCODE_SWITCH); |
| /* |
| * If it is not the last statement and there was no fallthrough into it, |
| * we record the PC and continue execution at the next case (again, those |
| * cases encountered at the same time don't count). At endswitch |
| * time, we update the switch mask and go back executing the code we skipped |
| * until the next break (possibly re-executing some code with a changed mask |
| * if there was a fallthrough out of default). |
| * Finally, if it is not the last statement and there was a fallthrough into |
| * it, do the same as in the former case, except instead of skipping the code |
| * just execute it without updating the mask, then go back and re-execute. |
| */ |
| ctx->switch_pc = bld_base->pc; |
| if (!ft_into) { |
| bld_base->pc = default_exec_pc; |
| } |
| } |
| } |
| |
| |
| /* Stores val into the address pointed to by dst_ptr. |
| * mask->exec_mask is used to figure out which lanes of val |
| * should be stored into the address |
| * (a zero lane means don't store, an all-ones lane means do store). |
| */ |
| static void lp_exec_mask_store(struct lp_exec_mask *mask, |
| struct lp_build_context *bld_store, |
| LLVMValueRef val, |
| LLVMValueRef dst_ptr) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL; |
| |
| assert(lp_check_value(bld_store->type, val)); |
| assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind); |
| assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val)); |
| |
| if (exec_mask) { |
| LLVMValueRef res, dst; |
| |
| dst = LLVMBuildLoad(builder, dst_ptr, ""); |
| res = lp_build_select(bld_store, exec_mask, val, dst); |
| LLVMBuildStore(builder, res, dst_ptr); |
| } else |
| LLVMBuildStore(builder, val, dst_ptr); |
| } |
| |
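| /* |
| * Execute a subroutine call (CAL): push a new function context, remember |
| * the return pc and jump to the callee. |
| */ |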
| static void lp_exec_mask_call(struct lp_exec_mask *mask, |
| int func, |
| int *pc) |
| { |
| if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) { |
| return; |
| } |
| |
| lp_exec_mask_function_init(mask, mask->function_stack_size); |
| mask->function_stack[mask->function_stack_size].pc = *pc; |
| mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask; |
| mask->function_stack_size++; |
| *pc = func; |
| } |
| |
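| /* |
| * Execute a RET: either terminate main() outright, or disable the active |
| * channels in the return mask until the end of the current function. |
| */ |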
| static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) |
| { |
| LLVMBuilderRef builder = mask->bld->gallivm->builder; |
| struct function_ctx *ctx = func_ctx(mask); |
| LLVMValueRef exec_mask; |
| |
| if (ctx->cond_stack_size == 0 && |
| ctx->loop_stack_size == 0 && |
| ctx->switch_stack_size == 0 && |
| mask->function_stack_size == 1) { |
| /* returning from main() */ |
| *pc = -1; |
| return; |
| } |
| |
| if (mask->function_stack_size == 1) { |
| /* |
| * This requires special handling since we need to ensure |
| * we don't drop the mask even if we have no call stack |
| * (e.g. after a ret in a if clause after the endif) |
| */ |
| mask->ret_in_main = TRUE; |
| } |
| |
| exec_mask = LLVMBuildNot(builder, |
| mask->exec_mask, |
| "ret"); |
| |
| mask->ret_mask = LLVMBuildAnd(builder, |
| mask->ret_mask, |
| exec_mask, "ret_full"); |
| |
| lp_exec_mask_update(mask); |
| } |
| |
| static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) |
| { |
| } |
| |
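| /* |
| * Execute ENDSUB: pop the function context and resume at the caller's pc. |
| */ |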
| static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) |
| { |
| struct function_ctx *ctx; |
| |
| assert(mask->function_stack_size > 1); |
| assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); |
| |
| ctx = func_ctx(mask); |
| mask->function_stack_size--; |
| |
| *pc = ctx->pc; |
| mask->ret_mask = ctx->ret_mask; |
| |
| lp_exec_mask_update(mask); |
| } |
| |
| |
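| /* |
| * Return a pointer to the given channel of a temporary or output register, |
| * using either the per-register variables or the flat array used for |
| * indirect addressing. |
| */ |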
| static LLVMValueRef |
| get_file_ptr(struct lp_build_tgsi_soa_context *bld, |
| unsigned file, |
| int index, |
| unsigned chan) |
| { |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS]; |
| LLVMValueRef var_of_array; |
| |
| switch (file) { |
| case TGSI_FILE_TEMPORARY: |
| array_of_vars = bld->temps; |
| var_of_array = bld->temps_array; |
| break; |
| case TGSI_FILE_OUTPUT: |
| array_of_vars = bld->outputs; |
| var_of_array = bld->outputs_array; |
| break; |
| default: |
| assert(0); |
| return NULL; |
| } |
| |
| assert(chan < 4); |
| |
| if (bld->indirect_files & (1 << file)) { |
| LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); |
| return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); |
| } |
| else { |
| assert(index <= bld->bld_base.info->file_max[file]); |
| return array_of_vars[index][chan]; |
| } |
| } |
| |
| |
| /** |
| * Return pointer to a temporary register channel (src or dest). |
| * Note that indirect addressing cannot be handled here. |
| * \param index which temporary register |
| * \param chan which channel of the temp register. |
| */ |
| LLVMValueRef |
| lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, |
| unsigned index, |
| unsigned chan) |
| { |
| return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan); |
| } |
| |
| /** |
| * Return pointer to an output register channel (src or dest). |
| * Note that indirect addressing cannot be handled here. |
| * \param index which output register |
| * \param chan which channel of the output register. |
| */ |
| LLVMValueRef |
| lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, |
| unsigned index, |
| unsigned chan) |
| { |
| return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan); |
| } |
| |
| /* |
| * If we have indirect addressing in outputs, copy our alloca array |
| * to the output slots specified by the caller to make sure |
| * our outputs are delivered consistently via the same interface. |
| */ |
| static void |
| gather_outputs(struct lp_build_tgsi_soa_context * bld) |
| { |
| if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { |
| unsigned index, chan; |
| assert(bld->bld_base.info->num_outputs <= |
| bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); |
| for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { |
| for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { |
| bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Gather vector. |
| * XXX the lp_build_gather() function should be capable of doing this |
| * with a little work. |
| */ |
| static LLVMValueRef |
| build_gather(struct lp_build_tgsi_context *bld_base, |
| LLVMValueRef base_ptr, |
| LLVMValueRef indexes, |
| LLVMValueRef overflow_mask, |
| LLVMValueRef indexes2) |
| { |
| struct gallivm_state *gallivm = bld_base->base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| struct lp_build_context *uint_bld = &bld_base->uint_bld; |
| struct lp_build_context *bld = &bld_base->base; |
| LLVMValueRef res; |
| unsigned i; |
| |
| if (indexes2) |
| res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2)); |
| else |
| res = bld->undef; |
| /* |
| * overflow_mask is a vector telling us which channels |
| * in the vector overflowed. We use the overflow behavior for |
| * constant buffers which is defined as: |
| * Out of bounds access to constant buffer returns 0 in all |
| * components. Out of bounds behavior is always with respect |
| * to the size of the buffer bound at that slot. |
| */ |
| |
| if (overflow_mask) { |
| /* |
| * We avoid per-element control flow here (also due to llvm going crazy, |
| * though I suspect it's better anyway since overflow is likely rare). |
| * Note that since we still fetch from buffers even if num_elements was |
| * zero (in this case we'll fetch from index zero) the jit func callers |
| * MUST provide valid fake constant buffers of size 4x32 (the values do |
| * not matter), otherwise we'd still need (not per element though) |
| * control flow. |
| */ |
| indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes); |
| if (indexes2) |
| indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2); |
| } |
| |
| /* |
| * Loop over elements of index_vec, load scalar value, insert it into 'res'. |
| */ |
| for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) { |
| LLVMValueRef si, di; |
| LLVMValueRef index; |
| LLVMValueRef scalar_ptr, scalar; |
| |
| di = lp_build_const_int32(bld->gallivm, i); |
| if (indexes2) |
| si = lp_build_const_int32(bld->gallivm, i >> 1); |
| else |
| si = di; |
| |
| if (indexes2 && (i & 1)) { |
| index = LLVMBuildExtractElement(builder, |
| indexes2, si, ""); |
| } else { |
| index = LLVMBuildExtractElement(builder, |
| indexes, si, ""); |
| } |
| scalar_ptr = LLVMBuildGEP(builder, base_ptr, |
| &index, 1, "gather_ptr"); |
| scalar = LLVMBuildLoad(builder, scalar_ptr, ""); |
| |
| res = LLVMBuildInsertElement(builder, res, scalar, di, ""); |
| } |
| |
| if (overflow_mask) { |
| if (indexes2) { |
| res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); |
| overflow_mask = LLVMBuildSExt(builder, overflow_mask, |
| bld_base->dbl_bld.int_vec_type, ""); |
| res = lp_build_select(&bld_base->dbl_bld, overflow_mask, |
| bld_base->dbl_bld.zero, res); |
| } else |
| res = lp_build_select(bld, overflow_mask, bld->zero, res); |
| } |
| |
| return res; |
| } |
| |
| |
| /** |
| * Scatter/store vector. |
| */ |
| static void |
| emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, |
| LLVMValueRef base_ptr, |
| LLVMValueRef indexes, |
| LLVMValueRef values, |
| struct lp_exec_mask *mask) |
| { |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| unsigned i; |
| LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL; |
| |
| /* |
| * Loop over elements of index_vec, store scalar value. |
| */ |
| for (i = 0; i < bld->bld_base.base.type.length; i++) { |
| LLVMValueRef ii = lp_build_const_int32(gallivm, i); |
| LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); |
| LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); |
| LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); |
| LLVMValueRef scalar_pred = pred ? |
| LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; |
| |
| if (0) |
| lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", |
| ii, val, index, scalar_ptr); |
| |
| if (scalar_pred) { |
| LLVMValueRef real_val, dst_val; |
| dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); |
| real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); |
| LLVMBuildStore(builder, real_val, scalar_ptr); |
| } |
| else { |
| LLVMBuildStore(builder, val, scalar_ptr); |
| } |
| } |
| } |
| |
| |
| /** |
| * Read the current value of the ADDR register, convert the floats to |
| * ints, add the base index and return the vector of offsets. |
| * The offsets will be used to index into the constant buffer or |
| * temporary register file. |
| */ |
| static LLVMValueRef |
| get_indirect_index(struct lp_build_tgsi_soa_context *bld, |
| unsigned reg_file, unsigned reg_index, |
| const struct tgsi_ind_register *indirect_reg) |
| { |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; |
| /* use the component of the address register selected by the swizzle */ |
| unsigned swizzle = indirect_reg->Swizzle; |
| LLVMValueRef base; |
| LLVMValueRef rel; |
| LLVMValueRef max_index; |
| LLVMValueRef index; |
| |
| assert(bld->indirect_files & (1 << reg_file)); |
| |
| base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); |
| |
| assert(swizzle < 4); |
| switch (indirect_reg->File) { |
| case TGSI_FILE_ADDRESS: |
| rel = LLVMBuildLoad(builder, |
| bld->addr[indirect_reg->Index][swizzle], |
| "load addr reg"); |
| /* ADDR LLVM values already have LLVM integer type. */ |
| break; |
| case TGSI_FILE_TEMPORARY: |
| rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle); |
| rel = LLVMBuildLoad(builder, rel, "load temp reg"); |
| /* TEMP LLVM values always have LLVM float type, but for indirection, the |
| * value actually stored is expected to be an integer */ |
| rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, ""); |
| break; |
| default: |
| assert(0); |
| rel = uint_bld->zero; |
| } |
| |
| index = lp_build_add(uint_bld, base, rel); |
| |
| /* |
| * emit_fetch_constant handles constant buffer overflow so this code |
| * is pointless for them. |
| * Furthermore the D3D10 spec in section 6.5 says: |
| * If the constant buffer bound to a slot is larger than the size |
| * declared in the shader for that slot, implementations are allowed |
| * to return incorrect data (not necessarily 0) for indices that are |
| * larger than the declared size but smaller than the buffer size. |
| */ |
| if (reg_file != TGSI_FILE_CONSTANT) { |
| max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, |
| uint_bld->type, |
| bld->bld_base.info->file_max[reg_file]); |
| |
| assert(!uint_bld->type.sign); |
| index = lp_build_min(uint_bld, index, max_index); |
| } |
| |
| return index; |
| } |
| |
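| /* |
| * Return the build context matching the given TGSI value type. |
| */ |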
| static struct lp_build_context * |
| stype_to_fetch(struct lp_build_tgsi_context * bld_base, |
| enum tgsi_opcode_type stype) |
| { |
| struct lp_build_context *bld_fetch; |
| |
| switch (stype) { |
| case TGSI_TYPE_FLOAT: |
| case TGSI_TYPE_UNTYPED: |
| bld_fetch = &bld_base->base; |
| break; |
| case TGSI_TYPE_UNSIGNED: |
| bld_fetch = &bld_base->uint_bld; |
| break; |
| case TGSI_TYPE_SIGNED: |
| bld_fetch = &bld_base->int_bld; |
| break; |
| case TGSI_TYPE_DOUBLE: |
| bld_fetch = &bld_base->dbl_bld; |
| break; |
| case TGSI_TYPE_UNSIGNED64: |
| bld_fetch = &bld_base->uint64_bld; |
| break; |
| case TGSI_TYPE_SIGNED64: |
| bld_fetch = &bld_base->int64_bld; |
| break; |
| case TGSI_TYPE_VOID: |
| default: |
| assert(0); |
| bld_fetch = NULL; |
| break; |
| } |
| return bld_fetch; |
| } |
| |
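| /* |
| * Build the vector of element offsets used to address a register file laid |
| * out in SoA fashion (reg0.x vector, reg0.y vector, ...): |
| * index_vec = (indirect_index * 4 + chan_index) * length [+ {0,1,2,...}] |
| * As a purely illustrative example: with a vector length of 8, an indirect |
| * index of 3 and chan_index 1, each lane gets offset (3*4 + 1)*8 = 104, plus |
| * its per-lane offset 0..7 when need_perelement_offset is set. |
| */ |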
| static LLVMValueRef |
| get_soa_array_offsets(struct lp_build_context *uint_bld, |
| LLVMValueRef indirect_index, |
| unsigned chan_index, |
| boolean need_perelement_offset) |
| { |
| struct gallivm_state *gallivm = uint_bld->gallivm; |
| LLVMValueRef chan_vec = |
| lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index); |
| LLVMValueRef length_vec = |
| lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length); |
| LLVMValueRef index_vec; |
| |
| /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ |
| index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); |
| index_vec = lp_build_add(uint_bld, index_vec, chan_vec); |
| index_vec = lp_build_mul(uint_bld, index_vec, length_vec); |
| |
| if (need_perelement_offset) { |
| LLVMValueRef pixel_offsets; |
| unsigned i; |
| /* build pixel offset vector: {0, 1, 2, 3, ...} */ |
| pixel_offsets = uint_bld->undef; |
| for (i = 0; i < uint_bld->type.length; i++) { |
| LLVMValueRef ii = lp_build_const_int32(gallivm, i); |
| pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets, |
| ii, ii, ""); |
| } |
| index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); |
| } |
| return index_vec; |
| } |
| |
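| /* |
| * Fetch a constant register value, either broadcasting a single scalar from |
| * the constant buffer or gathering per-channel values when the access is |
| * indirect (with out-of-bounds reads returning zero). |
| */ |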
| static LLVMValueRef |
| emit_fetch_constant( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld_base->base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| struct lp_build_context *uint_bld = &bld_base->uint_bld; |
| unsigned dimension = 0; |
| LLVMValueRef consts_ptr; |
| LLVMValueRef num_consts; |
| LLVMValueRef res; |
| |
| /* XXX: Handle fetching xyzw components as a vector */ |
| assert(swizzle != ~0u); |
| |
| if (reg->Register.Dimension) { |
| assert(!reg->Dimension.Indirect); |
| dimension = reg->Dimension.Index; |
| assert(dimension < LP_MAX_TGSI_CONST_BUFFERS); |
| } |
| |
| consts_ptr = bld->consts[dimension]; |
| num_consts = bld->consts_sizes[dimension]; |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef indirect_index; |
| LLVMValueRef swizzle_vec = |
| lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); |
| LLVMValueRef index_vec; /* index into the const buffer */ |
| LLVMValueRef overflow_mask; |
| LLVMValueRef index_vec2 = NULL; |
| |
| indirect_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| |
| /* All fetches are from the same constant buffer, so |
| * we need to propagate the size to a vector to do a |
| * vector comparison */ |
| num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); |
| /* Construct a boolean vector telling us which channels |
| * overflow the bound constant buffer */ |
| overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, |
| indirect_index, num_consts); |
| |
| /* index_vec = indirect_index * 4 + swizzle */ |
| index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); |
| index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); |
| |
| if (tgsi_type_is_64bit(stype)) { |
| LLVMValueRef swizzle_vec2; |
| swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1); |
| index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2); |
| index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2); |
| } |
| /* Gather values from the constant buffer */ |
| res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2); |
| } |
| else { |
| LLVMValueRef index; /* index into the const buffer */ |
| LLVMValueRef scalar, scalar_ptr; |
| struct lp_build_context *bld_broad = &bld_base->base; |
| index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); |
| |
| scalar_ptr = LLVMBuildGEP(builder, consts_ptr, |
| &index, 1, ""); |
| if (stype == TGSI_TYPE_DOUBLE) { |
| LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); |
| scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); |
| bld_broad = &bld_base->dbl_bld; |
| } else if (stype == TGSI_TYPE_UNSIGNED64) { |
| LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); |
| scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, ""); |
| bld_broad = &bld_base->uint64_bld; |
| } else if (stype == TGSI_TYPE_SIGNED64) { |
| LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); |
| scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, ""); |
| bld_broad = &bld_base->int64_bld; |
| } |
| scalar = LLVMBuildLoad(builder, scalar_ptr, ""); |
| res = lp_build_broadcast_scalar(bld_broad, scalar); |
| } |
| |
| if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) { |
| struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); |
| res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); |
| } |
| |
| return res; |
| } |
| |
| /** |
| * Fetch 64-bit values from two separate channels. |
| * 64-bit values are stored split across two channels, like xy and zw. |
| * This function creates a set of vec_length*2 floats, |
| * extracts the values from the two channels, |
| * puts them in the correct place, then casts to a vec_length 64-bit vector. |
| */ |
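| /* |
| * As a purely illustrative example: with a vector length of 4 the shuffle |
| * mask built below is {0, 4, 1, 5, 2, 6, 3, 7}, interleaving the two source |
| * channels element by element before the final bitcast. |
| */ |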
| static LLVMValueRef |
| emit_fetch_64bit( |
| struct lp_build_tgsi_context * bld_base, |
| enum tgsi_opcode_type stype, |
| LLVMValueRef input, |
| LLVMValueRef input2) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef res; |
| struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); |
| int i; |
| LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)]; |
| int len = bld_base->base.type.length * 2; |
| assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); |
| |
| for (i = 0; i < bld_base->base.type.length * 2; i+=2) { |
| shuffles[i] = lp_build_const_int32(gallivm, i / 2); |
| shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); |
| } |
| res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); |
| |
| return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); |
| } |
| |
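| /* |
| * Fetch an immediate register value, either from the per-channel immediate |
| * values or from the immediates array (used for indirect addressing). |
| */ |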
| static LLVMValueRef |
| emit_fetch_immediate( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef res = NULL; |
| |
| if (bld->use_immediates_array || reg->Register.Indirect) { |
| LLVMValueRef imms_array; |
| LLVMTypeRef fptr_type; |
| |
| /* cast imms_array pointer to float* */ |
| fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
| imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, ""); |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef indirect_index; |
| LLVMValueRef index_vec; /* index into the immediate register array */ |
| LLVMValueRef index_vec2 = NULL; |
| indirect_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| /* |
| * Unlike for other reg classes, adding pixel offsets is unnecessary - |
| * immediates are stored as full vectors (FIXME??? - might be better |
| * to store them the same as constants) but all elements are the same |
| * in any case. |
| */ |
| index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle, |
| FALSE); |
| if (tgsi_type_is_64bit(stype)) |
| index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle + 1, |
| FALSE); |
| /* Gather values from the immediate register array */ |
| res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2); |
| } else { |
| LLVMValueRef lindex = lp_build_const_int32(gallivm, |
| reg->Register.Index * 4 + swizzle); |
| LLVMValueRef imms_ptr = LLVMBuildGEP(builder, |
| bld->imms_array, &lindex, 1, ""); |
| res = LLVMBuildLoad(builder, imms_ptr, ""); |
| |
| if (tgsi_type_is_64bit(stype)) { |
| LLVMValueRef lindex1; |
| LLVMValueRef imms_ptr2; |
| LLVMValueRef res2; |
| |
| lindex1 = lp_build_const_int32(gallivm, |
| reg->Register.Index * 4 + swizzle + 1); |
| imms_ptr2 = LLVMBuildGEP(builder, |
| bld->imms_array, &lindex1, 1, ""); |
| res2 = LLVMBuildLoad(builder, imms_ptr2, ""); |
| res = emit_fetch_64bit(bld_base, stype, res, res2); |
| } |
| } |
| } |
| else { |
| res = bld->immediates[reg->Register.Index][swizzle]; |
| if (tgsi_type_is_64bit(stype)) |
| res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]); |
| } |
| |
| if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { |
| struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); |
| res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); |
| } |
| return res; |
| } |
| |
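| /* |
| * Fetch an input register value, either directly from the per-channel input |
| * values or by gathering from the inputs array (indirect addressing). |
| */ |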
| static LLVMValueRef |
| emit_fetch_input( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef res; |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef indirect_index; |
| LLVMValueRef index_vec; /* index into the input reg array */ |
| LLVMValueRef index_vec2 = NULL; |
| LLVMValueRef inputs_array; |
| LLVMTypeRef fptr_type; |
| |
| indirect_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| |
| index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle, |
| TRUE); |
| if (tgsi_type_is_64bit(stype)) { |
| index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle + 1, |
| TRUE); |
| } |
| /* cast inputs_array pointer to float* */ |
| fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
| inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, ""); |
| |
| /* Gather values from the input register array */ |
| res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2); |
| } else { |
| if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { |
| LLVMValueRef lindex = lp_build_const_int32(gallivm, |
| reg->Register.Index * 4 + swizzle); |
| LLVMValueRef input_ptr = LLVMBuildGEP(builder, |
| bld->inputs_array, &lindex, 1, ""); |
| |
| res = LLVMBuildLoad(builder, input_ptr, ""); |
| if (tgsi_type_is_64bit(stype)) { |
| LLVMValueRef lindex1; |
| LLVMValueRef input_ptr2; |
| LLVMValueRef res2; |
| |
| lindex1 = lp_build_const_int32(gallivm, |
| reg->Register.Index * 4 + swizzle + 1); |
| input_ptr2 = LLVMBuildGEP(builder, |
| bld->inputs_array, &lindex1, 1, ""); |
| res2 = LLVMBuildLoad(builder, input_ptr2, ""); |
| res = emit_fetch_64bit(bld_base, stype, res, res2); |
| } |
| } |
| else { |
| res = bld->inputs[reg->Register.Index][swizzle]; |
| if (tgsi_type_is_64bit(stype)) |
| res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]); |
| } |
| } |
| |
| assert(res); |
| |
| if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { |
| struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); |
| res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); |
| } |
| |
| return res; |
| } |
| |
| |
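| /* |
| * Fetch a geometry shader input via the gs_iface callback, handling the |
| * PRIMID system value and indirect vertex/attribute indices. |
| */ |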
| static LLVMValueRef |
| emit_fetch_gs_input( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| const struct tgsi_shader_info *info = bld->bld_base.info; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef attrib_index = NULL; |
| LLVMValueRef vertex_index = NULL; |
| LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); |
| LLVMValueRef res; |
| |
| if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) { |
| /* This is really a system value not a regular input */ |
| assert(!reg->Register.Indirect); |
| assert(!reg->Dimension.Indirect); |
| res = bld->system_values.prim_id; |
| if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) { |
| res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); |
| } |
| return res; |
| } |
| |
| if (reg->Register.Indirect) { |
| attrib_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| } else { |
| attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); |
| } |
| |
| if (reg->Dimension.Indirect) { |
| vertex_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Dimension.Index, |
| ®->DimIndirect); |
| } else { |
| vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); |
| } |
| |
| res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, |
| reg->Dimension.Indirect, |
| vertex_index, |
| reg->Register.Indirect, |
| attrib_index, |
| swizzle_index); |
| |
| assert(res); |
| if (tgsi_type_is_64bit(stype)) { |
| LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1); |
| LLVMValueRef res2; |
| res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, |
| reg->Dimension.Indirect, |
| vertex_index, |
| reg->Register.Indirect, |
| attrib_index, |
| swizzle_index); |
| assert(res2); |
| res = emit_fetch_64bit(bld_base, stype, res, res2); |
| } else if (stype == TGSI_TYPE_UNSIGNED) { |
| res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); |
| } else if (stype == TGSI_TYPE_SIGNED) { |
| res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); |
| } |
| |
| return res; |
| } |
| |
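| /* |
| * Fetch a temporary register value, either from the per-channel variables |
| * or by gathering from the temps array (indirect addressing). |
| */ |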
| static LLVMValueRef |
| emit_fetch_temporary( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef res; |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef indirect_index; |
| LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */ |
| LLVMValueRef temps_array; |
| LLVMTypeRef fptr_type; |
| |
| indirect_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| |
| index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle, |
| TRUE); |
| if (tgsi_type_is_64bit(stype)) { |
| index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| swizzle + 1, |
| TRUE); |
| } |
| |
| /* cast temps_array pointer to float* */ |
| fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
| temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); |
| |
| /* Gather values from the temporary register array */ |
| res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2); |
| } |
| else { |
| LLVMValueRef temp_ptr; |
| temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); |
| res = LLVMBuildLoad(builder, temp_ptr, ""); |
| |
| if (tgsi_type_is_64bit(stype)) { |
| LLVMValueRef temp_ptr2, res2; |
| |
| temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1); |
| res2 = LLVMBuildLoad(builder, temp_ptr2, ""); |
| res = emit_fetch_64bit(bld_base, stype, res, res2); |
| } |
| } |
| |
| if (stype == TGSI_TYPE_SIGNED || |
| stype == TGSI_TYPE_UNSIGNED || |
| stype == TGSI_TYPE_DOUBLE || |
| stype == TGSI_TYPE_SIGNED64 || |
| stype == TGSI_TYPE_UNSIGNED64) { |
| struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); |
| res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); |
| } |
| |
| return res; |
| } |
| |
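| /* |
| * Fetch a system value register (instance id, vertex id, primitive id, etc.) |
| * and bitcast it to the requested type. |
| */ |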
| static LLVMValueRef |
| emit_fetch_system_value( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_src_register * reg, |
| enum tgsi_opcode_type stype, |
| unsigned swizzle) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| const struct tgsi_shader_info *info = bld->bld_base.info; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef res; |
| enum tgsi_opcode_type atype; /* actual type of the value */ |
| |
| assert(!reg->Register.Indirect); |
| |
| switch (info->system_value_semantic_name[reg->Register.Index]) { |
| case TGSI_SEMANTIC_INSTANCEID: |
| res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| case TGSI_SEMANTIC_VERTEXID: |
| res = bld->system_values.vertex_id; |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| case TGSI_SEMANTIC_VERTEXID_NOBASE: |
| res = bld->system_values.vertex_id_nobase; |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| case TGSI_SEMANTIC_BASEVERTEX: |
| res = bld->system_values.basevertex; |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| case TGSI_SEMANTIC_PRIMID: |
| res = bld->system_values.prim_id; |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| case TGSI_SEMANTIC_INVOCATIONID: |
| res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); |
| atype = TGSI_TYPE_UNSIGNED; |
| break; |
| |
| default: |
| assert(!"unexpected semantic in emit_fetch_system_value"); |
| res = bld_base->base.zero; |
| atype = TGSI_TYPE_FLOAT; |
| break; |
| } |
| |
| if (atype != stype) { |
| if (stype == TGSI_TYPE_FLOAT) { |
| res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); |
| } else if (stype == TGSI_TYPE_UNSIGNED) { |
| res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); |
| } else if (stype == TGSI_TYPE_SIGNED) { |
| res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); |
| } |
| } |
| |
| return res; |
| } |
| |
| /** |
| * Register fetch with derivatives. |
| */ |
| static void |
| emit_fetch_deriv( |
| struct lp_build_tgsi_soa_context *bld, |
| LLVMValueRef src, |
| LLVMValueRef *res, |
| LLVMValueRef *ddx, |
| LLVMValueRef *ddy) |
| { |
| if (res) |
| *res = src; |
| |
| /* TODO: use interpolation coeffs for inputs */ |
| |
| if (ddx) |
| *ddx = lp_build_ddx(&bld->bld_base.base, src); |
| |
| if (ddy) |
| *ddy = lp_build_ddy(&bld->bld_base.base, src); |
| } |
| |
| /** |
| * Store an array of vec_length 64-bit values into two arrays of vec_length |
| * floats, i.e. |
| * value is d0, d1, d2, d3 etc. |
| * each 64-bit value has high and low pieces x, y |
| * so it gets stored into the separate channels as: |
| * chan_ptr = d0.x, d1.x, d2.x, d3.x |
| * chan_ptr2 = d0.y, d1.y, d2.y, d3.y |
| */ |
| static void |
| emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base, |
| LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2, |
| LLVMValueRef value) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld_base->base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| struct lp_build_context *float_bld = &bld_base->base; |
| unsigned i; |
| LLVMValueRef temp, temp2; |
| LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32]; |
| LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; |
| |
| for (i = 0; i < bld_base->base.type.length; i++) { |
| shuffles[i] = lp_build_const_int32(gallivm, i * 2); |
| shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); |
| } |
| |
| temp = LLVMBuildShuffleVector(builder, value, |
| LLVMGetUndef(LLVMTypeOf(value)), |
| LLVMConstVector(shuffles, |
| bld_base->base.type.length), |
| ""); |
| temp2 = LLVMBuildShuffleVector(builder, value, |
| LLVMGetUndef(LLVMTypeOf(value)), |
| LLVMConstVector(shuffles2, |
| bld_base->base.type.length), |
| ""); |
| |
| lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr); |
| lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2); |
| } |
| |
| /** |
| * Register store. |
| */ |
| static void |
| emit_store_chan( |
| struct lp_build_tgsi_context *bld_base, |
| const struct tgsi_full_instruction *inst, |
| unsigned index, |
| unsigned chan_index, |
| LLVMValueRef value) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld_base->base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| const struct tgsi_full_dst_register *reg = &inst->Dst[index]; |
| struct lp_build_context *float_bld = &bld_base->base; |
| struct lp_build_context *int_bld = &bld_base->int_bld; |
| LLVMValueRef indirect_index = NULL; |
| enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); |
| |
| /* |
| * Apply saturation. |
| * |
| * It is always assumed to be float. |
| */ |
| if (inst->Instruction.Saturate) { |
| assert(dtype == TGSI_TYPE_FLOAT || |
| dtype == TGSI_TYPE_UNTYPED); |
| value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
| value = lp_build_clamp_zero_one_nanzero(float_bld, value); |
| } |
| |
| if (reg->Register.Indirect) { |
| /* |
| * Currently the mesa/st doesn't generate indirect stores |
| * to 64-bit values, it normally uses MOV to do indirect stores. |
| */ |
| assert(!tgsi_type_is_64bit(dtype)); |
| indirect_index = get_indirect_index(bld, |
| reg->Register.File, |
| reg->Register.Index, |
| ®->Indirect); |
| } else { |
| assert(reg->Register.Index <= |
| bld_base->info->file_max[reg->Register.File]); |
| } |
| |
| if (DEBUG_EXECUTION) { |
| emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value); |
| } |
| |
| switch( reg->Register.File ) { |
| case TGSI_FILE_OUTPUT: |
| /* Outputs are always stored as floats */ |
| value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef index_vec; /* indexes into the output registers */ |
| LLVMValueRef outputs_array; |
| LLVMTypeRef fptr_type; |
| |
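         /*
          * get_soa_array_offsets() builds a per-lane index vector, roughly
          * (indirect_index * 4 + chan_index) * vec_length + lane, matching
          * the flat SoA layout of the outputs alloca array.
          */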
| index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| chan_index, |
| TRUE); |
| |
| fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
| outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, ""); |
| |
| /* Scatter store values into output registers */ |
| emit_mask_scatter(bld, outputs_array, index_vec, value, |
| &bld->exec_mask); |
| } |
| else { |
| LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, |
| chan_index); |
| |
| if (tgsi_type_is_64bit(dtype)) { |
| LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index, |
| chan_index + 1); |
| emit_store_64bit_chan(bld_base, out_ptr, out_ptr2, |
| value); |
| } else |
| lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr); |
| } |
| break; |
| |
| case TGSI_FILE_TEMPORARY: |
| /* Temporaries are always stored as floats */ |
| if (!tgsi_type_is_64bit(dtype)) |
| value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
| else |
| value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), ""); |
| |
| if (reg->Register.Indirect) { |
| LLVMValueRef index_vec; /* indexes into the temp registers */ |
| LLVMValueRef temps_array; |
| LLVMTypeRef fptr_type; |
| |
| index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
| indirect_index, |
| chan_index, |
| TRUE); |
| |
| fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
| temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); |
| |
| /* Scatter store values into temp registers */ |
| emit_mask_scatter(bld, temps_array, index_vec, value, |
| &bld->exec_mask); |
| } |
| else { |
| LLVMValueRef temp_ptr; |
| temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); |
| |
| if (tgsi_type_is_64bit(dtype)) { |
| LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld, |
| reg->Register.Index, |
| chan_index + 1); |
| emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2, |
| value); |
| } |
| else |
| lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr); |
| } |
| break; |
| |
| case TGSI_FILE_ADDRESS: |
| assert(dtype == TGSI_TYPE_SIGNED); |
| assert(LLVMTypeOf(value) == int_bld->vec_type); |
| value = LLVMBuildBitCast(builder, value, int_bld->vec_type, ""); |
| lp_exec_mask_store(&bld->exec_mask, int_bld, value, |
| bld->addr[reg->Register.Index][chan_index]); |
| break; |
| |
| default: |
| assert( 0 ); |
| } |
| |
| (void)dtype; |
| } |
| |
| /* |
| * Called at the beginning of the translation of each TGSI instruction, to |
| * emit some debug code. |
| */ |
| static void |
| emit_debug( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_instruction * inst, |
| const struct tgsi_opcode_info * info) |
| |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| if (DEBUG_EXECUTION) { |
| /* |
| * Dump the TGSI instruction. |
| */ |
| |
| struct gallivm_state *gallivm = bld_base->base.gallivm; |
| char buf[512]; |
| buf[0] = '$'; |
| buf[1] = ' '; |
| tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2); |
| lp_build_printf(gallivm, buf); |
| |
| /* Dump the execution mask. |
| */ |
| if (bld->exec_mask.has_mask) { |
| lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask); |
| } |
| } |
| } |
| |
| static void |
| emit_store( |
| struct lp_build_tgsi_context * bld_base, |
| const struct tgsi_full_instruction * inst, |
| const struct tgsi_opcode_info * info, |
| unsigned index, |
| LLVMValueRef dst[4]) |
| |
| { |
| enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); |
| |
| unsigned writemask = inst->Dst[index].Register.WriteMask; |
| while (writemask) { |
| unsigned chan_index = u_bit_scan(&writemask); |
| if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) |
| continue; |
| emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]); |
| } |
| } |
| |
| static unsigned |
| tgsi_to_pipe_tex_target(unsigned tgsi_target) |
| { |
| switch (tgsi_target) { |
| case TGSI_TEXTURE_BUFFER: |
| return PIPE_BUFFER; |
| case TGSI_TEXTURE_1D: |
| case TGSI_TEXTURE_SHADOW1D: |
| return PIPE_TEXTURE_1D; |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_SHADOW2D: |
| case TGSI_TEXTURE_2D_MSAA: |
| return PIPE_TEXTURE_2D; |
| case TGSI_TEXTURE_3D: |
| return PIPE_TEXTURE_3D; |
| case TGSI_TEXTURE_CUBE: |
| case TGSI_TEXTURE_SHADOWCUBE: |
| return PIPE_TEXTURE_CUBE; |
| case TGSI_TEXTURE_RECT: |
| case TGSI_TEXTURE_SHADOWRECT: |
| return PIPE_TEXTURE_RECT; |
| case TGSI_TEXTURE_1D_ARRAY: |
| case TGSI_TEXTURE_SHADOW1D_ARRAY: |
| return PIPE_TEXTURE_1D_ARRAY; |
| case TGSI_TEXTURE_2D_ARRAY: |
| case TGSI_TEXTURE_SHADOW2D_ARRAY: |
| case TGSI_TEXTURE_2D_ARRAY_MSAA: |
| return PIPE_TEXTURE_2D_ARRAY; |
| case TGSI_TEXTURE_CUBE_ARRAY: |
| case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
| return PIPE_TEXTURE_CUBE_ARRAY; |
| default: |
| assert(0); |
| return PIPE_BUFFER; |
| } |
| } |
| |
| |
| static enum lp_sampler_lod_property |
| lp_build_lod_property( |
| struct lp_build_tgsi_context *bld_base, |
| const struct tgsi_full_instruction *inst, |
| unsigned src_op) |
| { |
| const struct tgsi_full_src_register *reg = &inst->Src[src_op]; |
| enum lp_sampler_lod_property lod_property; |
| |
| /* |
| * Not much we can do here. We could try catching inputs declared |
| * with constant interpolation but not sure it's worth it - since for |
| * TEX opcodes as well as FETCH/LD the lod comes from same reg as |
| * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just |
| * like the constant/immediate recognition below. |
| * What seems to be of more value would be to recognize temps holding |
| * broadcasted scalars but no way we can do it. |
| * Tried asking llvm but without any success (using LLVMIsConstant |
| * even though this isn't exactly what we'd need), even as simple as |
| * IMM[0] UINT32 (0,-1,0,0) |
| * MOV TEMP[0] IMM[0].yyyy |
| * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0] |
| * doesn't work. |
| * This means there's ZERO chance this will ever catch a scalar lod |
| * with traditional tex opcodes as well as texel fetches, since the lod |
| * comes from the same reg as coords (except some test shaders using |
| * constant coords maybe). |
| * There's at least hope for sample opcodes as well as size queries. |
| */ |
| if (reg->Register.File == TGSI_FILE_CONSTANT || |
| reg->Register.File == TGSI_FILE_IMMEDIATE) { |
| lod_property = LP_SAMPLER_LOD_SCALAR; |
| } |
| else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) { |
| if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { |
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| else { |
| lod_property = LP_SAMPLER_LOD_PER_QUAD; |
| } |
| } |
| else { |
      /* never use scalar (per-quad) lod - the results are just too wrong. */
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| return lod_property; |
| } |
| |
| |
| /** |
| * High-level instruction translators. |
| */ |
| |
| static void |
| emit_tex( struct lp_build_tgsi_soa_context *bld, |
| const struct tgsi_full_instruction *inst, |
| enum lp_build_tex_modifier modifier, |
| LLVMValueRef *texel, |
| unsigned sampler_reg, |
| enum lp_sampler_op_type sampler_op) |
| { |
| unsigned unit = inst->Src[sampler_reg].Register.Index; |
| LLVMValueRef oow = NULL; |
| LLVMValueRef lod = NULL; |
| LLVMValueRef coords[5]; |
| LLVMValueRef offsets[3] = { NULL }; |
| struct lp_derivatives derivs; |
| struct lp_sampler_params params; |
| enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; |
| unsigned num_derivs, num_offsets, i; |
| unsigned shadow_coord = 0; |
| unsigned layer_coord = 0; |
| unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT; |
| |
| memset(¶ms, 0, sizeof(params)); |
| |
| if (!bld->sampler) { |
| _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); |
| for (i = 0; i < 4; i++) { |
| texel[i] = bld->bld_base.base.undef; |
| } |
| return; |
| } |
| |
| switch (inst->Texture.Texture) { |
| case TGSI_TEXTURE_1D_ARRAY: |
| layer_coord = 1; |
| /* fallthrough */ |
| case TGSI_TEXTURE_1D: |
| num_offsets = 1; |
| num_derivs = 1; |
| break; |
| case TGSI_TEXTURE_2D_ARRAY: |
| layer_coord = 2; |
| /* fallthrough */ |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| num_offsets = 2; |
| num_derivs = 2; |
| break; |
| case TGSI_TEXTURE_SHADOW1D_ARRAY: |
| layer_coord = 1; |
| /* fallthrough */ |
| case TGSI_TEXTURE_SHADOW1D: |
| shadow_coord = 2; |
| num_offsets = 1; |
| num_derivs = 1; |
| break; |
| case TGSI_TEXTURE_SHADOW2D_ARRAY: |
| layer_coord = 2; |
| shadow_coord = 3; |
| num_offsets = 2; |
| num_derivs = 2; |
| break; |
| case TGSI_TEXTURE_SHADOW2D: |
| case TGSI_TEXTURE_SHADOWRECT: |
| shadow_coord = 2; |
| num_offsets = 2; |
| num_derivs = 2; |
| break; |
| case TGSI_TEXTURE_CUBE: |
| num_offsets = 2; |
| num_derivs = 3; |
| break; |
| case TGSI_TEXTURE_3D: |
| num_offsets = 3; |
| num_derivs = 3; |
| break; |
| case TGSI_TEXTURE_SHADOWCUBE: |
| shadow_coord = 3; |
| num_offsets = 2; |
| num_derivs = 3; |
| break; |
| case TGSI_TEXTURE_CUBE_ARRAY: |
| num_offsets = 2; |
| num_derivs = 3; |
| layer_coord = 3; |
| break; |
| case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
| num_offsets = 2; |
| num_derivs = 3; |
| layer_coord = 3; |
      shadow_coord = 4; /* shadow coord comes from a separate src reg */
| break; |
| case TGSI_TEXTURE_2D_MSAA: |
| case TGSI_TEXTURE_2D_ARRAY_MSAA: |
| default: |
| assert(0); |
| return; |
| } |
| |
| /* Note lod and especially projected are illegal in a LOT of cases */ |
| if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || |
| modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { |
| if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || |
| inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) { |
| /* note that shadow cube array with bias/explicit lod does not exist */ |
| lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); |
| } |
| else { |
| lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); |
| } |
| if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { |
| sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| } |
| else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { |
| sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| } |
| lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); |
| } |
| |
| if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { |
| oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); |
| oow = lp_build_rcp(&bld->bld_base.base, oow); |
| } |
| |
| for (i = 0; i < num_derivs; i++) { |
| coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); |
| if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) |
| coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); |
| } |
| for (i = num_derivs; i < 5; i++) { |
| coords[i] = bld->bld_base.base.undef; |
| } |
| |
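   /*
    * coords[] layout as consumed by the sampler code: the first num_derivs
    * slots hold the texture coordinates; the array layer and the shadow
    * comparator, if any, are filled into their slots below.
    */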
| /* Layer coord always goes into 3rd slot, except for cube map arrays */ |
| if (layer_coord) { |
| if (layer_coord == 3) { |
| coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); |
| } |
| else { |
| coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); |
| } |
| if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) |
| coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow); |
| } |
   /* Shadow coord always occupies the 5th slot. */
| if (shadow_coord) { |
| sample_key |= LP_SAMPLER_SHADOW; |
| if (shadow_coord == 4) { |
| coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); |
| } |
| else { |
| coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord); |
| } |
| if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) |
| coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow); |
| } |
| |
| if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { |
| unsigned dim; |
| sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| for (dim = 0; dim < num_derivs; ++dim) { |
| derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim); |
| derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim); |
| } |
| params.derivs = &derivs; |
| /* |
| * could also check all src regs if constant but I doubt such |
| * cases exist in practice. |
| */ |
| if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { |
| if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { |
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| else { |
| lod_property = LP_SAMPLER_LOD_PER_QUAD; |
| } |
| } |
| else { |
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| } |
| sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; |
| |
| /* we don't handle the 4 offset version of tg4 */ |
| if (inst->Texture.NumOffsets == 1) { |
| unsigned dim; |
| sample_key |= LP_SAMPLER_OFFSETS; |
| for (dim = 0; dim < num_offsets; dim++) { |
| offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); |
| } |
| } |
| |
| params.type = bld->bld_base.base.type; |
| params.sample_key = sample_key; |
| params.texture_index = unit; |
| params.sampler_index = unit; |
| params.context_ptr = bld->context_ptr; |
| params.thread_data_ptr = bld->thread_data_ptr; |
| params.coords = coords; |
| params.offsets = offsets; |
| params.lod = lod; |
| params.texel = texel; |
| |
| bld->sampler->emit_tex_sample(bld->sampler, |
| bld->bld_base.base.gallivm, |
| ¶ms); |
| } |
| |
| static void |
| emit_sample(struct lp_build_tgsi_soa_context *bld, |
| const struct tgsi_full_instruction *inst, |
| enum lp_build_tex_modifier modifier, |
| boolean compare, |
| enum lp_sampler_op_type sample_type, |
| LLVMValueRef *texel) |
| { |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| unsigned texture_unit, sampler_unit; |
| LLVMValueRef lod = NULL; |
| LLVMValueRef coords[5]; |
| LLVMValueRef offsets[3] = { NULL }; |
| struct lp_derivatives derivs; |
| struct lp_sampler_params params; |
| enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; |
| |
| unsigned num_offsets, num_derivs, i; |
| unsigned layer_coord = 0; |
| unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT; |
| |
| memset(¶ms, 0, sizeof(params)); |
| |
| if (!bld->sampler) { |
| _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); |
| for (i = 0; i < 4; i++) { |
| texel[i] = bld->bld_base.base.undef; |
| } |
| return; |
| } |
| |
| /* |
| * unlike old-style tex opcodes the texture/sampler indices |
| * always come from src1 and src2 respectively. |
| */ |
| texture_unit = inst->Src[1].Register.Index; |
| sampler_unit = inst->Src[2].Register.Index; |
| |
| /* |
| * Note inst->Texture.Texture will contain the number of offsets, |
| * however the target information is NOT there and comes from the |
| * declared sampler views instead. |
| */ |
| switch (bld->sv[texture_unit].Resource) { |
| case TGSI_TEXTURE_1D: |
| num_offsets = 1; |
| num_derivs = 1; |
| break; |
| case TGSI_TEXTURE_1D_ARRAY: |
| layer_coord = 1; |
| num_offsets = 1; |
| num_derivs = 1; |
| break; |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| num_offsets = 2; |
| num_derivs = 2; |
| break; |
| case TGSI_TEXTURE_2D_ARRAY: |
| layer_coord = 2; |
| num_offsets = 2; |
| num_derivs = 2; |
| break; |
| case TGSI_TEXTURE_CUBE: |
| num_offsets = 2; |
| num_derivs = 3; |
| break; |
| case TGSI_TEXTURE_3D: |
| num_offsets = 3; |
| num_derivs = 3; |
| break; |
| case TGSI_TEXTURE_CUBE_ARRAY: |
| layer_coord = 3; |
| num_offsets = 2; |
| num_derivs = 3; |
| break; |
| default: |
| assert(0); |
| return; |
| } |
| |
| if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || |
| modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { |
| lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); |
| if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { |
| sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| } |
| else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { |
| sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| } |
| lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); |
| } |
| else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) { |
| /* XXX might be better to explicitly pass the level zero information */ |
| sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F); |
| } |
| |
| for (i = 0; i < num_derivs; i++) { |
| coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); |
| } |
| for (i = num_derivs; i < 5; i++) { |
| coords[i] = bld->bld_base.base.undef; |
| } |
| |
| /* Layer coord always goes into 3rd slot, except for cube map arrays */ |
| if (layer_coord) { |
| if (layer_coord == 3) |
| coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); |
| else |
| coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); |
| } |
   /* Shadow coord always occupies the 5th slot. */
| if (compare) { |
| sample_key |= LP_SAMPLER_SHADOW; |
| coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); |
| } |
| |
| if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { |
| unsigned dim; |
| sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| for (dim = 0; dim < num_derivs; ++dim) { |
| derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim); |
| derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim); |
| } |
| params.derivs = &derivs; |
| /* |
| * could also check all src regs if constant but I doubt such |
| * cases exist in practice. |
| */ |
| if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { |
| if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { |
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| else { |
| lod_property = LP_SAMPLER_LOD_PER_QUAD; |
| } |
| } |
| else { |
| lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
| } |
| } |
| |
| /* some advanced gather instructions (txgo) would require 4 offsets */ |
| if (inst->Texture.NumOffsets == 1) { |
| unsigned dim; |
| sample_key |= LP_SAMPLER_OFFSETS; |
| for (dim = 0; dim < num_offsets; dim++) { |
| offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); |
| } |
| } |
| sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; |
| |
| params.type = bld->bld_base.base.type; |
| params.sample_key = sample_key; |
| params.texture_index = texture_unit; |
| params.sampler_index = sampler_unit; |
| params.context_ptr = bld->context_ptr; |
| params.thread_data_ptr = bld->thread_data_ptr; |
| params.coords = coords; |
| params.offsets = offsets; |
| params.lod = lod; |
| params.texel = texel; |
| |
| bld->sampler->emit_tex_sample(bld->sampler, |
| bld->bld_base.base.gallivm, |
| ¶ms); |
| |
| if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || |
| inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || |
| inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || |
| inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) { |
| unsigned char swizzles[4]; |
| swizzles[0] = inst->Src[1].Register.SwizzleX; |
| swizzles[1] = inst->Src[1].Register.SwizzleY; |
| swizzles[2] = inst->Src[1].Register.SwizzleZ; |
| swizzles[3] = inst->Src[1].Register.SwizzleW; |
| |
| lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); |
| } |
| } |
| |
| static void |
| emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, |
| const struct tgsi_full_instruction *inst, |
| LLVMValueRef *texel, |
| boolean is_samplei) |
| { |
| unsigned unit, target; |
| LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); |
| LLVMValueRef explicit_lod = NULL; |
| LLVMValueRef coords[5]; |
| LLVMValueRef offsets[3] = { NULL }; |
| struct lp_sampler_params params; |
| enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; |
| unsigned dims, i; |
| unsigned layer_coord = 0; |
| unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT; |
| |
| memset(¶ms, 0, sizeof(params)); |
| |
| if (!bld->sampler) { |
| _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); |
| for (i = 0; i < 4; i++) { |
| texel[i] = coord_undef; |
| } |
| return; |
| } |
| |
| unit = inst->Src[1].Register.Index; |
| |
| if (is_samplei) { |
| target = bld->sv[unit].Resource; |
| } |
| else { |
| target = inst->Texture.Texture; |
| } |
| |
| switch (target) { |
| case TGSI_TEXTURE_1D: |
| case TGSI_TEXTURE_BUFFER: |
| dims = 1; |
| break; |
| case TGSI_TEXTURE_1D_ARRAY: |
| layer_coord = 1; |
| dims = 1; |
| break; |
| case TGSI_TEXTURE_2D: |
| case TGSI_TEXTURE_RECT: |
| case TGSI_TEXTURE_2D_MSAA: |
| dims = 2; |
| break; |
| case TGSI_TEXTURE_2D_ARRAY: |
| case TGSI_TEXTURE_2D_ARRAY_MSAA: |
| layer_coord = 2; |
| dims = 2; |
| break; |
| case TGSI_TEXTURE_3D: |
| dims = 3; |
| break; |
| default: |
| assert(0); |
| return; |
| } |
| |
   /* We always have an explicit lod, except for buffers and msaa targets(?) */
| if (target != TGSI_TEXTURE_BUFFER && |
| target != TGSI_TEXTURE_2D_MSAA && |
| target != TGSI_TEXTURE_2D_ARRAY_MSAA) { |
| sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; |
| explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); |
| lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); |
| } |
| /* |
| * XXX: for real msaa support, the w component (or src2.x for sample_i_ms) |
| * would be the sample index. |
| */ |
| |
| for (i = 0; i < dims; i++) { |
| coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); |
| } |
   /* never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
| for (i = dims; i < 5; i++) { |
| coords[i] = coord_undef; |
| } |
| if (layer_coord) |
| coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); |
| |
| if (inst->Texture.NumOffsets == 1) { |
| unsigned dim; |
| sample_key |= LP_SAMPLER_OFFSETS; |
| for (dim = 0; dim < dims; dim++) { |
| offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); |
| } |
| } |
| sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; |
| |
| params.type = bld->bld_base.base.type; |
| params.sample_key = sample_key; |
| params.texture_index = unit; |
| /* |
| * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS |
| * and trigger some assertions with d3d10 where the sampler view number |
| * can exceed this. |
| */ |
| params.sampler_index = 0; |
| params.context_ptr = bld->context_ptr; |
| params.thread_data_ptr = bld->thread_data_ptr; |
| params.coords = coords; |
| params.offsets = offsets; |
| params.derivs = NULL; |
| params.lod = explicit_lod; |
| params.texel = texel; |
| |
| bld->sampler->emit_tex_sample(bld->sampler, |
| bld->bld_base.base.gallivm, |
| ¶ms); |
| |
| if (is_samplei && |
| (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || |
| inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || |
| inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || |
| inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) { |
| unsigned char swizzles[4]; |
| swizzles[0] = inst->Src[1].Register.SwizzleX; |
| swizzles[1] = inst->Src[1].Register.SwizzleY; |
| swizzles[2] = inst->Src[1].Register.SwizzleZ; |
| swizzles[3] = inst->Src[1].Register.SwizzleW; |
| |
| lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); |
| } |
| } |
| |
| static void |
| emit_size_query( struct lp_build_tgsi_soa_context *bld, |
| const struct tgsi_full_instruction *inst, |
| LLVMValueRef *sizes_out, |
| boolean is_sviewinfo) |
| { |
| LLVMValueRef explicit_lod; |
| enum lp_sampler_lod_property lod_property; |
| unsigned has_lod; |
| unsigned i; |
| unsigned unit = inst->Src[1].Register.Index; |
| unsigned target, pipe_target; |
| struct lp_sampler_size_query_params params; |
| |
| if (is_sviewinfo) { |
| target = bld->sv[unit].Resource; |
| } |
| else { |
| target = inst->Texture.Texture; |
| } |
| switch (target) { |
| case TGSI_TEXTURE_BUFFER: |
| case TGSI_TEXTURE_RECT: |
| case TGSI_TEXTURE_SHADOWRECT: |
| has_lod = 0; |
| break; |
| default: |
| has_lod = 1; |
| break; |
| } |
| |
| if (!bld->sampler) { |
| _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); |
| for (i = 0; i < 4; i++) |
| sizes_out[i] = bld->bld_base.int_bld.undef; |
| return; |
| } |
| |
| if (has_lod) { |
| explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0); |
| lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); |
| } |
| else { |
| explicit_lod = NULL; |
| lod_property = LP_SAMPLER_LOD_SCALAR; |
| } |
| |
| |
| pipe_target = tgsi_to_pipe_tex_target(target); |
| |
| params.int_type = bld->bld_base.int_bld.type; |
| params.texture_unit = unit; |
| params.target = pipe_target; |
| params.context_ptr = bld->context_ptr; |
| params.is_sviewinfo = TRUE; |
| params.lod_property = lod_property; |
| params.explicit_lod = explicit_lod; |
| params.sizes_out = sizes_out; |
| |
| bld->sampler->emit_size_query(bld->sampler, |
| bld->bld_base.base.gallivm, |
| ¶ms); |
| } |
| |
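/**
 * Returns TRUE if the shader will reach its end within the next few
 * instructions without executing any texturing, call or control-flow
 * opcodes.  Used to skip the (relatively costly) mask check after a KILL
 * when there is little work left that could be skipped anyway.
 */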
| static boolean |
| near_end_of_shader(struct lp_build_tgsi_soa_context *bld, |
| int pc) |
| { |
| unsigned i; |
| |
| for (i = 0; i < 5; i++) { |
| unsigned opcode; |
| |
| if (pc + i >= bld->bld_base.info->num_instructions) |
| return TRUE; |
| |
| opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; |
| |
| if (opcode == TGSI_OPCODE_END) |
| return TRUE; |
| |
| if (opcode == TGSI_OPCODE_TEX || |
| opcode == TGSI_OPCODE_TXP || |
| opcode == TGSI_OPCODE_TXD || |
| opcode == TGSI_OPCODE_TXB || |
| opcode == TGSI_OPCODE_TXL || |
| opcode == TGSI_OPCODE_TXF || |
| opcode == TGSI_OPCODE_TXQ || |
| opcode == TGSI_OPCODE_TEX2 || |
| opcode == TGSI_OPCODE_TXB2 || |
| opcode == TGSI_OPCODE_TXL2 || |
| opcode == TGSI_OPCODE_SAMPLE || |
| opcode == TGSI_OPCODE_SAMPLE_B || |
| opcode == TGSI_OPCODE_SAMPLE_C || |
| opcode == TGSI_OPCODE_SAMPLE_C_LZ || |
| opcode == TGSI_OPCODE_SAMPLE_D || |
| opcode == TGSI_OPCODE_SAMPLE_I || |
| opcode == TGSI_OPCODE_SAMPLE_I_MS || |
| opcode == TGSI_OPCODE_SAMPLE_L || |
| opcode == TGSI_OPCODE_SVIEWINFO || |
| opcode == TGSI_OPCODE_CAL || |
| opcode == TGSI_OPCODE_IF || |
| opcode == TGSI_OPCODE_UIF || |
| opcode == TGSI_OPCODE_BGNLOOP || |
| opcode == TGSI_OPCODE_SWITCH) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| |
| /** |
| * Kill fragment if any of the src register values are negative. |
| */ |
| static void |
| emit_kill_if( |
| struct lp_build_tgsi_soa_context *bld, |
| const struct tgsi_full_instruction *inst, |
| int pc) |
| { |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| const struct tgsi_full_src_register *reg = &inst->Src[0]; |
| LLVMValueRef terms[TGSI_NUM_CHANNELS]; |
| LLVMValueRef mask; |
| unsigned chan_index; |
| |
| memset(&terms, 0, sizeof terms); |
| |
| TGSI_FOR_EACH_CHANNEL( chan_index ) { |
| unsigned swizzle; |
| |
| /* Unswizzle channel */ |
| swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); |
| |
| /* Check if the component has not been already tested. */ |
| assert(swizzle < TGSI_NUM_CHANNELS); |
| if( !terms[swizzle] ) |
| /* TODO: change the comparison operator instead of setting the sign */ |
| terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); |
| } |
| |
| mask = NULL; |
| TGSI_FOR_EACH_CHANNEL( chan_index ) { |
| if(terms[chan_index]) { |
| LLVMValueRef chan_mask; |
| |
| /* |
| * If term < 0 then mask = 0 else mask = ~0. |
| */ |
| chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); |
| |
| if(mask) |
| mask = LLVMBuildAnd(builder, mask, chan_mask, ""); |
| else |
| mask = chan_mask; |
| } |
| } |
| |
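   /*
    * Take the execution mask into account: lanes that are inactive in the
    * current control flow path must not be killed, so force their bits in
    * the kill mask back to ~0 (keep).
    */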
| if (bld->exec_mask.has_mask) { |
| LLVMValueRef invmask; |
| invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); |
| mask = LLVMBuildOr(builder, mask, invmask, ""); |
| } |
| |
| lp_build_mask_update(bld->mask, mask); |
| if (!near_end_of_shader(bld, pc)) |
| lp_build_mask_check(bld->mask); |
| } |
| |
| |
| /** |
| * Unconditional fragment kill. |
 * The only predication is the execution mask, which will apply if
 * we're inside a loop or conditional.
| */ |
| static void |
| emit_kill(struct lp_build_tgsi_soa_context *bld, |
| int pc) |
| { |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| LLVMValueRef mask; |
| |
| /* For those channels which are "alive", disable fragment shader |
| * execution. |
| */ |
| if (bld->exec_mask.has_mask) { |
| mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); |
| } |
| else { |
| LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); |
| mask = zero; |
| } |
| |
| lp_build_mask_update(bld->mask, mask); |
| |
| if (!near_end_of_shader(bld, pc)) |
| lp_build_mask_check(bld->mask); |
| } |
| |
| |
| /** |
| * Emit code which will dump the value of all the temporary registers |
| * to stdout. |
| */ |
| static void |
| emit_dump_file(struct lp_build_tgsi_soa_context *bld, |
| unsigned file) |
| { |
| const struct tgsi_shader_info *info = bld->bld_base.info; |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| LLVMValueRef reg_ptr; |
| int index; |
| int max_index = info->file_max[file]; |
| |
| /* |
| * Some register files, particularly constants, can be very large, |
| * and dumping everything could make this unusably slow. |
| */ |
| max_index = MIN2(max_index, 32); |
| |
| for (index = 0; index <= max_index; index++) { |
| LLVMValueRef res; |
| unsigned mask; |
| int chan; |
| |
| if (index < 8 * sizeof(unsigned) && |
| (info->file_mask[file] & (1u << index)) == 0) { |
         /* This was not declared. */
| continue; |
| } |
| |
| if (file == TGSI_FILE_INPUT) { |
| mask = info->input_usage_mask[index]; |
| } else { |
| mask = TGSI_WRITEMASK_XYZW; |
| } |
| |
| for (chan = 0; chan < 4; chan++) { |
| if ((mask & (1 << chan)) == 0) { |
            /* This channel is not used. */
| continue; |
| } |
| |
| if (file == TGSI_FILE_CONSTANT) { |
| struct tgsi_full_src_register reg; |
| memset(®, 0, sizeof reg); |
| reg.Register.File = file; |
| reg.Register.Index = index; |
| reg.Register.SwizzleX = 0; |
| reg.Register.SwizzleY = 1; |
| reg.Register.SwizzleZ = 2; |
| reg.Register.SwizzleW = 3; |
| |
| res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan); |
| if (!res) { |
| continue; |
| } |
| } else if (file == TGSI_FILE_INPUT) { |
| res = bld->inputs[index][chan]; |
| if (!res) { |
| continue; |
| } |
| } else if (file == TGSI_FILE_TEMPORARY) { |
| reg_ptr = lp_get_temp_ptr_soa(bld, index, chan); |
| assert(reg_ptr); |
| res = LLVMBuildLoad(builder, reg_ptr, ""); |
| } else if (file == TGSI_FILE_OUTPUT) { |
| reg_ptr = lp_get_output_ptr(bld, index, chan); |
| assert(reg_ptr); |
| res = LLVMBuildLoad(builder, reg_ptr, ""); |
| } else { |
| assert(0); |
| continue; |
| } |
| |
| emit_dump_reg(gallivm, file, index, chan, res); |
| } |
| } |
| } |
| |
| |
| |
| void |
| lp_emit_declaration_soa( |
| struct lp_build_tgsi_context *bld_base, |
| const struct tgsi_full_declaration *decl) |
| { |
| struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMTypeRef vec_type = bld->bld_base.base.vec_type; |
| const unsigned first = decl->Range.First; |
| const unsigned last = decl->Range.Last; |
| unsigned idx, i; |
| |
| assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); |
| |
| switch (decl->Declaration.File) { |
| case TGSI_FILE_TEMPORARY: |
| if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { |
| assert(last < LP_MAX_INLINED_TEMPS); |
| for (idx = first; idx <= last; ++idx) { |
| for (i = 0; i < TGSI_NUM_CHANNELS; i++) |
| bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); |
| } |
| } |
| break; |
| |
| case TGSI_FILE_OUTPUT: |
| if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { |
| for (idx = first; idx <= last; ++idx) { |
| for (i = 0; i < TGSI_NUM_CHANNELS; i++) |
| bld->outputs[idx][i] = lp_build_alloca(gallivm, |
| vec_type, "output"); |
| } |
| } |
| break; |
| |
| case TGSI_FILE_ADDRESS: |
      /* ADDR registers are the only ones allocated with an integer LLVM IR
       * type, as they are guaranteed to always hold integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
| assert(last < LP_MAX_TGSI_ADDRS); |
| for (idx = first; idx <= last; ++idx) { |
| assert(idx < LP_MAX_TGSI_ADDRS); |
| for (i = 0; i < TGSI_NUM_CHANNELS; i++) |
| bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr"); |
| } |
| break; |
| |
| case TGSI_FILE_SAMPLER_VIEW: |
| /* |
| * The target stored here MUST match whatever there actually |
| * is in the set sampler views (what about return type?). |
| */ |
| assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); |
| for (idx = first; idx <= last; ++idx) { |
| bld->sv[idx] = decl->SamplerView; |
| } |
| break; |
| |
| case TGSI_FILE_CONSTANT: |
| { |
| /* |
| * We could trivially fetch the per-buffer pointer when fetching the |
| * constant, relying on llvm to figure out it's always the same pointer |
| * anyway. However, doing so results in a huge (more than factor of 10) |
| * slowdown in llvm compilation times for some (but not all) shaders |
| * (more specifically, the IR optimization spends way more time in |
| * DominatorTree::dominates). At least with llvm versions 3.1, 3.3. |
| */ |
| unsigned idx2D = decl->Dim.Index2D; |
| LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D); |
| assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS); |
| bld->consts[idx2D] = |
| lp_build_array_get(gallivm, bld->consts_ptr, index2D); |
| bld->consts_sizes[idx2D] = |
| lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D); |
| } |
| break; |
| |
| default: |
| /* don't need to declare other vars */ |
| break; |
| } |
| } |
| |
| |
| void lp_emit_immediate_soa( |
| struct lp_build_tgsi_context *bld_base, |
| const struct tgsi_full_immediate *imm) |
| { |
| struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); |
| struct gallivm_state * gallivm = bld_base->base.gallivm; |
| LLVMValueRef imms[4]; |
| unsigned i; |
| const uint size = imm->Immediate.NrTokens - 1; |
| assert(size <= 4); |
| switch (imm->Immediate.DataType) { |
| case TGSI_IMM_FLOAT32: |
| for( i = 0; i < size; ++i ) |
| imms[i] = |
| lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); |
| |
| break; |
| case TGSI_IMM_FLOAT64: |
| case TGSI_IMM_UINT64: |
| case TGSI_IMM_INT64: |
| case TGSI_IMM_UINT32: |
| for( i = 0; i < size; ++i ) { |
| LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); |
| imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); |
| } |
| |
| break; |
| case TGSI_IMM_INT32: |
| for( i = 0; i < size; ++i ) { |
| LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); |
| imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); |
| } |
| |
| break; |
| } |
| for( i = size; i < 4; ++i ) |
| imms[i] = bld_base->base.undef; |
| |
| if (bld->use_immediates_array) { |
| unsigned index = bld->num_immediates; |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| |
| assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)); |
| for (i = 0; i < 4; ++i ) { |
| LLVMValueRef lindex = lp_build_const_int32( |
| bld->bld_base.base.gallivm, index * 4 + i); |
| LLVMValueRef imm_ptr = LLVMBuildGEP(builder, |
| bld->imms_array, &lindex, 1, ""); |
| LLVMBuildStore(builder, imms[i], imm_ptr); |
| } |
| } else { |
| /* simply copy the immediate values into the next immediates[] slot */ |
| unsigned i; |
| assert(imm->Immediate.NrTokens - 1 <= 4); |
| assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES); |
| |
| for(i = 0; i < 4; ++i ) |
| bld->immediates[bld->num_immediates][i] = imms[i]; |
| |
| if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { |
| unsigned index = bld->num_immediates; |
| struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
| LLVMBuilderRef builder = gallivm->builder; |
| for (i = 0; i < 4; ++i ) { |
| LLVMValueRef lindex = lp_build_const_int32( |
| bld->bld_base.base.gallivm, index * 4 + i); |
| LLVMValueRef imm_ptr = LLVMBuildGEP(builder, |
| bld->imms_array, &lindex, 1, ""); |
| LLVMBuildStore(builder, |
| bld->immediates[index][i], |
| imm_ptr); |
| } |
| } |
| } |
| |
| bld->num_immediates++; |
| } |
| |
| static void |
| ddx_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_fetch_deriv(bld, emit_data->args[0], NULL, |
| &emit_data->output[emit_data->chan], NULL); |
| } |
| |
| static void |
| ddy_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, |
| &emit_data->output[emit_data->chan]); |
| } |
| |
| static void |
| kill_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_kill(bld, bld_base->pc - 1); |
| } |
| |
| static void |
| kill_if_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_kill_if(bld, emit_data->inst, bld_base->pc - 1); |
| } |
| |
| static void |
| tex_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| tex2_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txb_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
| emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txb2_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
| emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txd_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, |
| emit_data->output, 3, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txl_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
| emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txl2_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
| emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| txp_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, |
| emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
| } |
| |
| static void |
| tg4_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| emit_data->output, 2, LP_SAMPLER_OP_GATHER); |
| } |
| |
| static void |
| lodq_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| emit_data->output, 1, LP_SAMPLER_OP_LODQ); |
| } |
| |
| static void |
| txq_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_size_query(bld, emit_data->inst, emit_data->output, FALSE); |
| } |
| |
| static void |
| txf_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE); |
| } |
| |
| static void |
| sample_i_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE); |
| } |
| |
| static void |
| sample_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| sample_b_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
| FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| sample_c_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| sample_c_lz_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, |
| TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| sample_d_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, |
| FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| sample_l_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
| FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); |
| } |
| |
| static void |
| gather4_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| FALSE, LP_SAMPLER_OP_GATHER, emit_data->output); |
| } |
| |
| static void |
| sviewinfo_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); |
| } |
| |
| static void |
| lod_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
| FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); |
| } |
| |
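/**
 * Combined execution mask: the shader-level mask (updated by KILL)
 * ANDed with the current control-flow execution mask, if any.
 */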
| static LLVMValueRef |
| mask_vec(struct lp_build_tgsi_context *bld_base) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| struct lp_exec_mask *exec_mask = &bld->exec_mask; |
| |
| if (!exec_mask->has_mask) { |
| return lp_build_mask_value(bld->mask); |
| } |
| return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask), |
| exec_mask->exec_mask, ""); |
| } |
| |
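/**
 * Increment the counters of all active lanes by one.  This relies on the
 * mask being ~0 (i.e. -1) for active lanes and 0 for inactive ones, so
 * subtracting the mask amounts to a masked increment.
 */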
| static void |
| increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base, |
| LLVMValueRef ptr, |
| LLVMValueRef mask) |
| { |
| LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
| LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); |
| |
| current_vec = LLVMBuildSub(builder, current_vec, mask, ""); |
| |
| LLVMBuildStore(builder, current_vec, ptr); |
| } |
| |
| static void |
| clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base, |
| LLVMValueRef ptr, |
| LLVMValueRef mask) |
| { |
| LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
| LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); |
| |
| current_vec = lp_build_select(&bld_base->uint_bld, |
| mask, |
| bld_base->uint_bld.zero, |
| current_vec); |
| |
| LLVMBuildStore(builder, current_vec, ptr); |
| } |
| |
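/**
 * Mask off lanes that have already emitted the maximum number of output
 * vertices declared by the geometry shader, so further EMITs on those
 * lanes become no-ops.
 */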
| static LLVMValueRef |
| clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld, |
| LLVMValueRef current_mask_vec, |
| LLVMValueRef total_emitted_vertices_vec) |
| { |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| struct lp_build_context *int_bld = &bld->bld_base.int_bld; |
| LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS, |
| total_emitted_vertices_vec, |
| bld->max_output_vertices_vec); |
| |
| return LLVMBuildAnd(builder, current_mask_vec, max_mask, ""); |
| } |
| |
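/**
 * EMIT handler: clamp the execution mask against the declared maximum
 * output vertex count, hand the gathered outputs to the gs interface and
 * bump the per-lane emitted vertex counters.
 */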
| static void |
| emit_vertex( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| |
| if (bld->gs_iface->emit_vertex) { |
| LLVMValueRef mask = mask_vec(bld_base); |
| LLVMValueRef total_emitted_vertices_vec = |
| LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); |
| mask = clamp_mask_to_max_output_vertices(bld, mask, |
| total_emitted_vertices_vec); |
| gather_outputs(bld); |
| bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base, |
| bld->outputs, |
| total_emitted_vertices_vec); |
| increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr, |
| mask); |
| increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr, |
| mask); |
| #if DUMP_GS_EMITS |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ emit vertex masked ones = ", |
| mask); |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ emit vertex emitted = ", |
| total_emitted_vertices_vec); |
| #endif |
| } |
| } |
| |
| |
| static void |
| end_primitive_masked(struct lp_build_tgsi_context * bld_base, |
| LLVMValueRef mask) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
| |
| if (bld->gs_iface->end_primitive) { |
| struct lp_build_context *uint_bld = &bld_base->uint_bld; |
| LLVMValueRef emitted_vertices_vec = |
| LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); |
| LLVMValueRef emitted_prims_vec = |
| LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); |
| |
| LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, |
| emitted_vertices_vec, |
| uint_bld->zero); |
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unflushed vertices; this way we make sure that end_primitive
         executes only on the paths that have something to flush. */
| mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); |
| |
| bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base, |
| emitted_vertices_vec, |
| emitted_prims_vec); |
| |
| #if DUMP_GS_EMITS |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ end prim masked ones = ", |
| mask); |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ end prim emitted verts1 = ", |
| emitted_vertices_vec); |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ end prim emitted prims1 = ", |
| LLVMBuildLoad(builder, |
| bld->emitted_prims_vec_ptr, "")); |
| #endif |
| increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, |
| mask); |
| clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, |
| mask); |
| #if DUMP_GS_EMITS |
| lp_build_print_value(bld->bld_base.base.gallivm, |
| " +++ end prim emitted verts2 = ", |
| LLVMBuildLoad(builder, |
| bld->emitted_vertices_vec_ptr, "")); |
| #endif |
| } |
| |
| } |
| |
| static void |
| end_primitive( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| if (bld->gs_iface->end_primitive) { |
| LLVMValueRef mask = mask_vec(bld_base); |
| end_primitive_masked(bld_base, mask); |
| } |
| } |
| |
| static void |
| cal_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, |
| &bld_base->pc); |
| } |
| |
| static void |
| ret_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); |
| } |
| |
| static void |
| brk_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_break(&bld->exec_mask, bld_base); |
| } |
| |
| static void |
| if_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| LLVMValueRef tmp; |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, |
| emit_data->args[0], bld->bld_base.base.zero); |
| lp_exec_mask_cond_push(&bld->exec_mask, tmp); |
| } |
| |
| static void |
| uif_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| LLVMValueRef tmp; |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct lp_build_context *uint_bld = &bld_base->uint_bld; |
| |
| tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, |
| emit_data->args[0], uint_bld->zero); |
| lp_exec_mask_cond_push(&bld->exec_mask, tmp); |
| } |
| |
| static void |
| case_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_case(&bld->exec_mask, emit_data->args[0]); |
| } |
| |
| static void |
| default_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_default(&bld->exec_mask, bld_base); |
| } |
| |
| static void |
| switch_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_switch(&bld->exec_mask, emit_data->args[0]); |
| } |
| |
| static void |
| endswitch_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_endswitch(&bld->exec_mask, bld_base); |
| } |
| |
| static void |
| bgnloop_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_bgnloop(&bld->exec_mask); |
| } |
| |
| static void |
| bgnsub_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_bgnsub(&bld->exec_mask); |
| } |
| |
| static void |
| else_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_cond_invert(&bld->exec_mask); |
| } |
| |
| static void |
| endif_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_cond_pop(&bld->exec_mask); |
| } |
| |
| static void |
| endloop_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); |
| } |
| |
| static void |
| endsub_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); |
| } |
| |
| static void |
| cont_emit( |
| const struct lp_build_tgsi_action * action, |
| struct lp_build_tgsi_context * bld_base, |
| struct lp_build_emit_data * emit_data) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| |
| lp_exec_continue(&bld->exec_mask); |
| } |
| |
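/*
 * Emitted before the shader body: allocate the stack-backed register arrays
 * needed for indirect addressing and, for geometry shaders, the emitted
 * vertex/primitive counters.
 */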
| static void emit_prologue(struct lp_build_tgsi_context * bld_base) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| struct gallivm_state * gallivm = bld_base->base.gallivm; |
| |
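   /* For each register file that is addressed indirectly, allocate a stack
    * array large enough for all declared registers (file_max + 1), with
    * four channels each. */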
| if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { |
| LLVMValueRef array_size = |
| lp_build_const_int32(gallivm, |
| bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); |
| bld->temps_array = lp_build_array_alloca(gallivm, |
| bld_base->base.vec_type, array_size, |
| "temp_array"); |
| } |
| |
| if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { |
| LLVMValueRef array_size = |
| lp_build_const_int32(gallivm, |
| bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); |
| bld->outputs_array = lp_build_array_alloca(gallivm, |
| bld_base->base.vec_type, array_size, |
| "output_array"); |
| } |
| |
| if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { |
| LLVMValueRef array_size = |
| lp_build_const_int32(gallivm, |
| bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4); |
| bld->imms_array = lp_build_array_alloca(gallivm, |
| bld_base->base.vec_type, array_size, |
| "imms_array"); |
| } |
| |
   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array so that we can index them dynamically. */
| if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) { |
| unsigned index, chan; |
| LLVMTypeRef vec_type = bld_base->base.vec_type; |
| LLVMValueRef array_size = lp_build_const_int32(gallivm, |
| bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); |
| bld->inputs_array = lp_build_array_alloca(gallivm, |
| vec_type, array_size, |
| "input_array"); |
| |
| assert(bld_base->info->num_inputs |
| <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); |
| |
| for (index = 0; index < bld_base->info->num_inputs; ++index) { |
| for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { |
| LLVMValueRef lindex = |
| lp_build_const_int32(gallivm, index * 4 + chan); |
| LLVMValueRef input_ptr = |
| LLVMBuildGEP(gallivm->builder, bld->inputs_array, |
| &lindex, 1, ""); |
| LLVMValueRef value = bld->inputs[index][chan]; |
| if (value) |
| LLVMBuildStore(gallivm->builder, value, input_ptr); |
| } |
| } |
| } |
| |
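   /* For geometry shaders, allocate and zero the per-channel counters of
    * primitives and vertices emitted so far. */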
| if (bld->gs_iface) { |
| struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; |
| bld->emitted_prims_vec_ptr = |
| lp_build_alloca(gallivm, |
| uint_bld->vec_type, |
| "emitted_prims_ptr"); |
| bld->emitted_vertices_vec_ptr = |
| lp_build_alloca(gallivm, |
| uint_bld->vec_type, |
| "emitted_vertices_ptr"); |
| bld->total_emitted_vertices_vec_ptr = |
| lp_build_alloca(gallivm, |
| uint_bld->vec_type, |
| "total_emitted_vertices_ptr"); |
| |
| LLVMBuildStore(gallivm->builder, uint_bld->zero, |
| bld->emitted_prims_vec_ptr); |
| LLVMBuildStore(gallivm->builder, uint_bld->zero, |
| bld->emitted_vertices_vec_ptr); |
| LLVMBuildStore(gallivm->builder, uint_bld->zero, |
| bld->total_emitted_vertices_vec_ptr); |
| } |
| |
| if (DEBUG_EXECUTION) { |
| lp_build_printf(gallivm, "\n"); |
| emit_dump_file(bld, TGSI_FILE_CONSTANT); |
| if (!bld->gs_iface) |
| emit_dump_file(bld, TGSI_FILE_INPUT); |
| } |
| } |
| |
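/*
 * Emitted after the shader body: for geometry shaders, flush any pending
 * vertices and pass the emit totals to the GS interface; otherwise gather
 * the shader outputs for the caller (see gather_outputs()).
 */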
| static void emit_epilogue(struct lp_build_tgsi_context * bld_base) |
| { |
| struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
| LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
| |
| if (DEBUG_EXECUTION) { |
| /* for debugging */ |
| if (0) { |
| emit_dump_file(bld, TGSI_FILE_TEMPORARY); |
| } |
| emit_dump_file(bld, TGSI_FILE_OUTPUT); |
| lp_build_printf(bld_base->base.gallivm, "\n"); |
| } |
| |
   /* If we have indirect addressing in outputs, the non-GS path below needs
    * to copy our alloca array back to the output slots specified by the
    * caller. */
| if (bld->gs_iface) { |
| LLVMValueRef total_emitted_vertices_vec; |
| LLVMValueRef emitted_prims_vec; |
      /* Implicit end-of-primitive, needed in case there are any unflushed
         vertices in the cache.  Note: we must not call end_primitive() here
         since the exec_mask is not valid at this point. */
| end_primitive_masked(bld_base, lp_build_mask_value(bld->mask)); |
| |
| total_emitted_vertices_vec = |
| LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); |
| emitted_prims_vec = |
| LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); |
| |
| bld->gs_iface->gs_epilogue(bld->gs_iface, |
| &bld->bld_base, |
| total_emitted_vertices_vec, |
| emitted_prims_vec); |
| } else { |
| gather_outputs(bld); |
| } |
| } |
| |
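/**
 * Translate the given TGSI token stream into LLVM IR, keeping register
 * values in SoA form (one LLVM vector per register channel).
 *
 * A rough calling sketch -- argument names match the parameters below and
 * the surrounding state is assumed to have been set up by the caller
 * (e.g. a shader variant generator); a NULL gs_iface means the shader is
 * not a geometry shader:
 *
 *    lp_build_tgsi_soa(gallivm, tokens, type, mask,
 *                      consts_ptr, const_sizes_ptr, &system_values,
 *                      inputs, outputs, context_ptr, thread_data_ptr,
 *                      sampler, info, NULL);
 */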
| void |
| lp_build_tgsi_soa(struct gallivm_state *gallivm, |
| const struct tgsi_token *tokens, |
| struct lp_type type, |
| struct lp_build_mask_context *mask, |
| LLVMValueRef consts_ptr, |
| LLVMValueRef const_sizes_ptr, |
| const struct lp_bld_tgsi_system_values *system_values, |
| const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], |
| LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], |
| LLVMValueRef context_ptr, |
| LLVMValueRef thread_data_ptr, |
| struct lp_build_sampler_soa *sampler, |
| const struct tgsi_shader_info *info, |
| const struct lp_build_tgsi_gs_iface *gs_iface) |
| { |
| struct lp_build_tgsi_soa_context bld; |
| |
| struct lp_type res_type; |
| |
| assert(type.length <= LP_MAX_VECTOR_LENGTH); |
| memset(&res_type, 0, sizeof res_type); |
| res_type.width = type.width; |
| res_type.length = type.length; |
| res_type.sign = 1; |
| |
| /* Setup build context */ |
| memset(&bld, 0, sizeof bld); |
| lp_build_context_init(&bld.bld_base.base, gallivm, type); |
| lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); |
| lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); |
| lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); |
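   /* Build contexts for 64-bit operations (double, uint64, int64): same
    * vector length as 'type' but twice the element width. */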
| { |
| struct lp_type dbl_type; |
| dbl_type = type; |
| dbl_type.width *= 2; |
| lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type); |
| } |
| { |
| struct lp_type uint64_type; |
| uint64_type = lp_uint_type(type); |
| uint64_type.width *= 2; |
| lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type); |
| } |
| { |
| struct lp_type int64_type; |
| int64_type = lp_int_type(type); |
| int64_type.width *= 2; |
| lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type); |
| } |
| bld.mask = mask; |
| bld.inputs = inputs; |
| bld.outputs = outputs; |
| bld.consts_ptr = consts_ptr; |
| bld.const_sizes_ptr = const_sizes_ptr; |
| bld.sampler = sampler; |
| bld.bld_base.info = info; |
| bld.indirect_files = info->indirect_files; |
| bld.context_ptr = context_ptr; |
| bld.thread_data_ptr = thread_data_ptr; |
| |
| /* |
| * If the number of temporaries is rather large then we just |
| * allocate them as an array right from the start and treat |
| * like indirect temporaries. |
| */ |
| if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) { |
| bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY); |
| } |
| /* |
| * For performance reason immediates are always backed in a static |
| * array, but if their number is too great, we have to use just |
| * a dynamically allocated array. |
| */ |
| bld.use_immediates_array = |
| (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES); |
| if (bld.use_immediates_array) { |
| bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE); |
| } |
| |
| |
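   /* Hook up the generic TGSI->LLVM walker: per-file fetch and store
    * callbacks, declaration and immediate handling, and the prologue and
    * epilogue emitted around the shader body. */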
| bld.bld_base.soa = TRUE; |
| bld.bld_base.emit_debug = emit_debug; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; |
| bld.bld_base.emit_store = emit_store; |
| |
| bld.bld_base.emit_declaration = lp_emit_declaration_soa; |
| bld.bld_base.emit_immediate = lp_emit_immediate_soa; |
| |
| bld.bld_base.emit_prologue = emit_prologue; |
| bld.bld_base.emit_epilogue = emit_epilogue; |
| |
| /* Set opcode actions */ |
| lp_set_default_actions_cpu(&bld.bld_base); |
| |
| bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit; |
| /* DX10 sampling ops */ |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; |
| bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit; |
| |
| |
| if (gs_iface) { |
      /* This property should always be set, but apps using
       * ext_geometry_shader4 quite often forget to, so fall back to
       * MAX_VERTEX_VARYING from that spec.  We could debug_assert
       * when it's not set instead, but that's a lot uglier. */
| uint max_output_vertices; |
| |
| /* inputs are always indirect with gs */ |
| bld.indirect_files |= (1 << TGSI_FILE_INPUT); |
| bld.gs_iface = gs_iface; |
| bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; |
| bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; |
| bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; |
| |
| max_output_vertices = |
| info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; |
| if (!max_output_vertices) |
| max_output_vertices = 32; |
| |
| bld.max_output_vertices_vec = |
| lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type, |
| max_output_vertices); |
| } |
| |
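   /*
    * All callbacks are in place: initialize the execution mask and run the
    * translation over the token stream.
    */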
| lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); |
| |
| bld.system_values = *system_values; |
| |
| lp_build_tgsi_llvm(&bld.bld_base, tokens); |
| |
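   /* Manually flip these 'if (0)' blocks to 'if (1)' to dump the TGSI
    * source and the generated function, or the whole LLVM module, while
    * debugging. */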
| if (0) { |
| LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); |
| LLVMValueRef function = LLVMGetBasicBlockParent(block); |
| debug_printf("11111111111111111111111111111 \n"); |
| tgsi_dump(tokens, 0); |
| lp_debug_dump_value(function); |
| debug_printf("2222222222222222222222222222 \n"); |
| } |
| |
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
| lp_exec_mask_fini(&bld.exec_mask); |
| } |