| /* |
| * Copyright (C) 2005 Ben Skeggs. |
| * |
| * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
| * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. |
| * |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining |
| * a copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sublicense, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial |
| * portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
| * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| */ |
| |
| /** |
| * \file |
| * |
| * \author Ben Skeggs <darktama@iinet.net.au> |
| * |
| * \author Jerome Glisse <j.glisse@gmail.com> |
| * |
| * \author Corbin Simpson <MostAwesomeDude@gmail.com> |
| * |
| */ |
| |
| #include "r500_fragprog.h" |
| |
| #include "../r300_reg.h" |
| |
| #include "radeon_program_pair.h" |
| |
| #define PROG_CODE \ |
| struct r500_fragment_program_code *code = &c->code->code.r500 |
| |
| #define error(fmt, args...) do { \ |
| rc_error(&c->Base, "%s::%s(): " fmt "\n", \ |
| __FILE__, __FUNCTION__, ##args); \ |
| } while(0) |
| |
| |
| struct branch_info { |
| int If; |
| int Else; |
| int Endif; |
| }; |
| |
| struct r500_loop_info { |
| int BgnLoop; |
| |
| int BranchDepth; |
| int * Brks; |
| int BrkCount; |
| int BrkReserved; |
| |
| int * Conts; |
| int ContCount; |
| int ContReserved; |
| }; |
| |
| struct emit_state { |
| struct radeon_compiler * C; |
| struct r500_fragment_program_code * Code; |
| |
| struct branch_info * Branches; |
| unsigned int CurrentBranchDepth; |
| unsigned int BranchesReserved; |
| |
| struct r500_loop_info * Loops; |
| unsigned int CurrentLoopDepth; |
| unsigned int LoopsReserved; |
| |
| unsigned int MaxBranchDepth; |
| |
| }; |
| |
| static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) |
| { |
| switch(opcode) { |
| case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; |
| case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; |
| case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; |
| case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; |
| case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; |
| case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; |
| case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; |
| default: |
| error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); |
| /* fall through */ |
| case RC_OPCODE_NOP: |
| /* fall through */ |
| case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; |
| case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; |
| case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; |
| case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; |
| } |
| } |
| |
| static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) |
| { |
| switch(opcode) { |
| case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; |
| case RC_OPCODE_CND: return R500_ALPHA_OP_CND; |
| case RC_OPCODE_COS: return R500_ALPHA_OP_COS; |
| case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; |
| case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; |
| case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; |
| case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; |
| case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; |
| case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; |
| case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; |
| default: |
| error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); |
| /* fall through */ |
| case RC_OPCODE_NOP: |
| /* fall through */ |
| case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; |
| case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; |
| case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; |
| case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; |
| case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; |
| case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; |
| } |
| } |
| |
| static unsigned int fix_hw_swizzle(unsigned int swz) |
| { |
| switch (swz) { |
| case RC_SWIZZLE_ZERO: |
| case RC_SWIZZLE_UNUSED: |
| swz = 4; |
| break; |
| case RC_SWIZZLE_HALF: |
| swz = 5; |
| break; |
| case RC_SWIZZLE_ONE: |
| swz = 6; |
| break; |
| } |
| |
| return swz; |
| } |
| |
| static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) |
| { |
| unsigned int t = inst->RGB.Arg[arg].Source; |
| int comp; |
| t |= inst->RGB.Arg[arg].Negate << 11; |
| t |= inst->RGB.Arg[arg].Abs << 12; |
| |
| for(comp = 0; comp < 3; ++comp) |
| t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); |
| |
| return t; |
| } |
| |
| static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) |
| { |
| unsigned int t = inst->Alpha.Arg[i].Source; |
| t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; |
| t |= inst->Alpha.Arg[i].Negate << 5; |
| t |= inst->Alpha.Arg[i].Abs << 6; |
| return t; |
| } |
| |
| static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) |
| { |
| switch(func) { |
| case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; |
| case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; |
| case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; |
| case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; |
| default: |
| rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); |
| return 0; |
| } |
| } |
| |
| static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) |
| { |
| if (index > code->max_temp_idx) |
| code->max_temp_idx = index; |
| } |
| |
| static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) |
| { |
| /* From docs: |
| * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. |
| * MSB = 1 << 7 */ |
| if (!src.Used) |
| return 1 << 7; |
| |
| if (src.File == RC_FILE_CONSTANT) { |
| return src.Index | R500_RGB_ADDR0_CONST; |
| } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { |
| use_temporary(code, src.Index); |
| return src.Index; |
| } else if (src.File == RC_FILE_INLINE) { |
| return src.Index | (1 << 7); |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * NOP the specified instruction if it is not a texture lookup. |
| */ |
| static void alu_nop(struct r300_fragment_program_compiler *c, int ip) |
| { |
| PROG_CODE; |
| |
| if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { |
| code->inst[ip].inst0 |= R500_INST_NOP; |
| } |
| } |
| |
| /** |
| * Emit a paired ALU instruction. |
| */ |
| static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) |
| { |
| int ip; |
| PROG_CODE; |
| |
| if (code->inst_end >= c->Base.max_alu_insts-1) { |
| error("emit_alu: Too many instructions"); |
| return; |
| } |
| |
| ip = ++code->inst_end; |
| |
| /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ |
| if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || |
| inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { |
| if (ip > 0) { |
| alu_nop(c, ip - 1); |
| } |
| } |
| |
| code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); |
| code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); |
| |
| if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { |
| code->inst[ip].inst0 = R500_INST_TYPE_OUT; |
| if (inst->WriteALUResult) { |
| error("Cannot write output and ALU result at the same time"); |
| return; |
| } |
| } else { |
| code->inst[ip].inst0 = R500_INST_TYPE_ALU; |
| } |
| code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT); |
| |
| code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); |
| code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; |
| code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); |
| if (inst->Nop) { |
| code->inst[ip].inst0 |= R500_INST_NOP; |
| } |
| if (inst->Alpha.DepthWriteMask) { |
| code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; |
| c->code->writes_depth = 1; |
| } |
| |
| code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); |
| code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); |
| use_temporary(code, inst->Alpha.DestIndex); |
| use_temporary(code, inst->RGB.DestIndex); |
| |
| if (inst->RGB.Saturate) |
| code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; |
| if (inst->Alpha.Saturate) |
| code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; |
| |
| /* Set the presubtract operation. */ |
| switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { |
| case RC_PRESUB_BIAS: |
| code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; |
| break; |
| case RC_PRESUB_SUB: |
| code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; |
| break; |
| case RC_PRESUB_ADD: |
| code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; |
| break; |
| case RC_PRESUB_INV: |
| code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; |
| break; |
| default: |
| break; |
| } |
| switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { |
| case RC_PRESUB_BIAS: |
| code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; |
| break; |
| case RC_PRESUB_SUB: |
| code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; |
| break; |
| case RC_PRESUB_ADD: |
| code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; |
| break; |
| case RC_PRESUB_INV: |
| code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; |
| break; |
| default: |
| break; |
| } |
| |
| /* Set the output modifier */ |
| code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT; |
| code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT; |
| |
| code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); |
| code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); |
| code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); |
| |
| code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); |
| code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); |
| code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); |
| |
| code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; |
| code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; |
| code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; |
| |
| code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; |
| code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; |
| code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; |
| |
| code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); |
| code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); |
| |
| if (inst->WriteALUResult) { |
| code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; |
| |
| if (inst->WriteALUResult == RC_ALURESULT_X) |
| code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; |
| else |
| code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; |
| |
| code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); |
| } |
| } |
| |
| static unsigned int translate_strq_swizzle(unsigned int swizzle) |
| { |
| unsigned int swiz = 0; |
| int i; |
| for (i = 0; i < 4; i++) |
| swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; |
| return swiz; |
| } |
| |
| /** |
| * Emit a single TEX instruction |
| */ |
| static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) |
| { |
| int ip; |
| PROG_CODE; |
| |
| if (code->inst_end >= c->Base.max_alu_insts-1) { |
| error("emit_tex: Too many instructions"); |
| return 0; |
| } |
| |
| ip = ++code->inst_end; |
| |
| code->inst[ip].inst0 = R500_INST_TYPE_TEX |
| | (inst->DstReg.WriteMask << 11) |
| | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); |
| code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) |
| | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT); |
| |
| if (inst->TexSrcTarget == RC_TEXTURE_RECT) |
| code->inst[ip].inst1 |= R500_TEX_UNSCALED; |
| |
| switch (inst->Opcode) { |
| case RC_OPCODE_KIL: |
| code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; |
| break; |
| case RC_OPCODE_TEX: |
| code->inst[ip].inst1 |= R500_TEX_INST_LD; |
| break; |
| case RC_OPCODE_TXB: |
| code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; |
| break; |
| case RC_OPCODE_TXP: |
| code->inst[ip].inst1 |= R500_TEX_INST_PROJ; |
| break; |
| case RC_OPCODE_TXD: |
| code->inst[ip].inst1 |= R500_TEX_INST_DXDY; |
| break; |
| case RC_OPCODE_TXL: |
| code->inst[ip].inst1 |= R500_TEX_INST_LOD; |
| break; |
| default: |
| error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); |
| } |
| |
| use_temporary(code, inst->SrcReg[0].Index); |
| if (inst->Opcode != RC_OPCODE_KIL) |
| use_temporary(code, inst->DstReg.Index); |
| |
| code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) |
| | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) |
| | R500_TEX_DST_ADDR(inst->DstReg.Index) |
| | (GET_SWZ(inst->TexSwizzle, 0) << 24) |
| | (GET_SWZ(inst->TexSwizzle, 1) << 26) |
| | (GET_SWZ(inst->TexSwizzle, 2) << 28) |
| | (GET_SWZ(inst->TexSwizzle, 3) << 30) |
| ; |
| |
| if (inst->Opcode == RC_OPCODE_TXD) { |
| use_temporary(code, inst->SrcReg[1].Index); |
| use_temporary(code, inst->SrcReg[2].Index); |
| |
| /* DX and DY parameters are specified in a separate register. */ |
| code->inst[ip].inst3 = |
| R500_DX_ADDR(inst->SrcReg[1].Index) | |
| (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | |
| R500_DY_ADDR(inst->SrcReg[2].Index) | |
| (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); |
| } |
| |
| return 1; |
| } |
| |
| static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) |
| { |
| unsigned int newip; |
| |
| if (s->Code->inst_end >= s->C->max_alu_insts-1) { |
| rc_error(s->C, "emit_tex: Too many instructions"); |
| return; |
| } |
| |
| newip = ++s->Code->inst_end; |
| |
| /* Currently all loops use the same integer constant to intialize |
| * the loop variables. */ |
| if(!s->Code->int_constants[0]) { |
| s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); |
| s->Code->int_constant_count = 1; |
| } |
| s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; |
| |
| switch(inst->U.I.Opcode){ |
| struct branch_info * branch; |
| struct r500_loop_info * loop; |
| case RC_OPCODE_BGNLOOP: |
| memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, |
| s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); |
| |
| loop = &s->Loops[s->CurrentLoopDepth++]; |
| memset(loop, 0, sizeof(struct r500_loop_info)); |
| loop->BranchDepth = s->CurrentBranchDepth; |
| loop->BgnLoop = newip; |
| |
| s->Code->inst[newip].inst2 = R500_FC_OP_LOOP |
| | R500_FC_JUMP_FUNC(0x00) |
| | R500_FC_IGNORE_UNCOVERED |
| ; |
| break; |
| case RC_OPCODE_BRK: |
| loop = &s->Loops[s->CurrentLoopDepth - 1]; |
| memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, |
| loop->BrkCount, loop->BrkReserved, 1); |
| |
| loop->Brks[loop->BrkCount++] = newip; |
| s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP |
| | R500_FC_JUMP_FUNC(0xff) |
| | R500_FC_B_OP1_DECR |
| | R500_FC_B_POP_CNT( |
| s->CurrentBranchDepth - loop->BranchDepth) |
| | R500_FC_IGNORE_UNCOVERED |
| ; |
| break; |
| |
| case RC_OPCODE_CONT: |
| loop = &s->Loops[s->CurrentLoopDepth - 1]; |
| memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, |
| loop->ContCount, loop->ContReserved, 1); |
| loop->Conts[loop->ContCount++] = newip; |
| s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE |
| | R500_FC_JUMP_FUNC(0xff) |
| | R500_FC_B_OP1_DECR |
| | R500_FC_B_POP_CNT( |
| s->CurrentBranchDepth - loop->BranchDepth) |
| | R500_FC_IGNORE_UNCOVERED |
| ; |
| break; |
| |
| case RC_OPCODE_ENDLOOP: |
| { |
| loop = &s->Loops[s->CurrentLoopDepth - 1]; |
| /* Emit ENDLOOP */ |
| s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP |
| | R500_FC_JUMP_FUNC(0xff) |
| | R500_FC_JUMP_ANY |
| | R500_FC_IGNORE_UNCOVERED |
| ; |
| /* The constant integer at index 0 is used by all loops. */ |
| s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) |
| | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) |
| ; |
| |
| /* Set jump address and int constant for BGNLOOP */ |
| s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) |
| | R500_FC_JUMP_ADDR(newip) |
| ; |
| |
| /* Set jump address for the BRK instructions. */ |
| while(loop->BrkCount--) { |
| s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = |
| R500_FC_JUMP_ADDR(newip + 1); |
| } |
| |
| /* Set jump address for CONT instructions. */ |
| while(loop->ContCount--) { |
| s->Code->inst[loop->Conts[loop->ContCount]].inst3 = |
| R500_FC_JUMP_ADDR(newip); |
| } |
| s->CurrentLoopDepth--; |
| break; |
| } |
| case RC_OPCODE_IF: |
| if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { |
| rc_error(s->C, "Branch depth exceeds hardware limit"); |
| return; |
| } |
| memory_pool_array_reserve(&s->C->Pool, struct branch_info, |
| s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); |
| |
| branch = &s->Branches[s->CurrentBranchDepth++]; |
| branch->If = newip; |
| branch->Else = -1; |
| branch->Endif = -1; |
| |
| if (s->CurrentBranchDepth > s->MaxBranchDepth) |
| s->MaxBranchDepth = s->CurrentBranchDepth; |
| |
| /* actual instruction is filled in at ENDIF time */ |
| break; |
| |
| case RC_OPCODE_ELSE: |
| if (!s->CurrentBranchDepth) { |
| rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); |
| return; |
| } |
| |
| branch = &s->Branches[s->CurrentBranchDepth - 1]; |
| branch->Else = newip; |
| |
| /* actual instruction is filled in at ENDIF time */ |
| break; |
| |
| case RC_OPCODE_ENDIF: |
| if (!s->CurrentBranchDepth) { |
| rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); |
| return; |
| } |
| |
| branch = &s->Branches[s->CurrentBranchDepth - 1]; |
| branch->Endif = newip; |
| |
| s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP |
| | R500_FC_A_OP_NONE /* no address stack */ |
| | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ |
| | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ |
| | R500_FC_B_OP1_NONE /* no branch counter if stay */ |
| | R500_FC_B_POP_CNT(1) |
| ; |
| s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
| s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP |
| | R500_FC_A_OP_NONE /* no address stack */ |
| | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ |
| | R500_FC_B_OP0_INCR /* increment branch counter if stay */ |
| | R500_FC_IGNORE_UNCOVERED |
| ; |
| |
| if (branch->Else >= 0) { |
| /* increment branch counter also if jump */ |
| s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; |
| s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); |
| |
| s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP |
| | R500_FC_A_OP_NONE /* no address stack */ |
| | R500_FC_B_ELSE /* all active pixels want to jump */ |
| | R500_FC_B_OP0_NONE /* no counter op if stay */ |
| | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ |
| | R500_FC_B_POP_CNT(1) |
| ; |
| s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
| } else { |
| /* don't touch branch counter on jump */ |
| s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; |
| s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
| } |
| |
| |
| s->CurrentBranchDepth--; |
| break; |
| default: |
| rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); |
| } |
| } |
| |
| void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) |
| { |
| struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; |
| struct emit_state s; |
| struct r500_fragment_program_code *code = &compiler->code->code.r500; |
| |
| memset(&s, 0, sizeof(s)); |
| s.C = &compiler->Base; |
| s.Code = code; |
| |
| memset(code, 0, sizeof(*code)); |
| code->max_temp_idx = 1; |
| code->inst_end = -1; |
| |
| for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; |
| inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; |
| inst = inst->Next) { |
| if (inst->Type == RC_INSTRUCTION_NORMAL) { |
| const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
| |
| if (opcode->IsFlowControl) { |
| emit_flowcontrol(&s, inst); |
| } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { |
| continue; |
| } else { |
| emit_tex(compiler, &inst->U.I); |
| } |
| } else { |
| emit_paired(compiler, &inst->U.P); |
| } |
| } |
| |
| if (code->max_temp_idx >= compiler->Base.max_temp_regs) |
| rc_error(&compiler->Base, "Too many hardware temporaries used"); |
| |
| if (compiler->Base.Error) |
| return; |
| |
| if (code->inst_end == -1 || |
| (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { |
| int ip; |
| |
| /* This may happen when dead-code elimination is disabled or |
| * when most of the fragment program logic is leading to a KIL */ |
| if (code->inst_end >= compiler->Base.max_alu_insts-1) { |
| rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); |
| return; |
| } |
| |
| ip = ++code->inst_end; |
| code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; |
| } |
| |
| /* Make sure TEX_SEM_WAIT is set on the last instruction */ |
| code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT; |
| |
| /* Enable full flow control mode if we are using loops or have if |
| * statements nested at least four deep. */ |
| if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { |
| if (code->max_temp_idx < 1) |
| code->max_temp_idx = 1; |
| |
| code->us_fc_ctrl |= R500_FC_FULL_FC_EN; |
| } |
| } |