| /* |
| * Copyright 2011 Joakim Sindholt <opensource@zhasha.com> |
| * Copyright 2013 Christoph Bumiller |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
| |
| #include "nine_shader.h" |
| |
| #include "device9.h" |
| #include "nine_debug.h" |
| #include "nine_state.h" |
| #include "vertexdeclaration9.h" |
| |
| #include "util/macros.h" |
| #include "util/u_memory.h" |
| #include "util/u_inlines.h" |
| #include "pipe/p_shader_tokens.h" |
| #include "tgsi/tgsi_ureg.h" |
| #include "tgsi/tgsi_dump.h" |
| |
| #define DBG_CHANNEL DBG_SHADER |
| |
| #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args) |
| |
| |
| struct shader_translator; |
| |
| typedef HRESULT (*translate_instruction_func)(struct shader_translator *); |
| |
| static inline const char *d3dsio_to_string(unsigned opcode); |
| |
| |
| #define NINED3D_SM1_VS 0xfffe |
| #define NINED3D_SM1_PS 0xffff |
| |
| #define NINE_MAX_COND_DEPTH 64 |
| #define NINE_MAX_LOOP_DEPTH 64 |
| |
| #define NINED3DSP_END 0x0000ffff |
| |
| #define NINED3DSPTYPE_FLOAT4 0 |
| #define NINED3DSPTYPE_INT4 1 |
| #define NINED3DSPTYPE_BOOL 2 |
| |
| #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) |
| |
| #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL |
| #define NINED3DSP_WRITEMASK_SHIFT 16 |
| |
| #define NINED3DSHADER_INST_PREDICATED (1 << 28) |
| |
| #define NINED3DSHADER_REL_OP_GT 1 |
| #define NINED3DSHADER_REL_OP_EQ 2 |
| #define NINED3DSHADER_REL_OP_GE 3 |
| #define NINED3DSHADER_REL_OP_LT 4 |
| #define NINED3DSHADER_REL_OP_NE 5 |
| #define NINED3DSHADER_REL_OP_LE 6 |
| |
| #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16 |
| #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT) |
| |
| #define NINED3DSI_TEXLD_PROJECT 0x1 |
| #define NINED3DSI_TEXLD_BIAS 0x2 |
| |
| #define NINED3DSP_WRITEMASK_0 0x1 |
| #define NINED3DSP_WRITEMASK_1 0x2 |
| #define NINED3DSP_WRITEMASK_2 0x4 |
| #define NINED3DSP_WRITEMASK_3 0x8 |
| #define NINED3DSP_WRITEMASK_ALL 0xf |
| |
| #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6)) |
| |
| #define NINE_SWIZZLE4(x,y,z,w) \ |
| TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w |
| |
| #define NINE_CONSTANT_SRC(index) \ |
| ureg_src_register(TGSI_FILE_CONSTANT, index) |
| |
| #define NINE_APPLY_SWIZZLE(src, s) \ |
| ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) |
| |
| #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \ |
| NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s) |
| |
| #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) |
| #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) |
| #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) |
| |
| /* |
| * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4 |
| * BIAS <= PS 1.4 (x-0.5) |
| * BIASNEG <= PS 1.4 (-(x-0.5)) |
| * SIGN <= PS 1.4 (2(x-0.5)) |
| * SIGNNEG <= PS 1.4 (-2(x-0.5)) |
| * COMP <= PS 1.4 (1-x) |
| * X2 = PS 1.4 (2x) |
| * X2NEG = PS 1.4 (-2x) |
| * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11 |
| * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11 |
| * ABS >= SM 3.0 (abs(x)) |
| * ABSNEG >= SM 3.0 (-abs(x)) |
| * NOT >= SM 2.0 pedication only |
| */ |
| #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT) |
| #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT) |
| |
| static const char *sm1_mod_str[] = |
| { |
| [NINED3DSPSM_NONE] = "", |
| [NINED3DSPSM_NEG] = "-", |
| [NINED3DSPSM_BIAS] = "bias", |
| [NINED3DSPSM_BIASNEG] = "biasneg", |
| [NINED3DSPSM_SIGN] = "sign", |
| [NINED3DSPSM_SIGNNEG] = "signneg", |
| [NINED3DSPSM_COMP] = "comp", |
| [NINED3DSPSM_X2] = "x2", |
| [NINED3DSPSM_X2NEG] = "x2neg", |
| [NINED3DSPSM_DZ] = "dz", |
| [NINED3DSPSM_DW] = "dw", |
| [NINED3DSPSM_ABS] = "abs", |
| [NINED3DSPSM_ABSNEG] = "-abs", |
| [NINED3DSPSM_NOT] = "not" |
| }; |
| |
| static void |
| sm1_dump_writemask(BYTE mask) |
| { |
| if (mask & 1) DUMP("x"); else DUMP("_"); |
| if (mask & 2) DUMP("y"); else DUMP("_"); |
| if (mask & 4) DUMP("z"); else DUMP("_"); |
| if (mask & 8) DUMP("w"); else DUMP("_"); |
| } |
| |
| static void |
| sm1_dump_swizzle(BYTE s) |
| { |
| char c[4] = { 'x', 'y', 'z', 'w' }; |
| DUMP("%c%c%c%c", |
| c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]); |
| } |
| |
| static const char sm1_file_char[] = |
| { |
| [D3DSPR_TEMP] = 'r', |
| [D3DSPR_INPUT] = 'v', |
| [D3DSPR_CONST] = 'c', |
| [D3DSPR_ADDR] = 'A', |
| [D3DSPR_RASTOUT] = 'R', |
| [D3DSPR_ATTROUT] = 'D', |
| [D3DSPR_OUTPUT] = 'o', |
| [D3DSPR_CONSTINT] = 'I', |
| [D3DSPR_COLOROUT] = 'C', |
| [D3DSPR_DEPTHOUT] = 'D', |
| [D3DSPR_SAMPLER] = 's', |
| [D3DSPR_CONST2] = 'c', |
| [D3DSPR_CONST3] = 'c', |
| [D3DSPR_CONST4] = 'c', |
| [D3DSPR_CONSTBOOL] = 'B', |
| [D3DSPR_LOOP] = 'L', |
| [D3DSPR_TEMPFLOAT16] = 'h', |
| [D3DSPR_MISCTYPE] = 'M', |
| [D3DSPR_LABEL] = 'X', |
| [D3DSPR_PREDICATE] = 'p' |
| }; |
| |
| static void |
| sm1_dump_reg(BYTE file, INT index) |
| { |
| switch (file) { |
| case D3DSPR_LOOP: |
| DUMP("aL"); |
| break; |
| case D3DSPR_COLOROUT: |
| DUMP("oC%i", index); |
| break; |
| case D3DSPR_DEPTHOUT: |
| DUMP("oDepth"); |
| break; |
| case D3DSPR_RASTOUT: |
| DUMP("oRast%i", index); |
| break; |
| case D3DSPR_CONSTINT: |
| DUMP("iconst[%i]", index); |
| break; |
| case D3DSPR_CONSTBOOL: |
| DUMP("bconst[%i]", index); |
| break; |
| default: |
| DUMP("%c%i", sm1_file_char[file], index); |
| break; |
| } |
| } |
| |
| struct sm1_src_param |
| { |
| INT idx; |
| struct sm1_src_param *rel; |
| BYTE file; |
| BYTE swizzle; |
| BYTE mod; |
| BYTE type; |
| union { |
| DWORD d[4]; |
| float f[4]; |
| int i[4]; |
| BOOL b; |
| } imm; |
| }; |
| static void |
| sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *); |
| |
| struct sm1_dst_param |
| { |
| INT idx; |
| struct sm1_src_param *rel; |
| BYTE file; |
| BYTE mask; |
| BYTE mod; |
| int8_t shift; /* sint4 */ |
| BYTE type; |
| }; |
| |
| static inline void |
| assert_replicate_swizzle(const struct ureg_src *reg) |
| { |
| assert(reg->SwizzleY == reg->SwizzleX && |
| reg->SwizzleZ == reg->SwizzleX && |
| reg->SwizzleW == reg->SwizzleX); |
| } |
| |
| static void |
| sm1_dump_immediate(const struct sm1_src_param *param) |
| { |
| switch (param->type) { |
| case NINED3DSPTYPE_FLOAT4: |
| DUMP("{ %f %f %f %f }", |
| param->imm.f[0], param->imm.f[1], |
| param->imm.f[2], param->imm.f[3]); |
| break; |
| case NINED3DSPTYPE_INT4: |
| DUMP("{ %i %i %i %i }", |
| param->imm.i[0], param->imm.i[1], |
| param->imm.i[2], param->imm.i[3]); |
| break; |
| case NINED3DSPTYPE_BOOL: |
| DUMP("%s", param->imm.b ? "TRUE" : "FALSE"); |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| } |
| |
| static void |
| sm1_dump_src_param(const struct sm1_src_param *param) |
| { |
| if (param->file == NINED3DSPR_IMMEDIATE) { |
| assert(!param->mod && |
| !param->rel && |
| param->swizzle == NINED3DSP_NOSWIZZLE); |
| sm1_dump_immediate(param); |
| return; |
| } |
| |
| if (param->mod) |
| DUMP("%s(", sm1_mod_str[param->mod]); |
| if (param->rel) { |
| DUMP("%c[", sm1_file_char[param->file]); |
| sm1_dump_src_param(param->rel); |
| DUMP("+%i]", param->idx); |
| } else { |
| sm1_dump_reg(param->file, param->idx); |
| } |
| if (param->mod) |
| DUMP(")"); |
| if (param->swizzle != NINED3DSP_NOSWIZZLE) { |
| DUMP("."); |
| sm1_dump_swizzle(param->swizzle); |
| } |
| } |
| |
| static void |
| sm1_dump_dst_param(const struct sm1_dst_param *param) |
| { |
| if (param->mod & NINED3DSPDM_SATURATE) |
| DUMP("sat "); |
| if (param->mod & NINED3DSPDM_PARTIALP) |
| DUMP("pp "); |
| if (param->mod & NINED3DSPDM_CENTROID) |
| DUMP("centroid "); |
| if (param->shift < 0) |
| DUMP("/%u ", 1 << -param->shift); |
| if (param->shift > 0) |
| DUMP("*%u ", 1 << param->shift); |
| |
| if (param->rel) { |
| DUMP("%c[", sm1_file_char[param->file]); |
| sm1_dump_src_param(param->rel); |
| DUMP("+%i]", param->idx); |
| } else { |
| sm1_dump_reg(param->file, param->idx); |
| } |
| if (param->mask != NINED3DSP_WRITEMASK_ALL) { |
| DUMP("."); |
| sm1_dump_writemask(param->mask); |
| } |
| } |
| |
| struct sm1_semantic |
| { |
| struct sm1_dst_param reg; |
| BYTE sampler_type; |
| D3DDECLUSAGE usage; |
| BYTE usage_idx; |
| }; |
| |
| struct sm1_op_info |
| { |
| /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter |
| * should be ignored completely */ |
| unsigned sio; |
| unsigned opcode; /* TGSI_OPCODE_x */ |
| |
| /* versions are still set even handler is set */ |
| struct { |
| unsigned min; |
| unsigned max; |
| } vert_version, frag_version; |
| |
| /* number of regs parsed outside of special handler */ |
| unsigned ndst; |
| unsigned nsrc; |
| |
| /* some instructions don't map perfectly, so use a special handler */ |
| translate_instruction_func handler; |
| }; |
| |
| struct sm1_instruction |
| { |
| D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; |
| BYTE flags; |
| BOOL coissue; |
| BOOL predicated; |
| BYTE ndst; |
| BYTE nsrc; |
| struct sm1_src_param src[4]; |
| struct sm1_src_param src_rel[4]; |
| struct sm1_src_param pred; |
| struct sm1_src_param dst_rel[1]; |
| struct sm1_dst_param dst[1]; |
| |
| struct sm1_op_info *info; |
| }; |
| |
| static void |
| sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent) |
| { |
| unsigned i; |
| |
| /* no info stored for these: */ |
| if (insn->opcode == D3DSIO_DCL) |
| return; |
| for (i = 0; i < indent; ++i) |
| DUMP(" "); |
| |
| if (insn->predicated) { |
| DUMP("@"); |
| sm1_dump_src_param(&insn->pred); |
| DUMP(" "); |
| } |
| DUMP("%s", d3dsio_to_string(insn->opcode)); |
| if (insn->flags) { |
| switch (insn->opcode) { |
| case D3DSIO_TEX: |
| DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b"); |
| break; |
| default: |
| DUMP("_%x", insn->flags); |
| break; |
| } |
| } |
| if (insn->coissue) |
| DUMP("_co"); |
| DUMP(" "); |
| |
| for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) { |
| sm1_dump_dst_param(&insn->dst[i]); |
| DUMP(" "); |
| } |
| |
| for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) { |
| sm1_dump_src_param(&insn->src[i]); |
| DUMP(" "); |
| } |
| if (insn->opcode == D3DSIO_DEF || |
| insn->opcode == D3DSIO_DEFI || |
| insn->opcode == D3DSIO_DEFB) |
| sm1_dump_immediate(&insn->src[0]); |
| |
| DUMP("\n"); |
| } |
| |
| struct sm1_local_const |
| { |
| INT idx; |
| struct ureg_src reg; |
| float f[4]; /* for indirect addressing of float constants */ |
| }; |
| |
| struct shader_translator |
| { |
| const DWORD *byte_code; |
| const DWORD *parse; |
| const DWORD *parse_next; |
| |
| struct ureg_program *ureg; |
| |
| /* shader version */ |
| struct { |
| BYTE major; |
| BYTE minor; |
| } version; |
| unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */ |
| unsigned num_constf_allowed; |
| unsigned num_consti_allowed; |
| unsigned num_constb_allowed; |
| |
| boolean native_integers; |
| boolean inline_subroutines; |
| boolean lower_preds; |
| boolean want_texcoord; |
| boolean shift_wpos; |
| boolean wpos_is_sysval; |
| boolean face_is_sysval_integer; |
| unsigned texcoord_sn; |
| |
| struct sm1_instruction insn; /* current instruction */ |
| |
| struct { |
| struct ureg_dst *r; |
| struct ureg_dst oPos; |
| struct ureg_dst oPos_out; /* the real output when doing streamout */ |
| struct ureg_dst oFog; |
| struct ureg_dst oPts; |
| struct ureg_dst oCol[4]; |
| struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS]; |
| struct ureg_dst oDepth; |
| struct ureg_src v[PIPE_MAX_SHADER_INPUTS]; |
| struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */ |
| struct ureg_src vPos; |
| struct ureg_src vFace; |
| struct ureg_src s; |
| struct ureg_dst p; |
| struct ureg_dst address; |
| struct ureg_dst a0; |
| struct ureg_dst tS[8]; /* texture stage registers */ |
| struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ |
| struct ureg_dst t[5]; /* scratch TEMPs */ |
| struct ureg_src vC[2]; /* PS color in */ |
| struct ureg_src vT[8]; /* PS texcoord in */ |
| struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ |
| } regs; |
| unsigned num_temp; /* ARRAY_SIZE(regs.r) */ |
| unsigned num_scratch; |
| unsigned loop_depth; |
| unsigned loop_depth_max; |
| unsigned cond_depth; |
| unsigned loop_labels[NINE_MAX_LOOP_DEPTH]; |
| unsigned cond_labels[NINE_MAX_COND_DEPTH]; |
| boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */ |
| |
| unsigned *inst_labels; /* LABEL op */ |
| unsigned num_inst_labels; |
| |
| unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */ |
| |
| struct sm1_local_const *lconstf; |
| unsigned num_lconstf; |
| struct sm1_local_const *lconsti; |
| unsigned num_lconsti; |
| struct sm1_local_const *lconstb; |
| unsigned num_lconstb; |
| |
| boolean indirect_const_access; |
| boolean failure; |
| |
| struct nine_vs_output_info output_info[16]; |
| int num_outputs; |
| |
| struct nine_shader_info *info; |
| |
| int16_t op_info_map[D3DSIO_BREAKP + 1]; |
| }; |
| |
| #define IS_VS (tx->processor == PIPE_SHADER_VERTEX) |
| #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT) |
| |
| #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} |
| |
| static void |
| sm1_read_semantic(struct shader_translator *, struct sm1_semantic *); |
| |
| static void |
| sm1_instruction_check(const struct sm1_instruction *insn) |
| { |
| if (insn->opcode == D3DSIO_CRS) |
| { |
| if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3) |
| { |
| DBG("CRS.mask.w\n"); |
| } |
| } |
| } |
| |
| static void |
| nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, |
| int mask, int output_index) |
| { |
| tx->output_info[tx->num_outputs].output_semantic = Usage; |
| tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; |
| tx->output_info[tx->num_outputs].mask = mask; |
| tx->output_info[tx->num_outputs].output_index = output_index; |
| tx->num_outputs++; |
| } |
| |
| static boolean |
| tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) |
| { |
| INT i; |
| |
| if (index < 0 || index >= tx->num_constf_allowed) { |
| tx->failure = TRUE; |
| return FALSE; |
| } |
| for (i = 0; i < tx->num_lconstf; ++i) { |
| if (tx->lconstf[i].idx == index) { |
| *src = tx->lconstf[i].reg; |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| static boolean |
| tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) |
| { |
| int i; |
| |
| if (index < 0 || index >= tx->num_consti_allowed) { |
| tx->failure = TRUE; |
| return FALSE; |
| } |
| for (i = 0; i < tx->num_lconsti; ++i) { |
| if (tx->lconsti[i].idx == index) { |
| *src = tx->lconsti[i].reg; |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| static boolean |
| tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) |
| { |
| int i; |
| |
| if (index < 0 || index >= tx->num_constb_allowed) { |
| tx->failure = TRUE; |
| return FALSE; |
| } |
| for (i = 0; i < tx->num_lconstb; ++i) { |
| if (tx->lconstb[i].idx == index) { |
| *src = tx->lconstb[i].reg; |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| |
| static void |
| tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) |
| { |
| unsigned n; |
| |
| FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) |
| |
| for (n = 0; n < tx->num_lconstf; ++n) |
| if (tx->lconstf[n].idx == index) |
| break; |
| if (n == tx->num_lconstf) { |
| if ((n % 8) == 0) { |
| tx->lconstf = REALLOC(tx->lconstf, |
| (n + 0) * sizeof(tx->lconstf[0]), |
| (n + 8) * sizeof(tx->lconstf[0])); |
| assert(tx->lconstf); |
| } |
| tx->num_lconstf++; |
| } |
| tx->lconstf[n].idx = index; |
| tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); |
| |
| memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); |
| } |
| static void |
| tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) |
| { |
| unsigned n; |
| |
| FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) |
| |
| for (n = 0; n < tx->num_lconsti; ++n) |
| if (tx->lconsti[n].idx == index) |
| break; |
| if (n == tx->num_lconsti) { |
| if ((n % 8) == 0) { |
| tx->lconsti = REALLOC(tx->lconsti, |
| (n + 0) * sizeof(tx->lconsti[0]), |
| (n + 8) * sizeof(tx->lconsti[0])); |
| assert(tx->lconsti); |
| } |
| tx->num_lconsti++; |
| } |
| |
| tx->lconsti[n].idx = index; |
| tx->lconsti[n].reg = tx->native_integers ? |
| ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : |
| ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); |
| } |
| static void |
| tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) |
| { |
| unsigned n; |
| |
| FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) |
| |
| for (n = 0; n < tx->num_lconstb; ++n) |
| if (tx->lconstb[n].idx == index) |
| break; |
| if (n == tx->num_lconstb) { |
| if ((n % 8) == 0) { |
| tx->lconstb = REALLOC(tx->lconstb, |
| (n + 0) * sizeof(tx->lconstb[0]), |
| (n + 8) * sizeof(tx->lconstb[0])); |
| assert(tx->lconstb); |
| } |
| tx->num_lconstb++; |
| } |
| |
| tx->lconstb[n].idx = index; |
| tx->lconstb[n].reg = tx->native_integers ? |
| ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : |
| ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f); |
| } |
| |
| static inline struct ureg_dst |
| tx_scratch(struct shader_translator *tx) |
| { |
| if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) { |
| tx->failure = TRUE; |
| return tx->regs.t[0]; |
| } |
| if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) |
| tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); |
| return tx->regs.t[tx->num_scratch++]; |
| } |
| |
| static inline struct ureg_dst |
| tx_scratch_scalar(struct shader_translator *tx) |
| { |
| return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); |
| } |
| |
| static inline struct ureg_src |
| tx_src_scalar(struct ureg_dst dst) |
| { |
| struct ureg_src src = ureg_src(dst); |
| int c = ffs(dst.WriteMask) - 1; |
| if (dst.WriteMask == (1 << c)) |
| src = ureg_scalar(src, c); |
| return src; |
| } |
| |
| static inline void |
| tx_temp_alloc(struct shader_translator *tx, INT idx) |
| { |
| assert(idx >= 0); |
| if (idx >= tx->num_temp) { |
| unsigned k = tx->num_temp; |
| unsigned n = idx + 1; |
| tx->regs.r = REALLOC(tx->regs.r, |
| k * sizeof(tx->regs.r[0]), |
| n * sizeof(tx->regs.r[0])); |
| for (; k < n; ++k) |
| tx->regs.r[k] = ureg_dst_undef(); |
| tx->num_temp = n; |
| } |
| if (ureg_dst_is_undef(tx->regs.r[idx])) |
| tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); |
| } |
| |
| static inline void |
| tx_addr_alloc(struct shader_translator *tx, INT idx) |
| { |
| assert(idx == 0); |
| if (ureg_dst_is_undef(tx->regs.address)) |
| tx->regs.address = ureg_DECL_address(tx->ureg); |
| if (ureg_dst_is_undef(tx->regs.a0)) |
| tx->regs.a0 = ureg_DECL_temporary(tx->ureg); |
| } |
| |
| static inline void |
| tx_pred_alloc(struct shader_translator *tx, INT idx) |
| { |
| assert(idx == 0); |
| if (ureg_dst_is_undef(tx->regs.p)) |
| tx->regs.p = ureg_DECL_predicate(tx->ureg); |
| } |
| |
| /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions |
| * the projection should be applied on the texture. It doesn't |
| * apply on texkill. |
| * The doc is very imprecise here (it says the projection is done |
| * before rasterization, thus in vs, which seems wrong since ps instructions |
| * are affected differently) |
| * For now we only apply to the ps TEX instruction and TEXBEM. |
| * Perhaps some other instructions would need it */ |
| static inline void |
| apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, |
| struct ureg_src src, INT idx) |
| { |
| struct ureg_dst tmp; |
| unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); |
| |
| /* no projection */ |
| if (dim == 1) { |
| ureg_MOV(tx->ureg, dst, src); |
| } else { |
| tmp = tx_scratch_scalar(tx); |
| ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); |
| ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); |
| } |
| } |
| |
| static inline void |
| TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, |
| unsigned target, struct ureg_src src0, |
| struct ureg_src src1, INT idx) |
| { |
| unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); |
| struct ureg_dst tmp; |
| |
| /* dim == 1: no projection |
| * Looks like must be disabled when it makes no |
| * sense according the texture dimensions |
| */ |
| if (dim == 1 || dim <= target) { |
| ureg_TEX(tx->ureg, dst, target, src0, src1); |
| } else if (dim == 4) { |
| ureg_TXP(tx->ureg, dst, target, src0, src1); |
| } else { |
| tmp = tx_scratch(tx); |
| apply_ps1x_projection(tx, tmp, src0, idx); |
| ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); |
| } |
| } |
| |
| static inline void |
| tx_texcoord_alloc(struct shader_translator *tx, INT idx) |
| { |
| assert(IS_PS); |
| assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT)); |
| if (ureg_src_is_undef(tx->regs.vT[idx])) |
| tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx, |
| TGSI_INTERPOLATE_PERSPECTIVE); |
| } |
| |
| static inline unsigned * |
| tx_bgnloop(struct shader_translator *tx) |
| { |
| tx->loop_depth++; |
| if (tx->loop_depth_max < tx->loop_depth) |
| tx->loop_depth_max = tx->loop_depth; |
| assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH); |
| return &tx->loop_labels[tx->loop_depth - 1]; |
| } |
| |
| static inline unsigned * |
| tx_endloop(struct shader_translator *tx) |
| { |
| assert(tx->loop_depth); |
| tx->loop_depth--; |
| ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth], |
| ureg_get_instruction_number(tx->ureg)); |
| return &tx->loop_labels[tx->loop_depth]; |
| } |
| |
| static struct ureg_dst |
| tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep) |
| { |
| const unsigned l = tx->loop_depth - 1; |
| |
| if (!tx->loop_depth) |
| { |
| DBG("loop counter requested outside of loop\n"); |
| return ureg_dst_undef(); |
| } |
| |
| if (ureg_dst_is_undef(tx->regs.rL[l])) { |
| /* loop or rep ctr creation */ |
| tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg); |
| tx->loop_or_rep[l] = loop_or_rep; |
| } |
| /* loop - rep - endloop - endrep not allowed */ |
| assert(tx->loop_or_rep[l] == loop_or_rep); |
| |
| return tx->regs.rL[l]; |
| } |
| |
| static struct ureg_src |
| tx_get_loopal(struct shader_translator *tx) |
| { |
| int loop_level = tx->loop_depth - 1; |
| |
| while (loop_level >= 0) { |
| /* handle loop - rep - endrep - endloop case */ |
| if (tx->loop_or_rep[loop_level]) |
| /* the value is in the loop counter y component (nine implementation) */ |
| return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y); |
| loop_level--; |
| } |
| |
| DBG("aL counter requested outside of loop\n"); |
| return ureg_src_undef(); |
| } |
| |
| static inline unsigned * |
| tx_cond(struct shader_translator *tx) |
| { |
| assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); |
| tx->cond_depth++; |
| return &tx->cond_labels[tx->cond_depth - 1]; |
| } |
| |
| static inline unsigned * |
| tx_elsecond(struct shader_translator *tx) |
| { |
| assert(tx->cond_depth); |
| return &tx->cond_labels[tx->cond_depth - 1]; |
| } |
| |
| static inline void |
| tx_endcond(struct shader_translator *tx) |
| { |
| assert(tx->cond_depth); |
| tx->cond_depth--; |
| ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth], |
| ureg_get_instruction_number(tx->ureg)); |
| } |
| |
| static inline struct ureg_dst |
| nine_ureg_dst_register(unsigned file, int index) |
| { |
| return ureg_dst(ureg_src_register(file, index)); |
| } |
| |
| static inline struct ureg_src |
| nine_get_position_input(struct shader_translator *tx) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| |
| if (tx->wpos_is_sysval) |
| return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); |
| else |
| return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, |
| 0, TGSI_INTERPOLATE_LINEAR); |
| } |
| |
| static struct ureg_src |
| tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_src src; |
| struct ureg_dst tmp; |
| |
| switch (param->file) |
| { |
| case D3DSPR_TEMP: |
| assert(!param->rel); |
| tx_temp_alloc(tx, param->idx); |
| src = ureg_src(tx->regs.r[param->idx]); |
| break; |
| /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ |
| case D3DSPR_ADDR: |
| assert(!param->rel); |
| if (IS_VS) { |
| assert(param->idx == 0); |
| /* the address register (vs only) must be |
| * assigned before use */ |
| assert(!ureg_dst_is_undef(tx->regs.a0)); |
| /* Round to lowest for vs1.1 (contrary to the doc), else |
| * round to nearest */ |
| if (tx->version.major < 2 && tx->version.minor < 2) |
| ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); |
| else |
| ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); |
| src = ureg_src(tx->regs.address); |
| } else { |
| if (tx->version.major < 2 && tx->version.minor < 4) { |
| /* no subroutines, so should be defined */ |
| src = ureg_src(tx->regs.tS[param->idx]); |
| } else { |
| tx_texcoord_alloc(tx, param->idx); |
| src = tx->regs.vT[param->idx]; |
| } |
| } |
| break; |
| case D3DSPR_INPUT: |
| if (IS_VS) { |
| src = ureg_src_register(TGSI_FILE_INPUT, param->idx); |
| } else { |
| if (tx->version.major < 3) { |
| assert(!param->rel); |
| src = ureg_DECL_fs_input_cyl_centroid( |
| ureg, TGSI_SEMANTIC_COLOR, param->idx, |
| TGSI_INTERPOLATE_COLOR, 0, |
| tx->info->force_color_in_centroid ? |
| TGSI_INTERPOLATE_LOC_CENTROID : 0, |
| 0, 1); |
| } else { |
| if(param->rel) { |
| /* Copy all inputs (non consecutive) |
| * to temp array (consecutive). |
| * This is not good for performance. |
| * A better way would be to have inputs |
| * consecutive (would need implement alternative |
| * way to match vs outputs and ps inputs). |
| * However even with the better way, the temp array |
| * copy would need to be used if some inputs |
| * are not GENERIC or if they have different |
| * interpolation flag. */ |
| if (ureg_src_is_undef(tx->regs.v_consecutive)) { |
| int i; |
| tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0)); |
| for (i = 0; i < 10; i++) { |
| if (!ureg_src_is_undef(tx->regs.v[i])) |
| ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]); |
| else |
| ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); |
| } |
| } |
| src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx); |
| } else { |
| assert(param->idx < ARRAY_SIZE(tx->regs.v)); |
| src = tx->regs.v[param->idx]; |
| } |
| } |
| } |
| break; |
| case D3DSPR_PREDICATE: |
| assert(!param->rel); |
| tx_pred_alloc(tx, param->idx); |
| src = ureg_src(tx->regs.p); |
| break; |
| case D3DSPR_SAMPLER: |
| assert(param->mod == NINED3DSPSM_NONE); |
| assert(param->swizzle == NINED3DSP_NOSWIZZLE); |
| assert(!param->rel); |
| src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx); |
| break; |
| case D3DSPR_CONST: |
| assert(!param->rel || IS_VS); |
| if (param->rel) |
| tx->indirect_const_access = TRUE; |
| if (param->rel || !tx_lconstf(tx, &src, param->idx)) { |
| if (!param->rel) |
| nine_info_mark_const_f_used(tx->info, param->idx); |
| /* vswp constant handling: we use two buffers |
| * to fit all the float constants. The special handling |
| * doesn't need to be elsewhere, because all the instructions |
| * accessing the constants directly are VS1, and swvp |
| * is VS >= 2 */ |
| if (IS_VS && tx->info->swvp_on) { |
| if (!param->rel) { |
| if (param->idx < 4096) { |
| src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); |
| src = ureg_src_dimension(src, 0); |
| } else { |
| src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096); |
| src = ureg_src_dimension(src, 1); |
| } |
| } else { |
| src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */ |
| src = ureg_src_dimension(src, 0); |
| } |
| } else |
| src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); |
| } |
| if (!IS_VS && tx->version.major < 2) { |
| /* ps 1.X clamps constants */ |
| tmp = tx_scratch(tx); |
| ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f)); |
| ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); |
| src = ureg_src(tmp); |
| } |
| break; |
| case D3DSPR_CONST2: |
| case D3DSPR_CONST3: |
| case D3DSPR_CONST4: |
| DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n"); |
| assert(!"CONST2/3/4"); |
| src = ureg_imm1f(ureg, 0.0f); |
| break; |
| case D3DSPR_CONSTINT: |
| /* relative adressing only possible for float constants in vs */ |
| assert(!param->rel); |
| if (!tx_lconsti(tx, &src, param->idx)) { |
| nine_info_mark_const_i_used(tx->info, param->idx); |
| if (IS_VS && tx->info->swvp_on) { |
| src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); |
| src = ureg_src_dimension(src, 2); |
| } else |
| src = ureg_src_register(TGSI_FILE_CONSTANT, |
| tx->info->const_i_base + param->idx); |
| } |
| break; |
| case D3DSPR_CONSTBOOL: |
| assert(!param->rel); |
| if (!tx_lconstb(tx, &src, param->idx)) { |
| char r = param->idx / 4; |
| char s = param->idx & 3; |
| nine_info_mark_const_b_used(tx->info, param->idx); |
| if (IS_VS && tx->info->swvp_on) { |
| src = ureg_src_register(TGSI_FILE_CONSTANT, r); |
| src = ureg_src_dimension(src, 3); |
| } else |
| src = ureg_src_register(TGSI_FILE_CONSTANT, |
| tx->info->const_b_base + r); |
| src = ureg_swizzle(src, s, s, s, s); |
| } |
| break; |
| case D3DSPR_LOOP: |
| if (ureg_dst_is_undef(tx->regs.address)) |
| tx->regs.address = ureg_DECL_address(ureg); |
| if (!tx->native_integers) |
| ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx)); |
| else |
| ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx)); |
| src = ureg_src(tx->regs.address); |
| break; |
| case D3DSPR_MISCTYPE: |
| switch (param->idx) { |
| case D3DSMO_POSITION: |
| if (ureg_src_is_undef(tx->regs.vPos)) |
| tx->regs.vPos = nine_get_position_input(tx); |
| if (tx->shift_wpos) { |
| /* TODO: do this only once */ |
| struct ureg_dst wpos = tx_scratch(tx); |
| ureg_SUB(ureg, wpos, tx->regs.vPos, |
| ureg_imm4f(ureg, 0.5f, 0.5f, 0.0f, 0.0f)); |
| src = ureg_src(wpos); |
| } else { |
| src = tx->regs.vPos; |
| } |
| break; |
| case D3DSMO_FACE: |
| if (ureg_src_is_undef(tx->regs.vFace)) { |
| if (tx->face_is_sysval_integer) { |
| tmp = tx_scratch(tx); |
| tx->regs.vFace = |
| ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0); |
| |
| /* convert bool to float */ |
| ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X), |
| ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1)); |
| tx->regs.vFace = ureg_src(tmp); |
| } else { |
| tx->regs.vFace = ureg_DECL_fs_input(ureg, |
| TGSI_SEMANTIC_FACE, 0, |
| TGSI_INTERPOLATE_CONSTANT); |
| } |
| tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X); |
| } |
| src = tx->regs.vFace; |
| break; |
| default: |
| assert(!"invalid src D3DSMO"); |
| break; |
| } |
| assert(!param->rel); |
| break; |
| case D3DSPR_TEMPFLOAT16: |
| break; |
| default: |
| assert(!"invalid src D3DSPR"); |
| } |
| if (param->rel) |
| src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); |
| |
| switch (param->mod) { |
| case NINED3DSPSM_DW: |
| tmp = tx_scratch(tx); |
| /* NOTE: app is not allowed to read w with this modifier */ |
| ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src); |
| ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W))); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_DZ: |
| tmp = tx_scratch(tx); |
| /* NOTE: app is not allowed to read z with this modifier */ |
| ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src); |
| ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z))); |
| src = ureg_src(tmp); |
| break; |
| default: |
| break; |
| } |
| |
| if (param->swizzle != NINED3DSP_NOSWIZZLE) |
| src = ureg_swizzle(src, |
| (param->swizzle >> 0) & 0x3, |
| (param->swizzle >> 2) & 0x3, |
| (param->swizzle >> 4) & 0x3, |
| (param->swizzle >> 6) & 0x3); |
| |
| switch (param->mod) { |
| case NINED3DSPSM_ABS: |
| src = ureg_abs(src); |
| break; |
| case NINED3DSPSM_ABSNEG: |
| src = ureg_negate(ureg_abs(src)); |
| break; |
| case NINED3DSPSM_NEG: |
| src = ureg_negate(src); |
| break; |
| case NINED3DSPSM_BIAS: |
| tmp = tx_scratch(tx); |
| ureg_SUB(ureg, tmp, src, ureg_imm1f(ureg, 0.5f)); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_BIASNEG: |
| tmp = tx_scratch(tx); |
| ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 0.5f), src); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_NOT: |
| if (tx->native_integers) { |
| tmp = tx_scratch(tx); |
| ureg_NOT(ureg, tmp, src); |
| src = ureg_src(tmp); |
| break; |
| } |
| /* fall through */ |
| case NINED3DSPSM_COMP: |
| tmp = tx_scratch(tx); |
| ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), src); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_DZ: |
| case NINED3DSPSM_DW: |
| /* Already handled*/ |
| break; |
| case NINED3DSPSM_SIGN: |
| tmp = tx_scratch(tx); |
| ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_SIGNNEG: |
| tmp = tx_scratch(tx); |
| ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f)); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_X2: |
| tmp = tx_scratch(tx); |
| ureg_ADD(ureg, tmp, src, src); |
| src = ureg_src(tmp); |
| break; |
| case NINED3DSPSM_X2NEG: |
| tmp = tx_scratch(tx); |
| ureg_ADD(ureg, tmp, src, src); |
| src = ureg_negate(ureg_src(tmp)); |
| break; |
| default: |
| assert(param->mod == NINED3DSPSM_NONE); |
| break; |
| } |
| |
| return src; |
| } |
| |
| static struct ureg_dst |
| _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) |
| { |
| struct ureg_dst dst; |
| |
| switch (param->file) |
| { |
| case D3DSPR_TEMP: |
| assert(!param->rel); |
| tx_temp_alloc(tx, param->idx); |
| dst = tx->regs.r[param->idx]; |
| break; |
| /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ |
| case D3DSPR_ADDR: |
| assert(!param->rel); |
| if (tx->version.major < 2 && !IS_VS) { |
| if (ureg_dst_is_undef(tx->regs.tS[param->idx])) |
| tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg); |
| dst = tx->regs.tS[param->idx]; |
| } else |
| if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */ |
| tx_texcoord_alloc(tx, param->idx); |
| dst = ureg_dst(tx->regs.vT[param->idx]); |
| } else { |
| tx_addr_alloc(tx, param->idx); |
| dst = tx->regs.a0; |
| } |
| break; |
| case D3DSPR_RASTOUT: |
| assert(!param->rel); |
| switch (param->idx) { |
| case 0: |
| if (ureg_dst_is_undef(tx->regs.oPos)) |
| tx->regs.oPos = |
| ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); |
| dst = tx->regs.oPos; |
| break; |
| case 1: |
| if (ureg_dst_is_undef(tx->regs.oFog)) |
| tx->regs.oFog = |
| ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0)); |
| dst = tx->regs.oFog; |
| break; |
| case 2: |
| if (ureg_dst_is_undef(tx->regs.oPts)) |
| tx->regs.oPts = ureg_DECL_temporary(tx->ureg); |
| dst = tx->regs.oPts; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| break; |
| /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */ |
| case D3DSPR_OUTPUT: |
| if (tx->version.major < 3) { |
| assert(!param->rel); |
| dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx); |
| } else { |
| assert(!param->rel); /* TODO */ |
| assert(param->idx < ARRAY_SIZE(tx->regs.o)); |
| dst = tx->regs.o[param->idx]; |
| } |
| break; |
| case D3DSPR_ATTROUT: /* VS */ |
| case D3DSPR_COLOROUT: /* PS */ |
| assert(param->idx >= 0 && param->idx < 4); |
| assert(!param->rel); |
| tx->info->rt_mask |= 1 << param->idx; |
| if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { |
| /* ps < 3: oCol[0] will have fog blending afterward */ |
| if (!IS_VS && tx->version.major < 3 && param->idx == 0) { |
| tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); |
| } else { |
| tx->regs.oCol[param->idx] = |
| ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); |
| } |
| } |
| dst = tx->regs.oCol[param->idx]; |
| if (IS_VS && tx->version.major < 3) |
| dst = ureg_saturate(dst); |
| break; |
| case D3DSPR_DEPTHOUT: |
| assert(!param->rel); |
| if (ureg_dst_is_undef(tx->regs.oDepth)) |
| tx->regs.oDepth = |
| ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, |
| TGSI_WRITEMASK_Z, 0, 1); |
| dst = tx->regs.oDepth; /* XXX: must write .z component */ |
| break; |
| case D3DSPR_PREDICATE: |
| assert(!param->rel); |
| tx_pred_alloc(tx, param->idx); |
| dst = tx->regs.p; |
| break; |
| case D3DSPR_TEMPFLOAT16: |
| DBG("unhandled D3DSPR: %u\n", param->file); |
| break; |
| default: |
| assert(!"invalid dst D3DSPR"); |
| break; |
| } |
| if (param->rel) |
| dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel)); |
| |
| if (param->mask != NINED3DSP_WRITEMASK_ALL) |
| dst = ureg_writemask(dst, param->mask); |
| if (param->mod & NINED3DSPDM_SATURATE) |
| dst = ureg_saturate(dst); |
| |
| return dst; |
| } |
| |
| static struct ureg_dst |
| tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) |
| { |
| if (param->shift) { |
| tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask); |
| return tx->regs.tdst; |
| } |
| return _tx_dst_param(tx, param); |
| } |
| |
| static void |
| tx_apply_dst0_modifiers(struct shader_translator *tx) |
| { |
| struct ureg_dst rdst; |
| float f; |
| |
| if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL) |
| return; |
| rdst = _tx_dst_param(tx, &tx->insn.dst[0]); |
| |
| assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */ |
| |
| if (tx->insn.dst[0].shift < 0) |
| f = 1.0f / (1 << -tx->insn.dst[0].shift); |
| else |
| f = 1 << tx->insn.dst[0].shift; |
| |
| ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f)); |
| } |
| |
| static struct ureg_src |
| tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param) |
| { |
| struct ureg_src src; |
| |
| assert(!param->shift); |
| assert(!(param->mod & NINED3DSPDM_SATURATE)); |
| |
| switch (param->file) { |
| case D3DSPR_INPUT: |
| if (IS_VS) { |
| src = ureg_src_register(TGSI_FILE_INPUT, param->idx); |
| } else { |
| assert(!param->rel); |
| assert(param->idx < ARRAY_SIZE(tx->regs.v)); |
| src = tx->regs.v[param->idx]; |
| } |
| break; |
| default: |
| src = ureg_src(tx_dst_param(tx, param)); |
| break; |
| } |
| if (param->rel) |
| src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); |
| |
| if (!param->mask) |
| WARN("mask is 0, using identity swizzle\n"); |
| |
| if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) { |
| char s[4]; |
| int n; |
| int c; |
| for (n = 0, c = 0; c < 4; ++c) |
| if (param->mask & (1 << c)) |
| s[n++] = c; |
| assert(n); |
| for (c = n; c < 4; ++c) |
| s[c] = s[n - 1]; |
| src = ureg_swizzle(src, s[0], s[1], s[2], s[3]); |
| } |
| return src; |
| } |
| |
| static HRESULT |
| NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst; |
| struct ureg_src src[2]; |
| struct sm1_src_param *src_mat = &tx->insn.src[1]; |
| unsigned i; |
| |
| dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| src[0] = tx_src_param(tx, &tx->insn.src[0]); |
| |
| for (i = 0; i < n; i++) |
| { |
| const unsigned m = (1 << i); |
| |
| src[1] = tx_src_param(tx, src_mat); |
| src_mat->idx++; |
| |
| if (!(dst.WriteMask & m)) |
| continue; |
| |
| /* XXX: src == dst case ? */ |
| |
| switch (k) { |
| case 3: |
| ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]); |
| break; |
| case 4: |
| ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]); |
| break; |
| default: |
| DBG("invalid operation: M%ux%u\n", m, n); |
| break; |
| } |
| } |
| |
| return D3D_OK; |
| } |
| |
| #define VNOTSUPPORTED 0, 0 |
| #define V(maj, min) (((maj) << 8) | (min)) |
| |
| static inline const char * |
| d3dsio_to_string( unsigned opcode ) |
| { |
| static const char *names[] = { |
| "NOP", |
| "MOV", |
| "ADD", |
| "SUB", |
| "MAD", |
| "MUL", |
| "RCP", |
| "RSQ", |
| "DP3", |
| "DP4", |
| "MIN", |
| "MAX", |
| "SLT", |
| "SGE", |
| "EXP", |
| "LOG", |
| "LIT", |
| "DST", |
| "LRP", |
| "FRC", |
| "M4x4", |
| "M4x3", |
| "M3x4", |
| "M3x3", |
| "M3x2", |
| "CALL", |
| "CALLNZ", |
| "LOOP", |
| "RET", |
| "ENDLOOP", |
| "LABEL", |
| "DCL", |
| "POW", |
| "CRS", |
| "SGN", |
| "ABS", |
| "NRM", |
| "SINCOS", |
| "REP", |
| "ENDREP", |
| "IF", |
| "IFC", |
| "ELSE", |
| "ENDIF", |
| "BREAK", |
| "BREAKC", |
| "MOVA", |
| "DEFB", |
| "DEFI", |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| "TEXCOORD", |
| "TEXKILL", |
| "TEX", |
| "TEXBEM", |
| "TEXBEML", |
| "TEXREG2AR", |
| "TEXREG2GB", |
| "TEXM3x2PAD", |
| "TEXM3x2TEX", |
| "TEXM3x3PAD", |
| "TEXM3x3TEX", |
| NULL, |
| "TEXM3x3SPEC", |
| "TEXM3x3VSPEC", |
| "EXPP", |
| "LOGP", |
| "CND", |
| "DEF", |
| "TEXREG2RGB", |
| "TEXDP3TEX", |
| "TEXM3x2DEPTH", |
| "TEXDP3", |
| "TEXM3x3", |
| "TEXDEPTH", |
| "CMP", |
| "BEM", |
| "DP2ADD", |
| "DSX", |
| "DSY", |
| "TEXLDD", |
| "SETP", |
| "TEXLDL", |
| "BREAKP" |
| }; |
| |
| if (opcode < ARRAY_SIZE(names)) return names[opcode]; |
| |
| switch (opcode) { |
| case D3DSIO_PHASE: return "PHASE"; |
| case D3DSIO_COMMENT: return "COMMENT"; |
| case D3DSIO_END: return "END"; |
| default: |
| return NULL; |
| } |
| } |
| |
| #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL } |
| #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \ |
| (inst).vert_version.max | \ |
| (inst).frag_version.min | \ |
| (inst).frag_version.max) |
| |
| #define SPECIAL(name) \ |
| NineTranslateInstruction_##name |
| |
| #define DECL_SPECIAL(name) \ |
| static HRESULT \ |
| NineTranslateInstruction_##name( struct shader_translator *tx ) |
| |
| static HRESULT |
| NineTranslateInstruction_Generic(struct shader_translator *); |
| |
| DECL_SPECIAL(NOP) |
| { |
| /* Nothing to do. NOP was used to avoid hangs |
| * with very old d3d drivers. */ |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(M4x4) |
| { |
| return NineTranslateInstruction_Mkxn(tx, 4, 4); |
| } |
| |
| DECL_SPECIAL(M4x3) |
| { |
| return NineTranslateInstruction_Mkxn(tx, 4, 3); |
| } |
| |
| DECL_SPECIAL(M3x4) |
| { |
| return NineTranslateInstruction_Mkxn(tx, 3, 4); |
| } |
| |
| DECL_SPECIAL(M3x3) |
| { |
| return NineTranslateInstruction_Mkxn(tx, 3, 3); |
| } |
| |
| DECL_SPECIAL(M3x2) |
| { |
| return NineTranslateInstruction_Mkxn(tx, 3, 2); |
| } |
| |
| DECL_SPECIAL(CMP) |
| { |
| ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]), |
| tx_src_param(tx, &tx->insn.src[0]), |
| tx_src_param(tx, &tx->insn.src[2]), |
| tx_src_param(tx, &tx->insn.src[1])); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(CND) |
| { |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_dst cgt; |
| struct ureg_src cnd; |
| |
| /* the coissue flag was a tip for compilers to advise to |
| * execute two operations at the same time, in cases |
| * the two executions had same dst with different channels. |
| * It has no effect on current hw. However it seems CND |
| * is affected. The handling of this very specific case |
| * handled below mimick wine behaviour */ |
| if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) { |
| ureg_MOV(tx->ureg, |
| dst, tx_src_param(tx, &tx->insn.src[1])); |
| return D3D_OK; |
| } |
| |
| cnd = tx_src_param(tx, &tx->insn.src[0]); |
| cgt = tx_scratch(tx); |
| |
| if (tx->version.major == 1 && tx->version.minor < 4) |
| cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); |
| |
| ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f)); |
| |
| ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)), |
| tx_src_param(tx, &tx->insn.src[1]), |
| tx_src_param(tx, &tx->insn.src[2])); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(CALL) |
| { |
| assert(tx->insn.src[0].idx < tx->num_inst_labels); |
| ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(CALLNZ) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); |
| |
| if (!tx->native_integers) |
| ureg_IF(ureg, src, tx_cond(tx)); |
| else |
| ureg_UIF(ureg, src, tx_cond(tx)); |
| ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]); |
| tx_endcond(tx); |
| ureg_ENDIF(ureg); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(LOOP) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| unsigned *label; |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); |
| struct ureg_dst ctr; |
| struct ureg_dst tmp; |
| struct ureg_src ctrx; |
| |
| label = tx_bgnloop(tx); |
| ctr = tx_get_loopctr(tx, TRUE); |
| ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); |
| |
| /* src: num_iterations - start_value of al - step for al - 0 */ |
| ureg_MOV(ureg, ctr, src); |
| ureg_BGNLOOP(tx->ureg, label); |
| tmp = tx_scratch_scalar(tx); |
| /* Initially ctr.x contains the number of iterations. |
| * ctr.y will contain the updated value of al. |
| * We decrease ctr.x at the end of every iteration, |
| * and stop when it reaches 0. */ |
| |
| if (!tx->native_integers) { |
| /* case src and ctr contain floats */ |
| /* to avoid precision issue, we stop when ctr <= 0.5 */ |
| ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); |
| ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); |
| } else { |
| /* case src and ctr contain integers */ |
| ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); |
| ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); |
| } |
| ureg_BRK(ureg); |
| tx_endcond(tx); |
| ureg_ENDIF(ureg); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(RET) |
| { |
| ureg_RET(tx->ureg); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(ENDLOOP) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst ctr = tx_get_loopctr(tx, TRUE); |
| struct ureg_dst dst_ctrx, dst_al; |
| struct ureg_src src_ctr, al_counter; |
| |
| dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); |
| dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1); |
| src_ctr = ureg_src(ctr); |
| al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z); |
| |
| /* ctr.x -= 1 |
| * ctr.y (aL) += step */ |
| if (!tx->native_integers) { |
| ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); |
| ureg_ADD(ureg, dst_al, src_ctr, al_counter); |
| } else { |
| ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); |
| ureg_UADD(ureg, dst_al, src_ctr, al_counter); |
| } |
| ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(LABEL) |
| { |
| unsigned k = tx->num_inst_labels; |
| unsigned n = tx->insn.src[0].idx; |
| assert(n < 2048); |
| if (n >= k) |
| tx->inst_labels = REALLOC(tx->inst_labels, |
| k * sizeof(tx->inst_labels[0]), |
| n * sizeof(tx->inst_labels[0])); |
| |
| tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(SINCOS) |
| { |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| |
| assert(!(dst.WriteMask & 0xc)); |
| |
| dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */ |
| ureg_SCS(tx->ureg, dst, src); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(SGN) |
| { |
| ureg_SSG(tx->ureg, |
| tx_dst_param(tx, &tx->insn.dst[0]), |
| tx_src_param(tx, &tx->insn.src[0])); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(REP) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| unsigned *label; |
| struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); |
| struct ureg_dst ctr; |
| struct ureg_dst tmp; |
| struct ureg_src ctrx; |
| |
| label = tx_bgnloop(tx); |
| ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); |
| ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); |
| |
| /* NOTE: rep must be constant, so we don't have to save the count */ |
| assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); |
| |
| /* rep: num_iterations - 0 - 0 - 0 */ |
| ureg_MOV(ureg, ctr, rep); |
| ureg_BGNLOOP(ureg, label); |
| tmp = tx_scratch_scalar(tx); |
| /* Initially ctr.x contains the number of iterations. |
| * We decrease ctr.x at the end of every iteration, |
| * and stop when it reaches 0. */ |
| |
| if (!tx->native_integers) { |
| /* case src and ctr contain floats */ |
| /* to avoid precision issue, we stop when ctr <= 0.5 */ |
| ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); |
| ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); |
| } else { |
| /* case src and ctr contain integers */ |
| ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); |
| ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); |
| } |
| ureg_BRK(ureg); |
| tx_endcond(tx); |
| ureg_ENDIF(ureg); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(ENDREP) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); |
| struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); |
| struct ureg_src src_ctr = ureg_src(ctr); |
| |
| /* ctr.x -= 1 */ |
| if (!tx->native_integers) |
| ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); |
| else |
| ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); |
| |
| ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(ENDIF) |
| { |
| tx_endcond(tx); |
| ureg_ENDIF(tx->ureg); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(IF) |
| { |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| |
| if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL) |
| ureg_UIF(tx->ureg, src, tx_cond(tx)); |
| else |
| ureg_IF(tx->ureg, src, tx_cond(tx)); |
| |
| return D3D_OK; |
| } |
| |
| static inline unsigned |
| sm1_insn_flags_to_tgsi_setop(BYTE flags) |
| { |
| switch (flags) { |
| case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT; |
| case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ; |
| case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE; |
| case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT; |
| case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE; |
| case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; |
| default: |
| assert(!"invalid comparison flags"); |
| return TGSI_OPCODE_SGT; |
| } |
| } |
| |
| DECL_SPECIAL(IFC) |
| { |
| const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); |
| struct ureg_src src[2]; |
| struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); |
| src[0] = tx_src_param(tx, &tx->insn.src[0]); |
| src[1] = tx_src_param(tx, &tx->insn.src[1]); |
| ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2); |
| ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(ELSE) |
| { |
| ureg_ELSE(tx->ureg, tx_elsecond(tx)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(BREAKC) |
| { |
| const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); |
| struct ureg_src src[2]; |
| struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); |
| src[0] = tx_src_param(tx, &tx->insn.src[0]); |
| src[1] = tx_src_param(tx, &tx->insn.src[1]); |
| ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2); |
| ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); |
| ureg_BRK(tx->ureg); |
| tx_endcond(tx); |
| ureg_ENDIF(tx->ureg); |
| return D3D_OK; |
| } |
| |
| static const char *sm1_declusage_names[] = |
| { |
| [D3DDECLUSAGE_POSITION] = "POSITION", |
| [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT", |
| [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES", |
| [D3DDECLUSAGE_NORMAL] = "NORMAL", |
| [D3DDECLUSAGE_PSIZE] = "PSIZE", |
| [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD", |
| [D3DDECLUSAGE_TANGENT] = "TANGENT", |
| [D3DDECLUSAGE_BINORMAL] = "BINORMAL", |
| [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR", |
| [D3DDECLUSAGE_POSITIONT] = "POSITIONT", |
| [D3DDECLUSAGE_COLOR] = "COLOR", |
| [D3DDECLUSAGE_FOG] = "FOG", |
| [D3DDECLUSAGE_DEPTH] = "DEPTH", |
| [D3DDECLUSAGE_SAMPLE] = "SAMPLE" |
| }; |
| |
| static inline unsigned |
| sm1_to_nine_declusage(struct sm1_semantic *dcl) |
| { |
| return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); |
| } |
| |
| static void |
| sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, |
| boolean tc, |
| struct sm1_semantic *dcl) |
| { |
| BYTE index = dcl->usage_idx; |
| |
| /* For everything that is not matching to a TGSI_SEMANTIC_****, |
| * we match to a TGSI_SEMANTIC_GENERIC with index. |
| * |
| * The index can be anything UINT16 and usage_idx is BYTE, |
| * so we can fit everything. It doesn't matter if indices |
| * are close together or low. |
| * |
| * |
| * POSITION >= 1: 10 * index + 6 |
| * COLOR >= 2: 10 * (index-1) + 7 |
| * TEXCOORD[0..15]: index |
| * BLENDWEIGHT: 10 * index + 18 |
| * BLENDINDICES: 10 * index + 19 |
| * NORMAL: 10 * index + 20 |
| * TANGENT: 10 * index + 21 |
| * BINORMAL: 10 * index + 22 |
| * TESSFACTOR: 10 * index + 23 |
| */ |
| |
| switch (dcl->usage) { |
| case D3DDECLUSAGE_POSITION: |
| case D3DDECLUSAGE_POSITIONT: |
| case D3DDECLUSAGE_DEPTH: |
| if (index == 0) { |
| sem->Name = TGSI_SEMANTIC_POSITION; |
| sem->Index = 0; |
| } else { |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 6; |
| } |
| break; |
| case D3DDECLUSAGE_COLOR: |
| if (index < 2) { |
| sem->Name = TGSI_SEMANTIC_COLOR; |
| sem->Index = index; |
| } else { |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * (index-1) + 7; |
| } |
| break; |
| case D3DDECLUSAGE_FOG: |
| assert(index == 0); |
| sem->Name = TGSI_SEMANTIC_FOG; |
| sem->Index = 0; |
| break; |
| case D3DDECLUSAGE_PSIZE: |
| assert(index == 0); |
| sem->Name = TGSI_SEMANTIC_PSIZE; |
| sem->Index = 0; |
| break; |
| case D3DDECLUSAGE_TEXCOORD: |
| assert(index < 16); |
| if (index < 8 && tc) |
| sem->Name = TGSI_SEMANTIC_TEXCOORD; |
| else |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = index; |
| break; |
| case D3DDECLUSAGE_BLENDWEIGHT: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 18; |
| break; |
| case D3DDECLUSAGE_BLENDINDICES: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 19; |
| break; |
| case D3DDECLUSAGE_NORMAL: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 20; |
| break; |
| case D3DDECLUSAGE_TANGENT: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 21; |
| break; |
| case D3DDECLUSAGE_BINORMAL: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 22; |
| break; |
| case D3DDECLUSAGE_TESSFACTOR: |
| sem->Name = TGSI_SEMANTIC_GENERIC; |
| sem->Index = 10 * index + 23; |
| break; |
| case D3DDECLUSAGE_SAMPLE: |
| sem->Name = TGSI_SEMANTIC_COUNT; |
| sem->Index = 0; |
| break; |
| default: |
| unreachable("Invalid DECLUSAGE."); |
| break; |
| } |
| } |
| |
| #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT) |
| #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) |
| #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) |
| #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) |
| static inline unsigned |
| d3dstt_to_tgsi_tex(BYTE sampler_type) |
| { |
| switch (sampler_type) { |
| case NINED3DSTT_1D: return TGSI_TEXTURE_1D; |
| case NINED3DSTT_2D: return TGSI_TEXTURE_2D; |
| case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D; |
| case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE; |
| default: |
| assert(0); |
| return TGSI_TEXTURE_UNKNOWN; |
| } |
| } |
| static inline unsigned |
| d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) |
| { |
| switch (sampler_type) { |
| case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D; |
| case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D; |
| case NINED3DSTT_VOLUME: |
| case NINED3DSTT_CUBE: |
| default: |
| assert(0); |
| return TGSI_TEXTURE_UNKNOWN; |
| } |
| } |
| static inline unsigned |
| ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) |
| { |
| switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { |
| case 1: return TGSI_TEXTURE_1D; |
| case 0: return TGSI_TEXTURE_2D; |
| case 3: return TGSI_TEXTURE_3D; |
| default: |
| return TGSI_TEXTURE_CUBE; |
| } |
| } |
| |
| static const char * |
| sm1_sampler_type_name(BYTE sampler_type) |
| { |
| switch (sampler_type) { |
| case NINED3DSTT_1D: return "1D"; |
| case NINED3DSTT_2D: return "2D"; |
| case NINED3DSTT_VOLUME: return "VOLUME"; |
| case NINED3DSTT_CUBE: return "CUBE"; |
| default: |
| return "(D3DSTT_?)"; |
| } |
| } |
| |
| static inline unsigned |
| nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) |
| { |
| switch (sem->Name) { |
| case TGSI_SEMANTIC_POSITION: |
| case TGSI_SEMANTIC_NORMAL: |
| return TGSI_INTERPOLATE_LINEAR; |
| case TGSI_SEMANTIC_BCOLOR: |
| case TGSI_SEMANTIC_COLOR: |
| return TGSI_INTERPOLATE_COLOR; |
| case TGSI_SEMANTIC_FOG: |
| case TGSI_SEMANTIC_GENERIC: |
| case TGSI_SEMANTIC_TEXCOORD: |
| case TGSI_SEMANTIC_CLIPDIST: |
| case TGSI_SEMANTIC_CLIPVERTEX: |
| return TGSI_INTERPOLATE_PERSPECTIVE; |
| case TGSI_SEMANTIC_EDGEFLAG: |
| case TGSI_SEMANTIC_FACE: |
| case TGSI_SEMANTIC_INSTANCEID: |
| case TGSI_SEMANTIC_PCOORD: |
| case TGSI_SEMANTIC_PRIMID: |
| case TGSI_SEMANTIC_PSIZE: |
| case TGSI_SEMANTIC_VERTEXID: |
| return TGSI_INTERPOLATE_CONSTANT; |
| default: |
| assert(0); |
| return TGSI_INTERPOLATE_CONSTANT; |
| } |
| } |
| |
| DECL_SPECIAL(DCL) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| boolean is_input; |
| boolean is_sampler; |
| struct tgsi_declaration_semantic tgsi; |
| struct sm1_semantic sem; |
| sm1_read_semantic(tx, &sem); |
| |
| is_input = sem.reg.file == D3DSPR_INPUT; |
| is_sampler = |
| sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER; |
| |
| DUMP("DCL "); |
| sm1_dump_dst_param(&sem.reg); |
| if (is_sampler) |
| DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type)); |
| else |
| if (tx->version.major >= 3) |
| DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx); |
| else |
| if (sem.usage | sem.usage_idx) |
| DUMP(" %u[%u]\n", sem.usage, sem.usage_idx); |
| else |
| DUMP("\n"); |
| |
| if (is_sampler) { |
| const unsigned m = 1 << sem.reg.idx; |
| ureg_DECL_sampler(ureg, sem.reg.idx); |
| tx->info->sampler_mask |= m; |
| tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ? |
| d3dstt_to_tgsi_tex_shadow(sem.sampler_type) : |
| d3dstt_to_tgsi_tex(sem.sampler_type); |
| return D3D_OK; |
| } |
| |
| sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem); |
| if (IS_VS) { |
| if (is_input) { |
| /* linkage outside of shader with vertex declaration */ |
| ureg_DECL_vs_input(ureg, sem.reg.idx); |
| assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); |
| tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); |
| tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); |
| /* NOTE: preserving order in case of indirect access */ |
| } else |
| if (tx->version.major >= 3) { |
| /* SM2 output semantic determined by file */ |
| assert(sem.reg.mask != 0); |
| if (sem.usage == D3DDECLUSAGE_POSITIONT) |
| tx->info->position_t = TRUE; |
| assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o)); |
| assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); |
| tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( |
| ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); |
| nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); |
| if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { |
| tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; |
| tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); |
| tx->regs.oPos = tx->regs.o[sem.reg.idx]; |
| } |
| |
| if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { |
| tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); |
| tx->regs.oPts = tx->regs.o[sem.reg.idx]; |
| } |
| } |
| } else { |
| if (is_input && tx->version.major >= 3) { |
| unsigned interp_location = 0; |
| /* SM3 only, SM2 input semantic determined by file */ |
| assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v)); |
| assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing"); |
| /* PositionT and tessfactor forbidden */ |
| if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR) |
| return D3DERR_INVALIDCALL; |
| |
| if (tgsi.Name == TGSI_SEMANTIC_POSITION) { |
| /* Position0 is forbidden (likely because vPos already does that) */ |
| if (sem.usage == D3DDECLUSAGE_POSITION) |
| return D3DERR_INVALIDCALL; |
| /* Following code is for depth */ |
| tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); |
| return D3D_OK; |
| } |
| |
| if (sem.reg.mod & NINED3DSPDM_CENTROID || |
| (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) |
| interp_location = TGSI_INTERPOLATE_LOC_CENTROID; |
| |
| tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid( |
| ureg, tgsi.Name, tgsi.Index, |
| nine_tgsi_to_interp_mode(&tgsi), |
| 0, /* cylwrap */ |
| interp_location, 0, 1); |
| } else |
| if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ |
| /* FragColor or FragDepth */ |
| assert(sem.reg.mask != 0); |
| ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, |
| 0, 1); |
| } |
| } |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(DEF) |
| { |
| tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(DEFB) |
| { |
| tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(DEFI) |
| { |
| tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(POW) |
| { |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[2] = { |
| tx_src_param(tx, &tx->insn.src[0]), |
| tx_src_param(tx, &tx->insn.src[1]) |
| }; |
| ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(RSQ) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| struct ureg_dst tmp = tx_scratch(tx); |
| ureg_RSQ(ureg, tmp, ureg_abs(src)); |
| ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(LOG) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst tmp = tx_scratch_scalar(tx); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| ureg_LG2(ureg, tmp, ureg_abs(src)); |
| ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(LIT) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst tmp = tx_scratch(tx); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| ureg_LIT(ureg, tmp, src); |
| /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 |
| * states that dst.z is 0 when src.y <= 0. Gallium definition can assign |
| * it 0^0 if src.w=0, which value is driver dependent. */ |
| ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), |
| ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), |
| ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); |
| ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(NRM) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst tmp = tx_scratch_scalar(tx); |
| struct ureg_src nrm = tx_src_scalar(tmp); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| ureg_DP3(ureg, tmp, src, src); |
| ureg_RSQ(ureg, tmp, nrm); |
| ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm); |
| ureg_MUL(ureg, dst, src, nrm); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(DP2ADD) |
| { |
| struct ureg_dst tmp = tx_scratch_scalar(tx); |
| struct ureg_src dp2 = tx_src_scalar(tmp); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[3]; |
| int i; |
| for (i = 0; i < 3; ++i) |
| src[i] = tx_src_param(tx, &tx->insn.src[i]); |
| assert_replicate_swizzle(&src[2]); |
| |
| ureg_DP2(tx->ureg, tmp, src[0], src[1]); |
| ureg_ADD(tx->ureg, dst, src[2], dp2); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXCOORD) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| const unsigned s = tx->insn.dst[0].idx; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| |
| tx_texcoord_alloc(tx, s); |
| ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]); |
| ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f)); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXCOORD_ps14) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| |
| assert(tx->insn.src[0].file == D3DSPR_TEXTURE); |
| |
| ureg_MOV(ureg, dst, src); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXKILL) |
| { |
| struct ureg_src reg; |
| |
| if (tx->version.major > 1 || tx->version.minor > 3) { |
| reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]); |
| } else { |
| tx_texcoord_alloc(tx, tx->insn.dst[0].idx); |
| reg = tx->regs.vT[tx->insn.dst[0].idx]; |
| } |
| if (tx->version.major < 2) |
| reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z)); |
| ureg_KILL_IF(tx->ureg, reg); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXBEM) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_dst tmp, tmp2, texcoord; |
| struct ureg_src sample, m00, m01, m10, m11; |
| struct ureg_src bumpenvlscale, bumpenvloffset; |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| |
| assert(tx->version.major == 1); |
| |
| sample = ureg_DECL_sampler(ureg, m); |
| tx->info->sampler_mask |= 1 << m; |
| |
| tx_texcoord_alloc(tx, m); |
| |
| tmp = tx_scratch(tx); |
| tmp2 = tx_scratch(tx); |
| texcoord = tx_scratch(tx); |
| /* |
| * Bump-env-matrix: |
| * 00 is X |
| * 01 is Y |
| * 10 is Z |
| * 11 is W |
| */ |
| nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); |
| m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); |
| m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); |
| m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); |
| m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); |
| |
| /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ |
| if (m % 2 == 0) { |
| bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X); |
| bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y); |
| } else { |
| bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z); |
| bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W); |
| } |
| |
| apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); |
| |
| /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, |
| NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); |
| /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, |
| NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), |
| NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); |
| |
| /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, |
| NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); |
| /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, |
| NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), |
| NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); |
| |
| /* Now the texture coordinates are in tmp.xy */ |
| |
| if (tx->insn.opcode == D3DSIO_TEXBEM) { |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); |
| } else if (tx->insn.opcode == D3DSIO_TEXBEML) { |
| /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ |
| ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); |
| ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z), |
| bumpenvlscale, bumpenvloffset); |
| ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); |
| } |
| |
| tx->info->bumpenvmat_needed = 1; |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXREG2AR) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src sample; |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| sample = ureg_DECL_sampler(ureg, m); |
| tx->info->sampler_mask |= 1 << m; |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXREG2GB) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src sample; |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| sample = ureg_DECL_sampler(ureg, m); |
| tx->info->sampler_mask |= 1 << m; |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXM3x2PAD) |
| { |
| return D3D_OK; /* this is just padding */ |
| } |
| |
| DECL_SPECIAL(TEXM3x2TEX) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src sample; |
| const int m = tx->insn.dst[0].idx - 1; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| tx_texcoord_alloc(tx, m+1); |
| |
| /* performs the matrix multiplication */ |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); |
| |
| sample = ureg_DECL_sampler(ureg, m + 1); |
| tx->info->sampler_mask |= 1 << (m + 1); |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXM3x3PAD) |
| { |
| return D3D_OK; /* this is just padding */ |
| } |
| |
| DECL_SPECIAL(TEXM3x3SPEC) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]); |
| struct ureg_src sample; |
| struct ureg_dst tmp; |
| const int m = tx->insn.dst[0].idx - 2; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| tx_texcoord_alloc(tx, m+1); |
| tx_texcoord_alloc(tx, m+2); |
| |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n])); |
| |
| sample = ureg_DECL_sampler(ureg, m + 2); |
| tx->info->sampler_mask |= 1 << (m + 2); |
| tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); |
| |
| /* At this step, dst = N = (u', w', z'). |
| * We want dst to be the texture sampled at (u'', w'', z''), with |
| * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); |
| ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); |
| /* at this step tmp.x = 1/N.N */ |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E); |
| /* at this step tmp.y = N.E */ |
| ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); |
| /* at this step tmp.x = N.E/N.N */ |
| ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); |
| ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); |
| /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ |
| ureg_SUB(ureg, tmp, ureg_src(tmp), E); |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXREG2RGB) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src sample; |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| sample = ureg_DECL_sampler(ureg, m); |
| tx->info->sampler_mask |= 1 << m; |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXDP3TEX) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_dst tmp; |
| struct ureg_src sample; |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| |
| tmp = tx_scratch(tx); |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f)); |
| |
| sample = ureg_DECL_sampler(ureg, m); |
| tx->info->sampler_mask |= 1 << m; |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXM3x2DEPTH) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst tmp; |
| const int m = tx->insn.dst[0].idx - 1; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| tx_texcoord_alloc(tx, m+1); |
| |
| tmp = tx_scratch(tx); |
| |
| /* performs the matrix multiplication */ |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); |
| |
| ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); |
| /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */ |
| ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z)); |
| /* res = 'w' == 0 ? 1.0 : z/w */ |
| ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), |
| ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); |
| /* replace the depth for depth testing with the result */ |
| tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, |
| TGSI_WRITEMASK_Z, 0, 1); |
| ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); |
| /* note that we write nothing to the destination, since it's disallowed to use it afterward */ |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXDP3) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| const int m = tx->insn.dst[0].idx; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| |
| ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXM3x3) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src sample; |
| struct ureg_dst E, tmp; |
| const int m = tx->insn.dst[0].idx - 2; |
| const int n = tx->insn.src[0].idx; |
| assert(m >= 0 && m > n); |
| |
| tx_texcoord_alloc(tx, m); |
| tx_texcoord_alloc(tx, m+1); |
| tx_texcoord_alloc(tx, m+2); |
| |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); |
| ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n])); |
| |
| switch (tx->insn.opcode) { |
| case D3DSIO_TEXM3x3: |
| ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); |
| break; |
| case D3DSIO_TEXM3x3TEX: |
| sample = ureg_DECL_sampler(ureg, m + 2); |
| tx->info->sampler_mask |= 1 << (m + 2); |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample); |
| break; |
| case D3DSIO_TEXM3x3VSPEC: |
| sample = ureg_DECL_sampler(ureg, m + 2); |
| tx->info->sampler_mask |= 1 << (m + 2); |
| E = tx_scratch(tx); |
| tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); |
| ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W)); |
| ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W)); |
| ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W)); |
| /* At this step, dst = N = (u', w', z'). |
| * We want dst to be the texture sampled at (u'', w'', z''), with |
| * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); |
| ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); |
| /* at this step tmp.x = 1/N.N */ |
| ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E)); |
| /* at this step tmp.y = N.E */ |
| ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); |
| /* at this step tmp.x = N.E/N.N */ |
| ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); |
| ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); |
| /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ |
| ureg_SUB(ureg, tmp, ureg_src(tmp), ureg_src(E)); |
| ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); |
| break; |
| default: |
| return D3DERR_INVALIDCALL; |
| } |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXDEPTH) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst r5; |
| struct ureg_src r5r, r5g; |
| |
| assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */ |
| |
| /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g. |
| * r5 won't be used afterward, thus we can use r5.ba */ |
| r5 = tx->regs.r[5]; |
| r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X); |
| r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y); |
| |
| ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g); |
| ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z)); |
| /* r5.r = r/g */ |
| ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), |
| r5r, ureg_imm1f(ureg, 1.0f)); |
| /* replace the depth for depth testing with the result */ |
| tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, |
| TGSI_WRITEMASK_Z, 0, 1); |
| ureg_MOV(ureg, tx->regs.oDepth, r5r); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(BEM) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); |
| struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); |
| struct ureg_src m00, m01, m10, m11; |
| const int m = tx->insn.dst[0].idx; |
| struct ureg_dst tmp; |
| /* |
| * Bump-env-matrix: |
| * 00 is X |
| * 01 is Y |
| * 10 is Z |
| * 11 is W |
| */ |
| nine_info_mark_const_f_used(tx->info, 8 + m); |
| m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); |
| m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); |
| m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); |
| m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); |
| /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, |
| NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); |
| /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, |
| NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); |
| |
| /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, |
| NINE_APPLY_SWIZZLE(src1, X), src0); |
| /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ |
| ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, |
| NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); |
| ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); |
| |
| tx->info->bumpenvmat_needed = 1; |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXLD) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| unsigned target; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[2] = { |
| tx_src_param(tx, &tx->insn.src[0]), |
| tx_src_param(tx, &tx->insn.src[1]) |
| }; |
| assert(tx->insn.src[1].idx >= 0 && |
| tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); |
| target = tx->sampler_targets[tx->insn.src[1].idx]; |
| |
| switch (tx->insn.flags) { |
| case 0: |
| ureg_TEX(ureg, dst, target, src[0], src[1]); |
| break; |
| case NINED3DSI_TEXLD_PROJECT: |
| ureg_TXP(ureg, dst, target, src[0], src[1]); |
| break; |
| case NINED3DSI_TEXLD_BIAS: |
| ureg_TXB(ureg, dst, target, src[0], src[1]); |
| break; |
| default: |
| assert(0); |
| return D3DERR_INVALIDCALL; |
| } |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXLD_14) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); |
| const unsigned s = tx->insn.dst[0].idx; |
| const unsigned t = ps1x_sampler_type(tx->info, s); |
| |
| tx->info->sampler_mask |= 1 << s; |
| ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s)); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEX) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| const unsigned s = tx->insn.dst[0].idx; |
| const unsigned t = ps1x_sampler_type(tx->info, s); |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[2]; |
| |
| tx_texcoord_alloc(tx, s); |
| |
| src[0] = tx->regs.vT[s]; |
| src[1] = ureg_DECL_sampler(ureg, s); |
| tx->info->sampler_mask |= 1 << s; |
| |
| TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); |
| |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXLDD) |
| { |
| unsigned target; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[4] = { |
| tx_src_param(tx, &tx->insn.src[0]), |
| tx_src_param(tx, &tx->insn.src[1]), |
| tx_src_param(tx, &tx->insn.src[2]), |
| tx_src_param(tx, &tx->insn.src[3]) |
| }; |
| assert(tx->insn.src[1].idx >= 0 && |
| tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); |
| target = tx->sampler_targets[tx->insn.src[1].idx]; |
| |
| ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(TEXLDL) |
| { |
| unsigned target; |
| struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); |
| struct ureg_src src[2] = { |
| tx_src_param(tx, &tx->insn.src[0]), |
| tx_src_param(tx, &tx->insn.src[1]) |
| }; |
| assert(tx->insn.src[1].idx >= 0 && |
| tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); |
| target = tx->sampler_targets[tx->insn.src[1].idx]; |
| |
| ureg_TXL(tx->ureg, dst, target, src[0], src[1]); |
| return D3D_OK; |
| } |
| |
| DECL_SPECIAL(SETP) |
| { |
| STUB(D3DERR_INVALIDCALL); |
| } |
| |
| DECL_SPECIAL(BREAKP) |
| { |
| STUB(D3DERR_INVALIDCALL); |
| } |
| |
| DECL_SPECIAL(PHASE) |
| { |
| return D3D_OK; /* we don't care about phase */ |
| } |
| |
| DECL_SPECIAL(COMMENT) |
| { |
| return D3D_OK; /* nothing to do */ |
| } |
| |
| |
| #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \ |
| { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h } |
| |
| struct sm1_op_info inst_table[] = |
| { |
| _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */ |
| _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), |
| _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ |
| _OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */ |
| _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ |
| _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ |
| _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */ |
| _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ |
| _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ |
| _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ |
| _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */ |
| _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */ |
| _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */ |
| _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ |
| _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ |
| _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ |
| _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ |
| _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ |
| _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ |
| _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ |
| |
| _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)), |
| _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)), |
| _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)), |
| _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)), |
| _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)), |
| |
| _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)), |
| _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)), |
| _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)), |
| _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)), |
| _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)), |
| _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)), |
| |
| _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)), |
| |
| _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)), |
| _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */ |
| _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ |
| _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), |
| _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ |
| |
| _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), |
| _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), |
| |
| /* More flow control */ |
| _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)), |
| _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)), |
| _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)), |
| _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)), |
| _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)), |
| _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)), |
| _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL), |
| _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)), |
| /* we don't write to the address register, but a normal register (copied |
| * when needed to the address register), thus we don't use ARR */ |
| _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), |
| |
| _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)), |
| _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)), |
| |
| _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)), |
| _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)), |
| _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)), |
| _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)), |
| _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), |
| _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), |
| _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), |
| _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), |
| _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), |
| _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), |
| _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), |
| _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)), |
| _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)), |
| _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), |
| _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)), |
| _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), |
| |
| _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), |
| _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), |
| _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)), |
| _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), |
| |
| _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)), |
| |
| /* More tex stuff */ |
| _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)), |
| _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)), |
| _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)), |
| _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)), |
| _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)), |
| _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)), |
| |
| /* Misc */ |
| _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */ |
| _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)), |
| _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), |
| _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), |
| _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), |
| _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)), |
| _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)), |
| _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)), |
| _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP)) |
| }; |
| |
| struct sm1_op_info inst_phase = |
| _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE)); |
| |
| struct sm1_op_info inst_comment = |
| _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT)); |
| |
| static void |
| create_op_info_map(struct shader_translator *tx) |
| { |
| const unsigned version = (tx->version.major << 8) | tx->version.minor; |
| unsigned i; |
| |
| for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i) |
| tx->op_info_map[i] = -1; |
| |
| if (tx->processor == PIPE_SHADER_VERTEX) { |
| for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { |
| assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); |
| if (inst_table[i].vert_version.min <= version && |
| inst_table[i].vert_version.max >= version) |
| tx->op_info_map[inst_table[i].sio] = i; |
| } |
| } else { |
| for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { |
| assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); |
| if (inst_table[i].frag_version.min <= version && |
| inst_table[i].frag_version.max >= version) |
| tx->op_info_map[inst_table[i].sio] = i; |
| } |
| } |
| } |
| |
| static inline HRESULT |
| NineTranslateInstruction_Generic(struct shader_translator *tx) |
| { |
| struct ureg_dst dst[1]; |
| struct ureg_src src[4]; |
| unsigned i; |
| |
| for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i) |
| dst[i] = tx_dst_param(tx, &tx->insn.dst[i]); |
| for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i) |
| src[i] = tx_src_param(tx, &tx->insn.src[i]); |
| |
| ureg_insn(tx->ureg, tx->insn.info->opcode, |
| dst, tx->insn.ndst, |
| src, tx->insn.nsrc); |
| return D3D_OK; |
| } |
| |
| static inline DWORD |
| TOKEN_PEEK(struct shader_translator *tx) |
| { |
| return *(tx->parse); |
| } |
| |
| static inline DWORD |
| TOKEN_NEXT(struct shader_translator *tx) |
| { |
| return *(tx->parse)++; |
| } |
| |
| static inline void |
| TOKEN_JUMP(struct shader_translator *tx) |
| { |
| if (tx->parse_next && tx->parse != tx->parse_next) { |
| WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next); |
| tx->parse = tx->parse_next; |
| } |
| } |
| |
| static inline boolean |
| sm1_parse_eof(struct shader_translator *tx) |
| { |
| return TOKEN_PEEK(tx) == NINED3DSP_END; |
| } |
| |
| static void |
| sm1_read_version(struct shader_translator *tx) |
| { |
| const DWORD tok = TOKEN_NEXT(tx); |
| |
| tx->version.major = D3DSHADER_VERSION_MAJOR(tok); |
| tx->version.minor = D3DSHADER_VERSION_MINOR(tok); |
| |
| switch (tok >> 16) { |
| case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break; |
| case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break; |
| default: |
| DBG("Invalid shader type: %x\n", tok); |
| tx->processor = ~0; |
| break; |
| } |
| } |
| |
| /* This is just to check if we parsed the instruction properly. */ |
| static void |
| sm1_parse_get_skip(struct shader_translator *tx) |
| { |
| const DWORD tok = TOKEN_PEEK(tx); |
| |
| if (tx->version.major >= 2) { |
| tx->parse_next = tx->parse + 1 /* this */ + |
| ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT); |
| } else { |
| tx->parse_next = NULL; /* TODO: determine from param count */ |
| } |
| } |
| |
| static void |
| sm1_print_comment(const char *comment, UINT size) |
| { |
| if (!size) |
| return; |
| /* TODO */ |
| } |
| |
| static void |
| sm1_parse_comments(struct shader_translator *tx, BOOL print) |
| { |
| DWORD tok = TOKEN_PEEK(tx); |
| |
| while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT) |
| { |
| const char *comment = ""; |
| UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT; |
| tx->parse += size + 1; |
| |
| if (print) |
| sm1_print_comment(comment, size); |
| |
| tok = TOKEN_PEEK(tx); |
| } |
| } |
| |
| static void |
| sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel) |
| { |
| *reg = TOKEN_NEXT(tx); |
| |
| if (*reg & D3DSHADER_ADDRMODE_RELATIVE) |
| { |
| if (tx->version.major < 2) |
| *rel = (1 << 31) | |
| ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) | |
| ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | |
| D3DSP_NOSWIZZLE; |
| else |
| *rel = TOKEN_NEXT(tx); |
| } |
| } |
| |
| static void |
| sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok) |
| { |
| int8_t shift; |
| dst->file = |
| (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT | |
| (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2; |
| dst->type = TGSI_RETURN_TYPE_FLOAT; |
| dst->idx = tok & D3DSP_REGNUM_MASK; |
| dst->rel = NULL; |
| dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT; |
| dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT; |
| shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT; |
| dst->shift = (shift & 0x7) - (shift & 0x8); |
| } |
| |
| static void |
| sm1_parse_src_param(struct sm1_src_param *src, DWORD tok) |
| { |
| src->file = |
| ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | |
| ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2); |
| src->type = TGSI_RETURN_TYPE_FLOAT; |
| src->idx = tok & D3DSP_REGNUM_MASK; |
| src->rel = NULL; |
| src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT; |
| src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT; |
| |
| switch (src->file) { |
| case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break; |
| case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break; |
| case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break; |
| default: |
| break; |
| } |
| } |
| |
| static void |
| sm1_parse_immediate(struct shader_translator *tx, |
| struct sm1_src_param *imm) |
| { |
| imm->file = NINED3DSPR_IMMEDIATE; |
| imm->idx = INT_MIN; |
| imm->rel = NULL; |
| imm->swizzle = NINED3DSP_NOSWIZZLE; |
| imm->mod = 0; |
| switch (tx->insn.opcode) { |
| case D3DSIO_DEF: |
| imm->type = NINED3DSPTYPE_FLOAT4; |
| memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); |
| tx->parse += 4; |
| break; |
| case D3DSIO_DEFI: |
| imm->type = NINED3DSPTYPE_INT4; |
| memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); |
| tx->parse += 4; |
| break; |
| case D3DSIO_DEFB: |
| imm->type = NINED3DSPTYPE_BOOL; |
| memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD)); |
| tx->parse += 1; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| } |
| |
| static void |
| sm1_read_dst_param(struct shader_translator *tx, |
| struct sm1_dst_param *dst, |
| struct sm1_src_param *rel) |
| { |
| DWORD tok_dst, tok_rel = 0; |
| |
| sm1_parse_get_param(tx, &tok_dst, &tok_rel); |
| sm1_parse_dst_param(dst, tok_dst); |
| if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) { |
| sm1_parse_src_param(rel, tok_rel); |
| dst->rel = rel; |
| } |
| } |
| |
| static void |
| sm1_read_src_param(struct shader_translator *tx, |
| struct sm1_src_param *src, |
| struct sm1_src_param *rel) |
| { |
| DWORD tok_src, tok_rel = 0; |
| |
| sm1_parse_get_param(tx, &tok_src, &tok_rel); |
| sm1_parse_src_param(src, tok_src); |
| if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) { |
| assert(rel); |
| sm1_parse_src_param(rel, tok_rel); |
| src->rel = rel; |
| } |
| } |
| |
| static void |
| sm1_read_semantic(struct shader_translator *tx, |
| struct sm1_semantic *sem) |
| { |
| const DWORD tok_usg = TOKEN_NEXT(tx); |
| const DWORD tok_dst = TOKEN_NEXT(tx); |
| |
| sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT; |
| sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT; |
| sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; |
| |
| sm1_parse_dst_param(&sem->reg, tok_dst); |
| } |
| |
| static void |
| sm1_parse_instruction(struct shader_translator *tx) |
| { |
| struct sm1_instruction *insn = &tx->insn; |
| HRESULT hr; |
| DWORD tok; |
| struct sm1_op_info *info = NULL; |
| unsigned i; |
| |
| sm1_parse_comments(tx, TRUE); |
| sm1_parse_get_skip(tx); |
| |
| tok = TOKEN_NEXT(tx); |
| |
| insn->opcode = tok & D3DSI_OPCODE_MASK; |
| insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT; |
| insn->coissue = !!(tok & D3DSI_COISSUE); |
| insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED); |
| |
| if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) { |
| int k = tx->op_info_map[insn->opcode]; |
| if (k >= 0) { |
| assert(k < ARRAY_SIZE(inst_table)); |
| info = &inst_table[k]; |
| } |
| } else { |
| if (insn->opcode == D3DSIO_PHASE) info = &inst_phase; |
| if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment; |
| } |
| if (!info) { |
| DBG("illegal or unhandled opcode: %08x\n", insn->opcode); |
| TOKEN_JUMP(tx); |
| return; |
| } |
| insn->info = info; |
| insn->ndst = info->ndst; |
| insn->nsrc = info->nsrc; |
| |
| assert(!insn->predicated && "TODO: predicated instructions"); |
| |
| /* check version */ |
| { |
| unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min; |
| unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max; |
| unsigned ver = (tx->version.major << 8) | tx->version.minor; |
| if (ver < min || ver > max) { |
| DBG("opcode not supported in this shader version: %x <= %x <= %x\n", |
| min, ver, max); |
| return; |
| } |
| } |
| |
| for (i = 0; i < insn->ndst; ++i) |
| sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]); |
| if (insn->predicated) |
| sm1_read_src_param(tx, &insn->pred, NULL); |
| for (i = 0; i < insn->nsrc; ++i) |
| sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]); |
| |
| /* parse here so we can dump them before processing */ |
| if (insn->opcode == D3DSIO_DEF || |
| insn->opcode == D3DSIO_DEFI || |
| insn->opcode == D3DSIO_DEFB) |
| sm1_parse_immediate(tx, &tx->insn.src[0]); |
| |
| sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth); |
| sm1_instruction_check(insn); |
| |
| if (info->handler) |
| hr = info->handler(tx); |
| else |
| hr = NineTranslateInstruction_Generic(tx); |
| tx_apply_dst0_modifiers(tx); |
| |
| if (hr != D3D_OK) |
| tx->failure = TRUE; |
| tx->num_scratch = 0; /* reset */ |
| |
| TOKEN_JUMP(tx); |
| } |
| |
| static void |
| tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) |
| { |
| unsigned i; |
| |
| tx->info = info; |
| |
| tx->byte_code = info->byte_code; |
| tx->parse = info->byte_code; |
| |
| for (i = 0; i < ARRAY_SIZE(info->input_map); ++i) |
| info->input_map[i] = NINE_DECLUSAGE_NONE; |
| info->num_inputs = 0; |
| |
| info->position_t = FALSE; |
| info->point_size = FALSE; |
| |
| tx->info->const_float_slots = 0; |
| tx->info->const_int_slots = 0; |
| tx->info->const_bool_slots = 0; |
| |
| info->sampler_mask = 0x0; |
| info->rt_mask = 0x0; |
| |
| info->lconstf.data = NULL; |
| info->lconstf.ranges = NULL; |
| |
| info->bumpenvmat_needed = 0; |
| |
| for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) { |
| tx->regs.rL[i] = ureg_dst_undef(); |
| } |
| tx->regs.address = ureg_dst_undef(); |
| tx->regs.a0 = ureg_dst_undef(); |
| tx->regs.p = ureg_dst_undef(); |
| tx->regs.oDepth = ureg_dst_undef(); |
| tx->regs.vPos = ureg_src_undef(); |
| tx->regs.vFace = ureg_src_undef(); |
| for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i) |
| tx->regs.o[i] = ureg_dst_undef(); |
| for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i) |
| tx->regs.oCol[i] = ureg_dst_undef(); |
| for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i) |
| tx->regs.vC[i] = ureg_src_undef(); |
| for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i) |
| tx->regs.vT[i] = ureg_src_undef(); |
| |
| sm1_read_version(tx); |
| |
| info->version = (tx->version.major << 4) | tx->version.minor; |
| |
| tx->num_outputs = 0; |
| |
| create_op_info_map(tx); |
| } |
| |
| static void |
| tx_dtor(struct shader_translator *tx) |
| { |
| if (tx->num_inst_labels) |
| FREE(tx->inst_labels); |
| FREE(tx->lconstf); |
| FREE(tx->regs.r); |
| FREE(tx); |
| } |
| |
| /* CONST[0].xyz = width/2, -height/2, zmax-zmin |
| * CONST[1].xyz = x+width/2, y+height/2, zmin */ |
| static void |
| shader_add_vs_viewport_transform(struct shader_translator *tx) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_src c0 = NINE_CONSTANT_SRC(0); |
| struct ureg_src c1 = NINE_CONSTANT_SRC(1); |
| /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ |
| |
| c0 = ureg_src_dimension(c0, 4); |
| c1 = ureg_src_dimension(c1, 4); |
| /* TODO: find out when we need to apply the viewport transformation or not. |
| * Likely will be XYZ vs XYZRHW in vdecl_out |
| * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); |
| * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); |
| */ |
| ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); |
| } |
| |
| static void |
| shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) |
| { |
| struct ureg_program *ureg = tx->ureg; |
| struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); |
| struct ureg_src fog_end, fog_coeff, fog_density; |
| struct ureg_src fog_vs, depth, fog_color; |
| struct ureg_dst fog_factor; |
| |
| if (!tx->info->fog_enable) { |
| ureg_MOV(ureg, oCol0, src_col); |
| return; |
| } |
| |
| if (tx->info->fog_mode != D3DFOG_NONE) { |
| depth = nine_get_position_input(tx); |
| depth = ureg_scalar(depth, TGSI_SWIZZLE_Z); |
| } |
| |
| nine_info_mark_const_f_used(tx->info, 33); |
| fog_color = NINE_CONSTANT_SRC(32); |
| fog_factor = tx_scratch_scalar(tx); |
| |
| if (tx->info->fog_mode == D3DFOG_LINEAR) { |
| fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X); |
| fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y); |
| ureg_SUB(ureg, fog_factor, fog_end, depth); |
| ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); |
| } else if (tx->info->fog_mode == D3DFOG_EXP) { |
| fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); |
| ureg_MUL(ureg, fog_factor, depth, fog_density); |
| ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); |
| ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); |
| } else if (tx->info->fog_mode == D3DFOG_EXP2) { |
| fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); |
| ureg_MUL(ureg, fog_factor, depth, fog_density); |
| ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); |
| ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); |
| ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); |
| } else { |
| fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, |
| TGSI_INTERPOLATE_PERSPECTIVE), |
| TGSI_SWIZZLE_X); |
| ureg_MOV(ureg, fog_factor, fog_vs); |
| } |
| |
| ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), |
| tx_src_scalar(fog_factor), src_col, fog_color); |
| ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); |
| } |
| |
| #define GET_CAP(n) screen->get_param( \ |
| screen, PIPE_CAP_##n) |
| #define GET_SHADER_CAP(n) screen->get_shader_param( \ |
| screen, info->type, PIPE_SHADER_CAP_##n) |
| |
| HRESULT |
| nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) |
| { |
| struct shader_translator *tx; |
| HRESULT hr = D3D_OK; |
| const unsigned processor = info->type; |
| struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen; |
| struct pipe_context *pipe = info->process_vertices ? device->pipe_sw : NineDevice9_GetPipe(device); |
| |
| user_assert(processor != ~0, D3DERR_INVALIDCALL); |
| |
| tx = CALLOC_STRUCT(shader_translator); |
| if (!tx) |
| return E_OUTOFMEMORY; |
| tx_ctor(tx, info); |
| |
| if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) { |
| hr = D3DERR_INVALIDCALL; |
| DBG("Unsupported shader version: %u.%u !\n", |
| tx->version.major, tx->version.minor); |
| goto out; |
| } |
| if (tx->processor != processor) { |
| hr = D3DERR_INVALIDCALL; |
| DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor); |
| goto out; |
| } |
| DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS", |
| tx->version.major, tx->version.minor); |
| |
| tx->ureg = ureg_create(processor); |
| if (!tx->ureg) { |
| hr = E_OUTOFMEMORY; |
| goto out; |
| } |
| |
| tx->native_integers = GET_SHADER_CAP(INTEGERS); |
| tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES); |
| tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS); |
| tx->want_texcoord = GET_CAP(TGSI_TEXCOORD); |
| tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER); |
| tx->texcoord_sn = tx->want_texcoord ? |
| TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; |
| tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL); |
| tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL); |
| |
| if (IS_VS) { |
| tx->num_constf_allowed = NINE_MAX_CONST_F; |
| } else if (tx->version.major < 2) {/* IS_PS v1 */ |
| tx->num_constf_allowed = 8; |
| } else if (tx->version.major == 2) {/* IS_PS v2 */ |
| tx->num_constf_allowed = 32; |
| } else {/* IS_PS v3 */ |
| tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; |
| } |
| |
| if (tx->version.major < 2) { |
| tx->num_consti_allowed = 0; |
| tx->num_constb_allowed = 0; |
| } else { |
| tx->num_consti_allowed = NINE_MAX_CONST_I; |
| tx->num_constb_allowed = NINE_MAX_CONST_B; |
| } |
| |
| if (IS_VS && tx->version.major >= 2 && info->swvp_on) { |
| tx->num_constf_allowed = 8192; |
| tx->num_consti_allowed = 2048; |
| tx->num_constb_allowed = 2048; |
| } |
| |
| /* VS must always write position. Declare it here to make it the 1st output. |
| * (Some drivers like nv50 are buggy and rely on that.) |
| */ |
| if (IS_VS) { |
| tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); |
| } else { |
| ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); |
| if (!tx->shift_wpos) |
| ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); |
| } |
| |
| while (!sm1_parse_eof(tx) && !tx->failure) |
| sm1_parse_instruction(tx); |
| tx->parse++; /* for byte_size */ |
| |
| if (tx->failure) { |
| /* For VS shaders, we print the warning later, |
| * we first try with swvp. */ |
| if (IS_PS) |
| ERR("Encountered buggy shader\n"); |
| ureg_destroy(tx->ureg); |
| hr = D3DERR_INVALIDCALL; |
| goto out; |
| } |
| |
| if (IS_PS && tx->version.major < 3) { |
| if (tx->version.major < 2) { |
| assert(tx->num_temp); /* there must be color output */ |
| info->rt_mask |= 0x1; |
| shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); |
| } else { |
| shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); |
| } |
| } |
| |
| if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { |
| tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); |
| ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); |
| } |
| |
| if (info->position_t) |
| ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); |
| |
| if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { |
| struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); |
| ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); |
| ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); |
| info->point_size = TRUE; |
| } |
| |
| if (info->process_vertices) |
| shader_add_vs_viewport_transform(tx); |
| |
| ureg_END(tx->ureg); |
| |
| /* record local constants */ |
| if (tx->num_lconstf && tx->indirect_const_access) { |
| struct nine_range *ranges; |
| float *data; |
| int *indices; |
| unsigned i, k, n; |
| |
| hr = E_OUTOFMEMORY; |
| |
| data = MALLOC(tx->num_lconstf * 4 * sizeof(float)); |
| if (!data) |
| goto out; |
| info->lconstf.data = data; |
| |
| indices = MALLOC(tx->num_lconstf * sizeof(indices[0])); |
| if (!indices) |
| goto out; |
| |
| /* lazy sort, num_lconstf should be small */ |
| for (n = 0; n < tx->num_lconstf; ++n) { |
| for (k = 0, i = 0; i < tx->num_lconstf; ++i) { |
| if (tx->lconstf[i].idx < tx->lconstf[k].idx) |
| k = i; |
| } |
| indices[n] = tx->lconstf[k].idx; |
| memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); |
| tx->lconstf[k].idx = INT_MAX; |
| } |
| |
| /* count ranges */ |
| for (n = 1, i = 1; i < tx->num_lconstf; ++i) |
| if (indices[i] != indices[i - 1] + 1) |
| ++n; |
| ranges = MALLOC(n * sizeof(ranges[0])); |
| if (!ranges) { |
| FREE(indices); |
| goto out; |
| } |
| info->lconstf.ranges = ranges; |
| |
| k = 0; |
| ranges[k].bgn = indices[0]; |
| for (i = 1; i < tx->num_lconstf; ++i) { |
| if (indices[i] != indices[i - 1] + 1) { |
| ranges[k].next = &ranges[k + 1]; |
| ranges[k].end = indices[i - 1] + 1; |
| ++k; |
| ranges[k].bgn = indices[i]; |
| } |
| } |
| ranges[k].end = indices[i - 1] + 1; |
| ranges[k].next = NULL; |
| assert(n == (k + 1)); |
| |
| FREE(indices); |
| hr = D3D_OK; |
| } |
| |
| /* r500 */ |
| if (info->const_float_slots > device->max_vs_const_f && |
| (info->const_int_slots || info->const_bool_slots) && |
| (!IS_VS || !info->swvp_on)) |
| ERR("Overlapping constant slots. The shader is likely to be buggy\n"); |
| |
| |
| if (tx->indirect_const_access) /* vs only */ |
| info->const_float_slots = device->max_vs_const_f; |
| |
| if (!IS_VS || !info->swvp_on) { |
| unsigned s, slot_max; |
| unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; |
| |
| slot_max = info->const_bool_slots > 0 ? |
| max_const_f + NINE_MAX_CONST_I |
| + DIV_ROUND_UP(info->const_bool_slots, 4) : |
| info->const_int_slots > 0 ? |
| max_const_f + info->const_int_slots : |
| info->const_float_slots; |
| |
| info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ |
| |
| for (s = 0; s < slot_max; s++) |
| ureg_DECL_constant(tx->ureg, s); |
| } else { |
| ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); |
| ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); |
| ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); |
| ureg_DECL_constant2D(tx->ureg, 0, 511, 3); |
| } |
| |
| if (info->process_vertices) |
| ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ |
| |
| if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { |
| unsigned count; |
| const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count); |
| tgsi_dump(toks, 0); |
| ureg_free_tokens(toks); |
| } |
| |
| if (info->process_vertices) { |
| NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, |
| tx->output_info, |
| tx->num_outputs, |
| &(info->so)); |
| info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); |
| } else |
| info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe); |
| if (!info->cso) { |
| hr = D3DERR_DRIVERINTERNALERROR; |
| FREE(info->lconstf.data); |
| FREE(info->lconstf.ranges); |
| goto out; |
| } |
| |
| info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); |
| out: |
| tx_dtor(tx); |
| return hr; |
| } |