| /* |
| * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * Authors: |
| * Vadim Girlin |
| */ |
| |
| #define BCP_DEBUG 0 |
| |
| #if BCP_DEBUG |
| #define BCP_DUMP(q) do { q } while (0) |
| #else |
| #define BCP_DUMP(q) |
| #endif |
| |
| #include "r600_pipe.h" |
| #include "r600_shader.h" |
| #include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1 |
| |
| #include <stack> |
| |
| #include "sb_bc.h" |
| #include "sb_shader.h" |
| #include "sb_pass.h" |
| #include "util/macros.h" |
| |
| namespace r600_sb { |
| |
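// bc_parser::decode translates the raw r600-family bytecode into the SB
// IR: it selects the shader target from the pipe shader state (falling
// back to a fetch/compute shader when there is no pshader), then decodes
// the CF instruction stream into cf_node/alu_node/fetch_node trees.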
| int bc_parser::decode() { |
| |
| dw = bc->bytecode; |
| bc_ndw = bc->ndw; |
| max_cf = 0; |
| |
| dec = new bc_decoder(ctx, dw, bc_ndw); |
| |
| shader_target t = TARGET_UNKNOWN; |
| |
| if (pshader) { |
| switch (bc->type) { |
| case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break; |
| case PIPE_SHADER_VERTEX: |
| t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS); |
| break; |
| case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break; |
| case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break; |
| case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break; |
| case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break; |
| default: assert(!"unknown shader target"); return -1; break; |
| } |
| } else { |
| if (bc->type == PIPE_SHADER_COMPUTE) |
| t = TARGET_COMPUTE; |
| else |
| t = TARGET_FETCH; |
| } |
| |
| sh = new shader(ctx, t, bc->debug_id); |
| sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); |
| |
| int r = decode_shader(); |
| |
| delete dec; |
| |
| sh->ngpr = bc->ngpr; |
| sh->nstack = bc->nstack; |
| |
| return r; |
| } |
| |
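// CF instructions are 64 bits wide, so the dword offset i maps to CF id
// i >> 1. Decoding continues past an end-of-program marker until every
// CF instruction up to the highest branch target seen (max_cf) has been
// decoded.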
| int bc_parser::decode_shader() { |
| int r = 0; |
| unsigned i = 0; |
| bool eop = false; |
| |
| sh->init(); |
| |
| do { |
| eop = false; |
| if ((r = decode_cf(i, eop))) |
| return r; |
| |
| } while (!eop || (i >> 1) < max_cf); |
| |
| return 0; |
| } |
| |
| int bc_parser::prepare() { |
| int r = 0; |
| if ((r = parse_decls())) |
| return r; |
| if ((r = prepare_ir())) |
| return r; |
| return 0; |
| } |
| |
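// Register shader inputs and GPR arrays from the pipe shader declarations.
// GPR arrays are needed to model relative (indirect) register addressing.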
| int bc_parser::parse_decls() { |
| |
| if (!pshader) { |
| if (gpr_reladdr) |
| sh->add_gpr_array(0, bc->ngpr, 0x0F); |
| |
| // compute shaders have some values preloaded in R0, R1 |
| sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */); |
| sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */); |
| return 0; |
| } |
| |
| if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) { |
| |
| assert(pshader->num_arrays); |
| |
| if (pshader->num_arrays) { |
| for (unsigned i = 0; i < pshader->num_arrays; ++i) { |
| r600_shader_array &a = pshader->arrays[i]; |
| sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); |
| } |
| } else { |
| sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); |
| } |
| } |
| |
| // GS inputs can add indirect addressing |
| if (sh->target == TARGET_GS) { |
| if (pshader->num_arrays) { |
| for (unsigned i = 0; i < pshader->num_arrays; ++i) { |
| r600_shader_array &a = pshader->arrays[i]; |
| sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); |
| } |
| } |
| } |
| |
| if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS || sh->target == TARGET_LS) |
| sh->add_input(0, 1, 0x0F); |
| else if (sh->target == TARGET_GS) { |
| sh->add_input(0, 1, 0x0F); |
| sh->add_input(1, 1, 0x0F); |
| } else if (sh->target == TARGET_COMPUTE) { |
| sh->add_input(0, 1, 0x0F); |
| sh->add_input(1, 1, 0x0F); |
| } |
| |
| bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN |
| && sh->target == TARGET_PS; |
| |
| bool ij_interpolators[6]; |
| memset(ij_interpolators, 0, sizeof(ij_interpolators)); |
| |
| for (unsigned i = 0; i < pshader->ninput; ++i) { |
| r600_shader_io & in = pshader->input[i]; |
| bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); |
| sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); |
| if (ps_interp && in.spi_sid) { |
| int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); |
| if (k >= 0) |
| ij_interpolators[k] |= true; |
| } |
| } |
| |
| if (ps_interp) { |
| /* add the egcm ij interpolators to live inputs */ |
| unsigned num_ij = 0; |
| for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) { |
| num_ij += ij_interpolators[i]; |
| } |
| |
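		// each active interpolator takes an i/j pair (two channels), and the
		// pairs are packed consecutively from GPR0, two pairs per GPR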
| unsigned mask = (1 << (2 * num_ij)) - 1; |
| unsigned gpr = 0; |
| |
| while (mask) { |
| sh->add_input(gpr, true, mask & 0x0F); |
| ++gpr; |
| mask >>= 4; |
| } |
| } |
| |
| return 0; |
| } |
| |
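// Decode a single CF instruction at dword offset i, record it in cf_map
// by CF id, and decode the attached ALU or fetch clause if it has one.
// Branch targets extend max_cf, which keeps decode_shader going until the
// whole reachable range is covered.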
| int bc_parser::decode_cf(unsigned &i, bool &eop) { |
| |
| int r; |
| |
| cf_node *cf = sh->create_cf(); |
| sh->root->push_back(cf); |
| |
| unsigned id = i >> 1; |
| |
| cf->bc.id = id; |
| |
| if (cf_map.size() < id + 1) |
| cf_map.resize(id + 1); |
| |
| cf_map[id] = cf; |
| |
| if ((r = dec->decode_cf(i, cf->bc))) |
| return r; |
| |
| cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; |
| |
| if (flags & CF_ALU) { |
| if ((r = decode_alu_clause(cf))) |
| return r; |
| } else if (flags & CF_FETCH) { |
| if ((r = decode_fetch_clause(cf))) |
| return r; |
| } else if (flags & CF_EXP) { |
| if (cf->bc.rw_rel) |
| gpr_reladdr = true; |
| assert(!cf->bc.rw_rel); |
| } else if (flags & CF_MEM) { |
| if (cf->bc.rw_rel) |
| gpr_reladdr = true; |
| assert(!cf->bc.rw_rel); |
| } else if (flags & CF_BRANCH) { |
| if (cf->bc.addr > max_cf) |
| max_cf = cf->bc.addr; |
| } |
| |
| eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END || |
| cf->bc.op == CF_OP_RET; |
| return 0; |
| } |
| |
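// cf->bc.addr is in 64-bit units, hence << 1 for the dword offset;
// cf->bc.count + 1 is the clause size in 64-bit slots, which also covers
// the literal pairs consumed by decode_alu_group.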
| int bc_parser::decode_alu_clause(cf_node* cf) { |
| unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; |
| |
| cf->subtype = NST_ALU_CLAUSE; |
| |
| cgroup = 0; |
| memset(slots[0], 0, 5*sizeof(slots[0][0])); |
| |
	int r;

	do {
		if ((r = decode_alu_group(cf, i, gcnt)))
			return r;
		assert(gcnt <= cnt);
		cnt -= gcnt;
	} while (cnt);
| |
| return 0; |
| } |
| |
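// Decode one ALU group: instructions until the 'last' bit is set (at most
// the five slots x/y/z/w/t), followed by the literal constants the group
// references. gcnt is returned in 64-bit units, including literal pairs.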
| int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { |
| int r; |
| alu_node *n; |
| alu_group_node *g = sh->create_alu_group(); |
| |
| cgroup = !cgroup; |
| memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); |
| gcnt = 0; |
| |
| unsigned literal_mask = 0; |
| |
| do { |
| n = sh->create_alu(); |
| g->push_back(n); |
| |
| if ((r = dec->decode_alu(i, n->bc))) |
| return r; |
| |
| if (!sh->assign_slot(n, slots[cgroup])) { |
| assert(!"alu slot assignment failed"); |
| return -1; |
| } |
| |
| gcnt++; |
| |
| } while (gcnt <= 5 && !n->bc.last); |
| |
| assert(n->bc.last); |
| |
| for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { |
| n = static_cast<alu_node*>(*I); |
| |
| if (n->bc.dst_rel) |
| gpr_reladdr = true; |
| |
| for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { |
| bc_alu_src &src = n->bc.src[k]; |
| if (src.rel) |
| gpr_reladdr = true; |
| if (src.sel == ALU_SRC_LITERAL) { |
| literal_mask |= (1 << src.chan); |
| src.value.u = dw[i + src.chan]; |
| } |
| } |
| } |
| |
| unsigned literal_ndw = 0; |
| while (literal_mask) { |
| g->literals.push_back(dw[i + literal_ndw]); |
| literal_ndw += 1; |
| literal_mask >>= 1; |
| } |
| |
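	// literals occupy whole 64-bit slots, so round the dword count up to even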
| literal_ndw = (literal_ndw + 1) & ~1u; |
| |
| i += literal_ndw; |
| gcnt += literal_ndw >> 1; |
| |
| cf->push_back(g); |
| return 0; |
| } |
| |
| int bc_parser::prepare_alu_clause(cf_node* cf) { |
| |
| // loop over alu groups |
| for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { |
| assert(I->subtype == NST_ALU_GROUP); |
| alu_group_node *g = static_cast<alu_group_node*>(*I); |
| prepare_alu_group(cf, g); |
| } |
| |
| return 0; |
| } |
| |
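// Helpers tracking indexed addressing state: on cayman, MOVA results can
// be latched into the CF_IDX0/1 index registers for indexed resource,
// sampler and UBO access. The parser remembers the last MOVA and the
// value stored in each index register so that later instructions can take
// them as explicit source dependencies.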
| void bc_parser::save_set_cf_index(value *val, unsigned idx) |
| { |
| assert(idx <= 1); |
| assert(val); |
| cf_index_value[idx] = val; |
| } |
| value *bc_parser::get_cf_index_value(unsigned idx) |
| { |
| assert(idx <= 1); |
| assert(cf_index_value[idx]); |
| return cf_index_value[idx]; |
| } |
| void bc_parser::save_mova(alu_node *mova) |
| { |
| assert(mova); |
| this->mova = mova; |
| } |
| alu_node *bc_parser::get_mova() |
| { |
| assert(mova); |
| return mova; |
| } |
| |
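// Second pass over a decoded ALU group: reassign slots (rebuilding the
// PV/PS tracking), create the IR values for sources and destinations,
// handle LDS/predicate/kill side effects, and pack multislot instructions
// into an alu_packed_node.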
| int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { |
| |
| alu_node *n; |
| |
| cgroup = !cgroup; |
| memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); |
| |
| for (node_iterator I = g->begin(), E = g->end(); |
| I != E; ++I) { |
| n = static_cast<alu_node*>(*I); |
| bool ubo_indexing[2] = {}; |
| |
| if (!sh->assign_slot(n, slots[cgroup])) { |
| assert(!"alu slot assignment failed"); |
| return -1; |
| } |
| |
| unsigned src_count = n->bc.op_ptr->src_count; |
| |
| if (ctx.alu_slots(n->bc.op) & AF_4SLOT) |
| n->flags |= NF_ALU_4SLOT; |
| |
| n->src.resize(src_count); |
| |
| unsigned flags = n->bc.op_ptr->flags; |
| |
| if (flags & AF_LDS) { |
| bool need_rw = false, need_oqa = false, need_oqb = false; |
| int ndst = 0, ncount = 0; |
| |
| /* all non-read operations have side effects */ |
| if (n->bc.op != LDS_OP2_LDS_READ2_RET && |
| n->bc.op != LDS_OP1_LDS_READ_REL_RET && |
| n->bc.op != LDS_OP1_LDS_READ_RET) { |
| n->flags |= NF_DONT_KILL; |
| ndst++; |
| need_rw = true; |
| } |
| |
| if (n->bc.op >= LDS_OP2_LDS_ADD_RET && n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) { |
| need_oqa = true; |
| ndst++; |
| } |
| |
| if (n->bc.op == LDS_OP2_LDS_READ2_RET || n->bc.op == LDS_OP1_LDS_READ_REL_RET) { |
| need_oqb = true; |
| ndst++; |
| } |
| |
| n->dst.resize(ndst); |
| if (need_oqa) |
| n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA); |
| if (need_oqb) |
| n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB); |
| if (need_rw) |
| n->dst[ncount++] = sh->get_special_value(SV_LDS_RW); |
| |
| n->flags |= NF_DONT_MOVE | NF_DONT_HOIST; |
| |
| } else if (flags & AF_PRED) { |
| n->dst.resize(3); |
| if (n->bc.update_pred) |
| n->dst[1] = sh->get_special_value(SV_ALU_PRED); |
| if (n->bc.update_exec_mask) |
| n->dst[2] = sh->get_special_value(SV_EXEC_MASK); |
| |
| n->flags |= NF_DONT_HOIST; |
| |
| } else if (flags & AF_KILL) { |
| |
| n->dst.resize(2); |
| n->dst[1] = sh->get_special_value(SV_VALID_MASK); |
| sh->set_uses_kill(); |
| |
| n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | |
| NF_DONT_KILL | NF_SCHEDULE_EARLY; |
| |
| } else { |
| n->dst.resize(1); |
| } |
| |
| if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) { |
			// Move the CF_IDX value into the TEX instruction operands; the
			// scheduler will later re-emit the CF_IDX setup, and DCE will
			// kill this op.
| save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1); |
| } else if (flags & AF_MOVA) { |
| |
| n->dst[0] = sh->get_special_value(SV_AR_INDEX); |
| save_mova(n); |
| |
| n->flags |= NF_DONT_HOIST; |
| |
| } else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) && !(flags & AF_LDS)) { |
| assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); |
| |
| value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, |
| n->bc.dst_rel); |
| |
| n->dst[0] = v; |
| } |
| |
| if (n->bc.pred_sel) { |
| sh->has_alu_predication = true; |
| n->pred = sh->get_special_value(SV_ALU_PRED); |
| } |
| |
| for (unsigned s = 0; s < src_count; ++s) { |
| bc_alu_src &src = n->bc.src[s]; |
| |
| if (src.sel == ALU_SRC_LITERAL) { |
| n->src[s] = sh->get_const_value(src.value); |
| } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { |
| unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? |
| SLOT_TRANS : src.chan; |
| |
				// XXX shouldn't happen, but the LLVM backend uses PS on cayman
| if (prev_slot == SLOT_TRANS && ctx.is_cayman()) |
| prev_slot = SLOT_X; |
| |
| alu_node *prev_alu = slots[pgroup][prev_slot]; |
| |
| assert(prev_alu); |
| |
| if (!prev_alu->dst[0]) { |
| value * t = sh->create_temp_value(); |
| prev_alu->dst[0] = t; |
| } |
| |
| value *d = prev_alu->dst[0]; |
| |
| if (d->is_rel()) { |
| d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr, |
| prev_alu->bc.dst_chan, |
| prev_alu->bc.dst_rel); |
| } |
| |
| n->src[s] = d; |
| } else if (ctx.is_kcache_sel(src.sel)) { |
| unsigned sel = src.sel, kc_addr; |
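				// kcache sets 0/1 use sels 128..191 and sets 2/3 use
				// 256..319: bit 8 of sel selects the upper pair of sets,
				// bit 5 the odd set within a pair. kc.addr is in
				// 16-constant lines; the low 5 bits of sel index the
				// set's 32-constant window.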
| unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1); |
| |
| bc_kcache &kc = cf->bc.kc[kc_set]; |
| kc_addr = (kc.addr << 4) + (sel & 0x1F); |
| n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode); |
| |
| if (kc.index_mode != KC_INDEX_NONE) { |
| assert(kc.index_mode != KC_LOCK_LOOP); |
| ubo_indexing[kc.index_mode - KC_INDEX_0] = true; |
| } |
| } else if (src.sel < MAX_GPR) { |
| value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); |
| |
| n->src[s] = v; |
| |
| } else if (src.sel >= ALU_SRC_PARAM_OFFSET) { |
				// use the slot as the value channel, because the slot is what
				// actually determines the channel loaded by INTERP_LOAD_P0
				// (and possibly some other instructions).
				// Otherwise GVN would consider INTERP_LOAD_P0s with the same
				// param index to be equal instructions and keep only one of them.
| n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, |
| n->bc.slot)); |
| } else if (ctx.is_lds_oq(src.sel)) { |
| switch (src.sel) { |
| case ALU_SRC_LDS_OQ_A: |
| case ALU_SRC_LDS_OQ_B: |
| assert(!"Unsupported LDS queue access in SB"); |
| break; |
| case ALU_SRC_LDS_OQ_A_POP: |
| n->src[s] = sh->get_special_value(SV_LDS_OQA); |
| break; |
| case ALU_SRC_LDS_OQ_B_POP: |
| n->src[s] = sh->get_special_value(SV_LDS_OQB); |
| break; |
| } |
| n->flags |= NF_DONT_HOIST | NF_DONT_MOVE; |
| |
| } else { |
| switch (src.sel) { |
| case ALU_SRC_0: |
| n->src[s] = sh->get_const_value(0); |
| break; |
| case ALU_SRC_0_5: |
| n->src[s] = sh->get_const_value(0.5f); |
| break; |
| case ALU_SRC_1: |
| n->src[s] = sh->get_const_value(1.0f); |
| break; |
| case ALU_SRC_1_INT: |
| n->src[s] = sh->get_const_value(1); |
| break; |
| case ALU_SRC_M_1_INT: |
| n->src[s] = sh->get_const_value(-1); |
| break; |
| default: |
| n->src[s] = sh->get_special_ro_value(src.sel); |
| break; |
| } |
| } |
| } |
| |
| // add UBO index values if any as dependencies |
| if (ubo_indexing[0]) { |
| n->src.push_back(get_cf_index_value(0)); |
| } |
| if (ubo_indexing[1]) { |
| n->src.push_back(get_cf_index_value(1)); |
| } |
| |
| if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && |
| ctx.is_cayman()) |
		// Move the CF_IDX value into the TEX instruction operands; the
		// scheduler will later re-emit the CF_IDX setup.
| save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); |
| } |
| |
| // pack multislot instructions into alu_packed_node |
| |
| alu_packed_node *p = NULL; |
| for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) { |
| N = I + 1; |
| alu_node *a = static_cast<alu_node*>(*I); |
| unsigned sflags = a->bc.slot_flags; |
| |
| if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) { |
| if (!p) |
| p = sh->create_alu_packed(); |
| |
| a->remove(); |
| p->push_back(a); |
| } |
| } |
| |
| if (p) { |
| g->push_front(p); |
| |
| if (p->count() == 3 && ctx.is_cayman()) { |
			// cayman scalar instructions can use 3 or 4 slots

			// FIXME for simplicity we always add the 4th slot, but we might
			// instead want to always remove the 4th slot and make sure that
			// regalloc won't choose the 'w' component for dst
| |
| alu_node *f = static_cast<alu_node*>(p->first); |
| alu_node *a = sh->create_alu(); |
| a->src = f->src; |
| a->dst.resize(f->dst.size()); |
| a->bc = f->bc; |
| a->bc.slot = SLOT_W; |
| p->push_back(a); |
| } |
| } |
| |
| return 0; |
| } |
| |
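// As with ALU clauses, cf->bc.addr is in 64-bit units (<< 1 for the dword
// offset) and cf->bc.count + 1 is the number of fetch instructions.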
| int bc_parser::decode_fetch_clause(cf_node* cf) { |
| int r; |
| unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; |
| |
	if (cf->bc.op_ptr->flags & FF_GDS)
| cf->subtype = NST_GDS_CLAUSE; |
| else |
| cf->subtype = NST_TEX_CLAUSE; |
| |
| while (cnt--) { |
| fetch_node *n = sh->create_fetch(); |
| cf->push_back(n); |
| if ((r = dec->decode_fetch(i, n->bc))) |
| return r; |
| if (n->bc.src_rel || n->bc.dst_rel) |
| gpr_reladdr = true; |
| |
| } |
| return 0; |
| } |
| |
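// Build source and destination values for each fetch instruction.
// SET_GRADIENTS_H/V and SET_TEXTURE_OFFSETS have no visible destinations;
// their sources are captured here and folded into the instructions that
// consume them (see the comment below).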
| int bc_parser::prepare_fetch_clause(cf_node *cf) { |
| |
| vvec grad_v, grad_h, texture_offsets; |
| |
| for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { |
| |
| fetch_node *n = static_cast<fetch_node*>(*I); |
| assert(n->is_valid()); |
| |
| unsigned flags = n->bc.op_ptr->flags; |
| |
| unsigned vtx = flags & FF_VTX; |
| unsigned gds = flags & FF_GDS; |
| unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4; |
| |
| n->dst.resize(4); |
| |
| if (gds) { |
| n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL; |
| } |
| if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { |
| sh->uses_gradients = true; |
| } |
| |
| if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) { |
| |
| vvec *grad = NULL; |
| |
| switch (n->bc.op) { |
| case FETCH_OP_SET_GRADIENTS_V: |
| grad = &grad_v; |
| break; |
| case FETCH_OP_SET_GRADIENTS_H: |
| grad = &grad_h; |
| break; |
| case FETCH_OP_SET_TEXTURE_OFFSETS: |
| grad = &texture_offsets; |
| break; |
| default: |
| assert(!"unexpected SET_GRAD instruction"); |
| return -1; |
| } |
| |
| if (grad->empty()) |
| grad->resize(4); |
| |
| for(unsigned s = 0; s < 4; ++s) { |
| unsigned sw = n->bc.src_sel[s]; |
| if (sw <= SEL_W) |
| (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr, |
| sw, false); |
| else if (sw == SEL_0) |
| (*grad)[s] = sh->get_const_value(0.0f); |
| else if (sw == SEL_1) |
| (*grad)[s] = sh->get_const_value(1.0f); |
| } |
| } else { |
			// Fold source values of instructions with hidden target values
			// into the instructions using them. The SET instructions are
			// later re-emitted by bc_finalizer.
| if (flags & FF_USEGRAD) { |
| n->src.resize(12); |
| std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4); |
| std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8); |
| } else if (flags & FF_USE_TEXTURE_OFFSETS) { |
| n->src.resize(8); |
| std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4); |
| } else { |
| n->src.resize(4); |
| } |
| |
| for(int s = 0; s < 4; ++s) { |
| if (n->bc.dst_sel[s] != SEL_MASK) |
| n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false); |
| // NOTE: it doesn't matter here which components of the result we |
| // are using, but original n->bc.dst_sel should be taken into |
| // account when building the bytecode |
| } |
| for(unsigned s = 0; s < num_src; ++s) { |
| if (n->bc.src_sel[s] <= SEL_W) |
| n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr, |
| n->bc.src_sel[s], false); |
| } |
| |
| // Scheduler will emit the appropriate instructions to set CF_IDX0/1 |
| if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) { |
| n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1)); |
| } |
| if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) { |
| n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1)); |
| } |
| } |
| } |
| |
| return 0; |
| } |
| |
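// Second pass over the decoded CF list: build the structured IR
// (region/repeat/depart nodes for loops and branches), unroll burst
// exports and memory writes into individual CF nodes, and attach the
// implicit dependencies of geometry shader emits.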
| int bc_parser::prepare_ir() { |
| |
| for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) { |
| cf_node *c = *I; |
| |
| if (!c) |
| continue; |
| |
| unsigned flags = c->bc.op_ptr->flags; |
| |
| if (flags & CF_ALU) { |
| prepare_alu_clause(c); |
| } else if (flags & CF_FETCH) { |
| prepare_fetch_clause(c); |
| } else if (c->bc.op == CF_OP_CALL_FS) { |
| sh->init_call_fs(c); |
| c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; |
| } else if (flags & CF_LOOP_START) { |
| prepare_loop(c); |
| } else if (c->bc.op == CF_OP_JUMP) { |
| prepare_if(c); |
| } else if (c->bc.op == CF_OP_LOOP_END) { |
| loop_stack.pop(); |
| } else if (c->bc.op == CF_OP_LOOP_CONTINUE) { |
| assert(!loop_stack.empty()); |
| repeat_node *rep = sh->create_repeat(loop_stack.top()); |
| if (c->parent->first != c) |
| rep->move(c->parent->first, c); |
| c->replace_with(rep); |
| sh->simplify_dep_rep(rep); |
| } else if (c->bc.op == CF_OP_LOOP_BREAK) { |
| assert(!loop_stack.empty()); |
| depart_node *dep = sh->create_depart(loop_stack.top()); |
| if (c->parent->first != c) |
| dep->move(c->parent->first, c); |
| c->replace_with(dep); |
| sh->simplify_dep_rep(dep); |
| } else if (flags & CF_EXP) { |
| |
| // unroll burst exports |
| |
| assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE); |
| |
| c->bc.set_op(CF_OP_EXPORT); |
| |
| unsigned burst_count = c->bc.burst_count; |
| unsigned eop = c->bc.end_of_program; |
| |
| c->bc.end_of_program = 0; |
| c->bc.burst_count = 0; |
| |
| do { |
| c->src.resize(4); |
| |
| for(int s = 0; s < 4; ++s) { |
| switch (c->bc.sel[s]) { |
| case SEL_0: |
| c->src[s] = sh->get_const_value(0.0f); |
| break; |
| case SEL_1: |
| c->src[s] = sh->get_const_value(1.0f); |
| break; |
| case SEL_MASK: |
| break; |
| default: |
| if (c->bc.sel[s] <= SEL_W) |
| c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, |
| c->bc.sel[s], false); |
| else |
| assert(!"invalid src_sel for export"); |
| } |
| } |
| |
| if (!burst_count--) |
| break; |
| |
| cf_node *cf_next = sh->create_cf(); |
| cf_next->bc = c->bc; |
| ++cf_next->bc.rw_gpr; |
| ++cf_next->bc.array_base; |
| |
| c->insert_after(cf_next); |
| c = cf_next; |
| |
| } while (1); |
| |
| c->bc.end_of_program = eop; |
| } else if (flags & CF_MEM) { |
| |
| unsigned burst_count = c->bc.burst_count; |
| unsigned eop = c->bc.end_of_program; |
| |
| c->bc.end_of_program = 0; |
| c->bc.burst_count = 0; |
| |
| do { |
| |
| c->src.resize(4); |
| |
| for(int s = 0; s < 4; ++s) { |
| if (c->bc.comp_mask & (1 << s)) |
| c->src[s] = |
| sh->get_gpr_value(true, c->bc.rw_gpr, s, false); |
| } |
| |
| if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write |
| c->src.resize(8); |
| for(int s = 0; s < 3; ++s) { |
| c->src[4 + s] = |
| sh->get_gpr_value(true, c->bc.index_gpr, s, false); |
| } |
| |
| // FIXME probably we can relax it a bit |
| c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; |
| } |
| |
| if (flags & CF_EMIT) { |
| // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX |
| c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); |
| c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); |
| if (sh->target == TARGET_ES) { |
| // For ES shaders this is an export |
| c->flags |= NF_DONT_KILL; |
| } |
| } |
| |
| if (!burst_count--) |
| break; |
| |
| cf_node *cf_next = sh->create_cf(); |
| cf_next->bc = c->bc; |
| ++cf_next->bc.rw_gpr; |
| |
			// FIXME is this correct?
| cf_next->bc.array_base += cf_next->bc.elem_size + 1; |
| |
| c->insert_after(cf_next); |
| c = cf_next; |
| } while (1); |
| |
| c->bc.end_of_program = eop; |
| |
| } else if (flags & CF_EMIT) { |
| /* quick peephole */ |
| cf_node *prev = static_cast<cf_node *>(c->prev); |
| if (c->bc.op == CF_OP_CUT_VERTEX && |
| prev && prev->is_valid() && |
| prev->bc.op == CF_OP_EMIT_VERTEX && |
| c->bc.count == prev->bc.count) { |
| prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX); |
| prev->bc.end_of_program = c->bc.end_of_program; |
| c->remove(); |
| } |
| else { |
| c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; |
| |
| c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); |
| c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); |
| } |
| } |
| } |
| |
| assert(loop_stack.empty()); |
| return 0; |
| } |
| |
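// LOOP_START's addr points one past the matching LOOP_END, so
// cf_map[addr - 1] is the LOOP_END itself. The loop body is wrapped in a
// region + repeat pair; the region is popped off loop_stack when
// prepare_ir reaches the LOOP_END.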
| int bc_parser::prepare_loop(cf_node* c) { |
| assert(c->bc.addr-1 < cf_map.size()); |
| |
| cf_node *end = cf_map[c->bc.addr - 1]; |
| assert(end->bc.op == CF_OP_LOOP_END); |
| assert(c->parent == end->parent); |
| |
| region_node *reg = sh->create_region(); |
| repeat_node *rep = sh->create_repeat(reg); |
| |
| reg->push_back(rep); |
| c->insert_before(reg); |
| rep->move(c, end->next); |
| |
| reg->src_loop = true; |
| |
| loop_stack.push(reg); |
| return 0; |
| } |
| |
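// JUMP's addr points at the matching ELSE (if any) or past the branch.
// The conditional is modeled as a region with two departs: dep wraps the
// whole construct including the else part, while dep2 sits inside the
// if_node and departs early, skipping the else part.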
| int bc_parser::prepare_if(cf_node* c) { |
	assert(c->bc.addr < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
| |
| if (!end) |
| return 0; // not quite sure how this happens, malformed input? |
| |
| BCP_DUMP( |
| sblog << "parsing JUMP @" << c->bc.id; |
| sblog << "\n"; |
| ); |
| |
| if (end->bc.op == CF_OP_ELSE) { |
| BCP_DUMP( |
| sblog << " found ELSE : "; |
| dump::dump_op(end); |
| sblog << "\n"; |
| ); |
| |
| c_else = end; |
| end = cf_map[c_else->bc.addr]; |
| } else { |
| BCP_DUMP( |
| sblog << " no else\n"; |
| ); |
| |
| c_else = end; |
| } |
| |
| if (c_else->parent != c->parent) |
| c_else = NULL; |
| |
| if (end && end->parent != c->parent) |
| end = NULL; |
| |
| region_node *reg = sh->create_region(); |
| |
| depart_node *dep2 = sh->create_depart(reg); |
| depart_node *dep = sh->create_depart(reg); |
| if_node *n_if = sh->create_if(); |
| |
| c->insert_before(reg); |
| |
| if (c_else != end) |
| dep->move(c_else, end); |
| dep2->move(c, end); |
| |
| reg->push_back(dep); |
| dep->push_front(n_if); |
| n_if->push_back(dep2); |
| |
| n_if->cond = sh->get_special_value(SV_EXEC_MASK); |
| |
| return 0; |
| } |
| |
| |
| } // namespace r600_sb |