| /* |
| * Copyright (c) 2017 Lima Project |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sub license, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| * |
| */ |
| |
| #include <string.h> |
| |
| #include "util/hash_table.h" |
| #include "util/ralloc.h" |
| #include "util/bitscan.h" |
| #include "compiler/nir/nir.h" |
| #include "pipe/p_state.h" |
| |
| |
| #include "ppir.h" |
| |
| static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa) |
| { |
| ppir_node *node = ppir_node_create(block, op, ssa->index, 0); |
| if (!node) |
| return NULL; |
| |
| ppir_dest *dest = ppir_node_get_dest(node); |
| dest->type = ppir_target_ssa; |
| dest->ssa.num_components = ssa->num_components; |
| dest->write_mask = u_bit_consecutive(0, ssa->num_components); |
| |
| if (node->type == ppir_node_type_load || |
| node->type == ppir_node_type_store) |
| dest->ssa.is_head = true; |
| |
| return node; |
| } |
| |
| static void *ppir_node_create_reg(ppir_block *block, ppir_op op, |
| nir_register *reg, unsigned mask) |
| { |
| ppir_node *node = ppir_node_create(block, op, reg->index, mask); |
| if (!node) |
| return NULL; |
| |
| ppir_dest *dest = ppir_node_get_dest(node); |
| |
| list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) { |
| if (r->index == reg->index) { |
| dest->reg = r; |
| break; |
| } |
| } |
| |
| dest->type = ppir_target_register; |
| dest->write_mask = mask; |
| |
| if (node->type == ppir_node_type_load || |
| node->type == ppir_node_type_store) |
| dest->reg->is_head = true; |
| |
| return node; |
| } |
| |
| static void *ppir_node_create_dest(ppir_block *block, ppir_op op, |
| nir_dest *dest, unsigned mask) |
| { |
| unsigned index = -1; |
| |
| if (dest) { |
| if (dest->is_ssa) |
| return ppir_node_create_ssa(block, op, &dest->ssa); |
| else |
| return ppir_node_create_reg(block, op, dest->reg.reg, mask); |
| } |
| |
| return ppir_node_create(block, op, index, 0); |
| } |
| |
| static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, |
| ppir_src *ps, nir_src *ns, unsigned mask) |
| { |
| ppir_node *child = NULL; |
| |
| if (ns->is_ssa) { |
| child = comp->var_nodes[ns->ssa->index]; |
| if (child->op != ppir_op_undef) |
| ppir_node_add_dep(node, child, ppir_dep_src); |
| } |
| else { |
| nir_register *reg = ns->reg.reg; |
| while (mask) { |
| int swizzle = ps->swizzle[u_bit_scan(&mask)]; |
| child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle]; |
| /* Reg is read before it was written, create a dummy node for it */ |
| if (!child) { |
| child = ppir_node_create_reg(node->block, ppir_op_dummy, reg, |
| u_bit_consecutive(0, 4)); |
| comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child; |
| } |
| /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */ |
| if (child && node != child && child->op != ppir_op_dummy) |
| ppir_node_add_dep(node, child, ppir_dep_src); |
| } |
| } |
| |
| ppir_node_target_assign(ps, child); |
| } |
| |
| static int nir_to_ppir_opcodes[nir_num_opcodes] = { |
| /* not supported */ |
| [0 ... nir_last_opcode] = -1, |
| |
| [nir_op_mov] = ppir_op_mov, |
| [nir_op_fmul] = ppir_op_mul, |
| [nir_op_fabs] = ppir_op_abs, |
| [nir_op_fneg] = ppir_op_neg, |
| [nir_op_fadd] = ppir_op_add, |
| [nir_op_fsum3] = ppir_op_sum3, |
| [nir_op_fsum4] = ppir_op_sum4, |
| [nir_op_frsq] = ppir_op_rsqrt, |
| [nir_op_flog2] = ppir_op_log2, |
| [nir_op_fexp2] = ppir_op_exp2, |
| [nir_op_fsqrt] = ppir_op_sqrt, |
| [nir_op_fsin] = ppir_op_sin, |
| [nir_op_fcos] = ppir_op_cos, |
| [nir_op_fmax] = ppir_op_max, |
| [nir_op_fmin] = ppir_op_min, |
| [nir_op_frcp] = ppir_op_rcp, |
| [nir_op_ffloor] = ppir_op_floor, |
| [nir_op_fceil] = ppir_op_ceil, |
| [nir_op_ffract] = ppir_op_fract, |
| [nir_op_sge] = ppir_op_ge, |
| [nir_op_slt] = ppir_op_lt, |
| [nir_op_seq] = ppir_op_eq, |
| [nir_op_sne] = ppir_op_ne, |
| [nir_op_fcsel] = ppir_op_select, |
| [nir_op_inot] = ppir_op_not, |
| [nir_op_ftrunc] = ppir_op_trunc, |
| [nir_op_fsat] = ppir_op_sat, |
| [nir_op_fddx] = ppir_op_ddx, |
| [nir_op_fddy] = ppir_op_ddy, |
| }; |
| |
| static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) |
| { |
| nir_alu_instr *instr = nir_instr_as_alu(ni); |
| int op = nir_to_ppir_opcodes[instr->op]; |
| |
| if (op < 0) { |
| ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); |
| return false; |
| } |
| |
| ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest, |
| instr->dest.write_mask); |
| if (!node) |
| return false; |
| |
| ppir_dest *pd = &node->dest; |
| nir_alu_dest *nd = &instr->dest; |
| if (nd->saturate) |
| pd->modifier = ppir_outmod_clamp_fraction; |
| |
| unsigned src_mask; |
| switch (op) { |
| case ppir_op_sum3: |
| src_mask = 0b0111; |
| break; |
| case ppir_op_sum4: |
| src_mask = 0b1111; |
| break; |
| default: |
| src_mask = pd->write_mask; |
| break; |
| } |
| |
| unsigned num_child = nir_op_infos[instr->op].num_inputs; |
| node->num_src = num_child; |
| |
| for (int i = 0; i < num_child; i++) { |
| nir_alu_src *ns = instr->src + i; |
| ppir_src *ps = node->src + i; |
| memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle)); |
| ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask); |
| |
| ps->absolute = ns->abs; |
| ps->negate = ns->negate; |
| } |
| |
| list_addtail(&node->node.list, &block->node_list); |
| return true; |
| } |
| |
| static ppir_block *ppir_block_create(ppir_compiler *comp); |
| |
| static bool ppir_emit_discard_block(ppir_compiler *comp) |
| { |
| ppir_block *block = ppir_block_create(comp); |
| ppir_discard_node *discard; |
| if (!block) |
| return false; |
| |
| comp->discard_block = block; |
| block->comp = comp; |
| |
| discard = ppir_node_create(block, ppir_op_discard, -1, 0); |
| if (discard) |
| list_addtail(&discard->node.list, &block->node_list); |
| else |
| return false; |
| |
| return true; |
| } |
| |
| static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni) |
| { |
| nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); |
| ppir_node *node; |
| ppir_compiler *comp = block->comp; |
| ppir_branch_node *branch; |
| |
| if (!comp->discard_block && !ppir_emit_discard_block(comp)) |
| return NULL; |
| |
| node = ppir_node_create(block, ppir_op_branch, -1, 0); |
| if (!node) |
| return NULL; |
| branch = ppir_node_to_branch(node); |
| |
| /* second src and condition will be updated during lowering */ |
| ppir_node_add_src(block->comp, node, &branch->src[0], |
| &instr->src[0], u_bit_consecutive(0, instr->num_components)); |
| branch->num_src = 1; |
| branch->target = comp->discard_block; |
| |
| return node; |
| } |
| |
| static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni) |
| { |
| ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0); |
| |
| return node; |
| } |
| |
| static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) |
| { |
| ppir_node *node; |
| nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); |
| unsigned mask = 0; |
| ppir_load_node *lnode; |
| ppir_alu_node *alu_node; |
| |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_input: |
| if (!instr->dest.is_ssa) |
| mask = u_bit_consecutive(0, instr->num_components); |
| |
| lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask); |
| if (!lnode) |
| return false; |
| |
| lnode->num_components = instr->num_components; |
| lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr); |
| if (nir_src_is_const(instr->src[0])) |
| lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4); |
| else { |
| lnode->num_src = 1; |
| ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1); |
| } |
| list_addtail(&lnode->node.list, &block->node_list); |
| return true; |
| |
| case nir_intrinsic_load_frag_coord: |
| case nir_intrinsic_load_point_coord: |
| case nir_intrinsic_load_front_face: |
| if (!instr->dest.is_ssa) |
| mask = u_bit_consecutive(0, instr->num_components); |
| |
| ppir_op op; |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_frag_coord: |
| op = ppir_op_load_fragcoord; |
| break; |
| case nir_intrinsic_load_point_coord: |
| op = ppir_op_load_pointcoord; |
| break; |
| case nir_intrinsic_load_front_face: |
| op = ppir_op_load_frontface; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| lnode = ppir_node_create_dest(block, op, &instr->dest, mask); |
| if (!lnode) |
| return false; |
| |
| lnode->num_components = instr->num_components; |
| list_addtail(&lnode->node.list, &block->node_list); |
| return true; |
| |
| case nir_intrinsic_load_uniform: |
| if (!instr->dest.is_ssa) |
| mask = u_bit_consecutive(0, instr->num_components); |
| |
| lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask); |
| if (!lnode) |
| return false; |
| |
| lnode->num_components = instr->num_components; |
| lnode->index = nir_intrinsic_base(instr); |
| if (nir_src_is_const(instr->src[0])) |
| lnode->index += (uint32_t)nir_src_as_float(instr->src[0]); |
| else { |
| lnode->num_src = 1; |
| ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1); |
| } |
| |
| list_addtail(&lnode->node.list, &block->node_list); |
| return true; |
| |
| case nir_intrinsic_store_output: { |
| /* In simple cases where the store_output is ssa, that register |
| * can be directly marked as the output. |
| * If discard is used or the source is not ssa, things can get a |
| * lot more complicated, so don't try to optimize those and fall |
| * back to inserting a mov at the end. |
| * If the source node will only be able to output to pipeline |
| * registers, fall back to the mov as well. */ |
| if (!block->comp->uses_discard && instr->src->is_ssa) { |
| node = block->comp->var_nodes[instr->src->ssa->index]; |
| switch (node->op) { |
| case ppir_op_load_uniform: |
| case ppir_op_load_texture: |
| case ppir_op_const: |
| break; |
| default: |
| node->is_end = 1; |
| return true; |
| } |
| } |
| |
| alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0); |
| if (!alu_node) |
| return false; |
| |
| ppir_dest *dest = ppir_node_get_dest(&alu_node->node); |
| dest->type = ppir_target_ssa; |
| dest->ssa.num_components = instr->num_components; |
| dest->ssa.index = 0; |
| dest->write_mask = u_bit_consecutive(0, instr->num_components); |
| |
| alu_node->num_src = 1; |
| |
| for (int i = 0; i < instr->num_components; i++) |
| alu_node->src[0].swizzle[i] = i; |
| |
| ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src, |
| u_bit_consecutive(0, instr->num_components)); |
| |
| alu_node->node.is_end = 1; |
| |
| list_addtail(&alu_node->node.list, &block->node_list); |
| return true; |
| } |
| |
| case nir_intrinsic_discard: |
| node = ppir_emit_discard(block, ni); |
| list_addtail(&node->list, &block->node_list); |
| return true; |
| |
| case nir_intrinsic_discard_if: |
| node = ppir_emit_discard_if(block, ni); |
| list_addtail(&node->list, &block->node_list); |
| return true; |
| |
| default: |
| ppir_error("unsupported nir_intrinsic_instr %s\n", |
| nir_intrinsic_infos[instr->intrinsic].name); |
| return false; |
| } |
| } |
| |
| static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni) |
| { |
| nir_load_const_instr *instr = nir_instr_as_load_const(ni); |
| ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def); |
| if (!node) |
| return false; |
| |
| assert(instr->def.bit_size == 32); |
| |
| for (int i = 0; i < instr->def.num_components; i++) |
| node->constant.value[i].i = instr->value[i].i32; |
| node->constant.num = instr->def.num_components; |
| |
| list_addtail(&node->node.list, &block->node_list); |
| return true; |
| } |
| |
| static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni) |
| { |
| nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni); |
| ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def); |
| if (!node) |
| return false; |
| ppir_alu_node *alu = ppir_node_to_alu(node); |
| |
| ppir_dest *dest = &alu->dest; |
| dest->ssa.undef = true; |
| |
| list_addtail(&node->list, &block->node_list); |
| return true; |
| } |
| |
| static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) |
| { |
| nir_tex_instr *instr = nir_instr_as_tex(ni); |
| ppir_load_texture_node *node; |
| |
| switch (instr->op) { |
| case nir_texop_tex: |
| case nir_texop_txb: |
| case nir_texop_txl: |
| break; |
| default: |
| ppir_error("unsupported texop %d\n", instr->op); |
| return false; |
| } |
| |
| switch (instr->sampler_dim) { |
| case GLSL_SAMPLER_DIM_2D: |
| case GLSL_SAMPLER_DIM_CUBE: |
| case GLSL_SAMPLER_DIM_RECT: |
| case GLSL_SAMPLER_DIM_EXTERNAL: |
| break; |
| default: |
| ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim); |
| return false; |
| } |
| |
| /* emit ld_tex node */ |
| |
| unsigned mask = 0; |
| if (!instr->dest.is_ssa) |
| mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr)); |
| |
| node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask); |
| if (!node) |
| return false; |
| |
| node->sampler = instr->texture_index; |
| node->sampler_dim = instr->sampler_dim; |
| |
| for (int i = 0; i < instr->coord_components; i++) |
| node->src[0].swizzle[i] = i; |
| |
| for (int i = 0; i < instr->num_srcs; i++) { |
| switch (instr->src[i].src_type) { |
| case nir_tex_src_coord: { |
| nir_src *ns = &instr->src[i].src; |
| if (ns->is_ssa) { |
| ppir_node *child = block->comp->var_nodes[ns->ssa->index]; |
| if (child->op == ppir_op_load_varying) { |
| /* If the successor is load_texture, promote it to load_coords */ |
| nir_tex_src *nts = (nir_tex_src *)ns; |
| if (nts->src_type == nir_tex_src_coord) |
| child->op = ppir_op_load_coords; |
| } |
| } |
| |
| /* src[0] is not used by the ld_tex instruction but ensures |
| * correct scheduling due to the pipeline dependency */ |
| ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src, |
| u_bit_consecutive(0, instr->coord_components)); |
| node->num_src++; |
| break; |
| } |
| case nir_tex_src_bias: |
| case nir_tex_src_lod: |
| node->lod_bias_en = true; |
| node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod); |
| ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1); |
| node->num_src++; |
| break; |
| default: |
| ppir_error("unsupported texture source type\n"); |
| return false; |
| } |
| } |
| |
| list_addtail(&node->node.list, &block->node_list); |
| |
| /* validate load coords node */ |
| |
| ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node; |
| ppir_load_node *load = NULL; |
| |
| if (src_coords && ppir_node_has_single_src_succ(src_coords) && |
| (src_coords->op == ppir_op_load_coords)) |
| load = ppir_node_to_load(src_coords); |
| else { |
| /* Create load_coords node */ |
| load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0); |
| if (!load) |
| return false; |
| list_addtail(&load->node.list, &block->node_list); |
| |
| load->src = node->src[0]; |
| load->num_src = 1; |
| if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE) |
| load->num_components = 3; |
| else |
| load->num_components = 2; |
| |
| ppir_debug("%s create load_coords node %d for %d\n", |
| __FUNCTION__, load->index, node->node.index); |
| |
| ppir_node_foreach_pred_safe((&node->node), dep) { |
| ppir_node *pred = dep->pred; |
| ppir_node_remove_dep(dep); |
| ppir_node_add_dep(&load->node, pred, ppir_dep_src); |
| } |
| ppir_node_add_dep(&node->node, &load->node, ppir_dep_src); |
| } |
| |
| assert(load); |
| node->src[0].type = load->dest.type = ppir_target_pipeline; |
| node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard; |
| |
| return true; |
| } |
| |
| static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock) |
| { |
| ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock); |
| |
| return block; |
| } |
| |
| static bool ppir_emit_jump(ppir_block *block, nir_instr *ni) |
| { |
| ppir_node *node; |
| ppir_compiler *comp = block->comp; |
| ppir_branch_node *branch; |
| ppir_block *jump_block; |
| nir_jump_instr *jump = nir_instr_as_jump(ni); |
| |
| switch (jump->type) { |
| case nir_jump_break: { |
| assert(comp->current_block->successors[0]); |
| assert(!comp->current_block->successors[1]); |
| jump_block = comp->current_block->successors[0]; |
| } |
| break; |
| case nir_jump_continue: |
| jump_block = comp->loop_cont_block; |
| break; |
| default: |
| ppir_error("nir_jump_instr not support\n"); |
| return false; |
| } |
| |
| assert(jump_block != NULL); |
| |
| node = ppir_node_create(block, ppir_op_branch, -1, 0); |
| if (!node) |
| return false; |
| branch = ppir_node_to_branch(node); |
| |
| /* Unconditional */ |
| branch->num_src = 0; |
| branch->target = jump_block; |
| |
| list_addtail(&node->list, &block->node_list); |
| return true; |
| } |
| |
| static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = { |
| [nir_instr_type_alu] = ppir_emit_alu, |
| [nir_instr_type_intrinsic] = ppir_emit_intrinsic, |
| [nir_instr_type_load_const] = ppir_emit_load_const, |
| [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef, |
| [nir_instr_type_tex] = ppir_emit_tex, |
| [nir_instr_type_jump] = ppir_emit_jump, |
| }; |
| |
| static ppir_block *ppir_block_create(ppir_compiler *comp) |
| { |
| ppir_block *block = rzalloc(comp, ppir_block); |
| if (!block) |
| return NULL; |
| |
| list_inithead(&block->node_list); |
| list_inithead(&block->instr_list); |
| |
| block->comp = comp; |
| |
| return block; |
| } |
| |
| static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock) |
| { |
| ppir_block *block = ppir_get_block(comp, nblock); |
| |
| comp->current_block = block; |
| |
| list_addtail(&block->list, &comp->block_list); |
| |
| nir_foreach_instr(instr, nblock) { |
| assert(instr->type < nir_instr_type_phi); |
| if (!ppir_emit_instr[instr->type](block, instr)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list); |
| |
| static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt) |
| { |
| ppir_node *node; |
| ppir_branch_node *else_branch, *after_branch; |
| nir_block *nir_else_block = nir_if_first_else_block(if_stmt); |
| bool empty_else_block = |
| (nir_else_block == nir_if_last_else_block(if_stmt) && |
| exec_list_is_empty(&nir_else_block->instr_list)); |
| ppir_block *block = comp->current_block; |
| |
| node = ppir_node_create(block, ppir_op_branch, -1, 0); |
| if (!node) |
| return false; |
| else_branch = ppir_node_to_branch(node); |
| ppir_node_add_src(block->comp, node, &else_branch->src[0], |
| &if_stmt->condition, 1); |
| else_branch->num_src = 1; |
| /* Negate condition to minimize branching. We're generating following: |
| * current_block: { ...; if (!statement) branch else_block; } |
| * then_block: { ...; branch after_block; } |
| * else_block: { ... } |
| * after_block: { ... } |
| * |
| * or if else list is empty: |
| * block: { if (!statement) branch else_block; } |
| * then_block: { ... } |
| * else_block: after_block: { ... } |
| */ |
| else_branch->negate = true; |
| list_addtail(&else_branch->node.list, &block->node_list); |
| |
| if (!ppir_emit_cf_list(comp, &if_stmt->then_list)) |
| return false; |
| |
| if (empty_else_block) { |
| nir_block *nblock = nir_if_last_else_block(if_stmt); |
| assert(nblock->successors[0]); |
| assert(!nblock->successors[1]); |
| else_branch->target = ppir_get_block(comp, nblock->successors[0]); |
| /* Add empty else block to the list */ |
| list_addtail(&block->successors[1]->list, &comp->block_list); |
| return true; |
| } |
| |
| else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt)); |
| |
| nir_block *last_then_block = nir_if_last_then_block(if_stmt); |
| assert(last_then_block->successors[0]); |
| assert(!last_then_block->successors[1]); |
| block = ppir_get_block(comp, last_then_block); |
| node = ppir_node_create(block, ppir_op_branch, -1, 0); |
| if (!node) |
| return false; |
| after_branch = ppir_node_to_branch(node); |
| /* Unconditional */ |
| after_branch->num_src = 0; |
| after_branch->target = ppir_get_block(comp, last_then_block->successors[0]); |
| /* Target should be after_block, will fixup later */ |
| list_addtail(&after_branch->node.list, &block->node_list); |
| |
| if (!ppir_emit_cf_list(comp, &if_stmt->else_list)) |
| return false; |
| |
| return true; |
| } |
| |
| static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop) |
| { |
| ppir_block *save_loop_cont_block = comp->loop_cont_block; |
| ppir_block *block; |
| ppir_branch_node *loop_branch; |
| nir_block *loop_last_block; |
| ppir_node *node; |
| |
| comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop)); |
| |
| if (!ppir_emit_cf_list(comp, &nloop->body)) |
| return false; |
| |
| loop_last_block = nir_loop_last_block(nloop); |
| block = ppir_get_block(comp, loop_last_block); |
| node = ppir_node_create(block, ppir_op_branch, -1, 0); |
| if (!node) |
| return false; |
| loop_branch = ppir_node_to_branch(node); |
| /* Unconditional */ |
| loop_branch->num_src = 0; |
| loop_branch->target = comp->loop_cont_block; |
| list_addtail(&loop_branch->node.list, &block->node_list); |
| |
| comp->loop_cont_block = save_loop_cont_block; |
| |
| comp->num_loops++; |
| |
| return true; |
| } |
| |
| static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc) |
| { |
| ppir_error("function nir_cf_node not support\n"); |
| return false; |
| } |
| |
| static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list) |
| { |
| foreach_list_typed(nir_cf_node, node, node, list) { |
| bool ret; |
| |
| switch (node->type) { |
| case nir_cf_node_block: |
| ret = ppir_emit_block(comp, nir_cf_node_as_block(node)); |
| break; |
| case nir_cf_node_if: |
| ret = ppir_emit_if(comp, nir_cf_node_as_if(node)); |
| break; |
| case nir_cf_node_loop: |
| ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node)); |
| break; |
| case nir_cf_node_function: |
| ret = ppir_emit_function(comp, nir_cf_node_as_function(node)); |
| break; |
| default: |
| ppir_error("unknown NIR node type %d\n", node->type); |
| return false; |
| } |
| |
| if (!ret) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) |
| { |
| ppir_compiler *comp = rzalloc_size( |
| prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *)); |
| if (!comp) |
| return NULL; |
| |
| list_inithead(&comp->block_list); |
| list_inithead(&comp->reg_list); |
| comp->blocks = _mesa_hash_table_u64_create(prog); |
| |
| comp->var_nodes = (ppir_node **)(comp + 1); |
| comp->reg_base = num_ssa; |
| comp->prog = prog; |
| return comp; |
| } |
| |
| static void ppir_add_ordering_deps(ppir_compiler *comp) |
| { |
| /* Some intrinsics do not have explicit dependencies and thus depend |
| * on instructions order. Consider discard_if and the is_end node as |
| * example. If we don't add fake dependency of discard_if to is_end, |
| * scheduler may put the is_end first and since is_end terminates |
| * shader on Utgard PP, rest of it will never be executed. |
| * Add fake dependencies for discard/branch/store to preserve |
| * instruction order. |
| * |
| * TODO: scheduler should schedule discard_if as early as possible otherwise |
| * we may end up with suboptimal code for cases like this: |
| * |
| * s3 = s1 < s2 |
| * discard_if s3 |
| * s4 = s1 + s2 |
| * store s4 |
| * |
| * In this case store depends on discard_if and s4, but since dependencies can |
| * be scheduled in any order it can result in code like this: |
| * |
| * instr1: s3 = s1 < s3 |
| * instr2: s4 = s1 + s2 |
| * instr3: discard_if s3 |
| * instr4: store s4 |
| */ |
| list_for_each_entry(ppir_block, block, &comp->block_list, list) { |
| ppir_node *prev_node = NULL; |
| list_for_each_entry_rev(ppir_node, node, &block->node_list, list) { |
| if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) { |
| ppir_node_add_dep(prev_node, node, ppir_dep_sequence); |
| } |
| if (node->is_end || |
| node->op == ppir_op_discard || |
| node->op == ppir_op_store_temp || |
| node->op == ppir_op_branch) { |
| prev_node = node; |
| } |
| } |
| } |
| } |
| |
| static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp, |
| struct pipe_debug_callback *debug) |
| { |
| const struct shader_info *info = &nir->info; |
| char *shaderdb; |
| int ret = asprintf(&shaderdb, |
| "%s shader: %d inst, %d loops, %d:%d spills:fills\n", |
| gl_shader_stage_name(info->stage), |
| comp->cur_instr_index, |
| comp->num_loops, |
| comp->num_spills, |
| comp->num_fills); |
| assert(ret >= 0); |
| |
| if (lima_debug & LIMA_DEBUG_SHADERDB) |
| fprintf(stderr, "SHADER-DB: %s\n", shaderdb); |
| |
| pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb); |
| free(shaderdb); |
| } |
| |
| static void ppir_add_write_after_read_deps(ppir_compiler *comp) |
| { |
| list_for_each_entry(ppir_block, block, &comp->block_list, list) { |
| list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { |
| ppir_node *write = NULL; |
| list_for_each_entry_rev(ppir_node, node, &block->node_list, list) { |
| for (int i = 0; i < ppir_node_get_src_num(node); i++) { |
| ppir_src *src = ppir_node_get_src(node, i); |
| if (src && src->type == ppir_target_register && |
| src->reg == reg && |
| write) { |
| ppir_debug("Adding dep %d for write %d\n", node->index, write->index); |
| ppir_node_add_dep(write, node, ppir_dep_write_after_read); |
| } |
| } |
| ppir_dest *dest = ppir_node_get_dest(node); |
| if (dest && dest->type == ppir_target_register && |
| dest->reg == reg) |
| write = node; |
| } |
| } |
| } |
| } |
| |
| bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, |
| struct ra_regs *ra, |
| struct pipe_debug_callback *debug) |
| { |
| nir_function_impl *func = nir_shader_get_entrypoint(nir); |
| ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); |
| if (!comp) |
| return false; |
| |
| comp->ra = ra; |
| comp->uses_discard = nir->info.fs.uses_discard; |
| |
| /* 1st pass: create ppir blocks */ |
| nir_foreach_function(function, nir) { |
| if (!function->impl) |
| continue; |
| |
| nir_foreach_block(nblock, function->impl) { |
| ppir_block *block = ppir_block_create(comp); |
| if (!block) |
| return false; |
| block->index = nblock->index; |
| _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block); |
| } |
| } |
| |
| /* 2nd pass: populate successors */ |
| nir_foreach_function(function, nir) { |
| if (!function->impl) |
| continue; |
| |
| nir_foreach_block(nblock, function->impl) { |
| ppir_block *block = ppir_get_block(comp, nblock); |
| assert(block); |
| |
| for (int i = 0; i < 2; i++) { |
| if (nblock->successors[i]) |
| block->successors[i] = ppir_get_block(comp, nblock->successors[i]); |
| } |
| } |
| } |
| |
| /* Validate outputs, we support only gl_FragColor */ |
| nir_foreach_shader_out_variable(var, nir) { |
| switch (var->data.location) { |
| case FRAG_RESULT_COLOR: |
| case FRAG_RESULT_DATA0: |
| break; |
| default: |
| ppir_error("unsupported output type\n"); |
| goto err_out0; |
| break; |
| } |
| } |
| |
| foreach_list_typed(nir_register, reg, node, &func->registers) { |
| ppir_reg *r = rzalloc(comp, ppir_reg); |
| if (!r) |
| return false; |
| |
| r->index = reg->index; |
| r->num_components = reg->num_components; |
| r->is_head = false; |
| list_addtail(&r->list, &comp->reg_list); |
| } |
| |
| if (!ppir_emit_cf_list(comp, &func->body)) |
| goto err_out0; |
| |
| /* If we have discard block add it to the very end */ |
| if (comp->discard_block) |
| list_addtail(&comp->discard_block->list, &comp->block_list); |
| |
| ppir_node_print_prog(comp); |
| |
| if (!ppir_lower_prog(comp)) |
| goto err_out0; |
| |
| ppir_add_ordering_deps(comp); |
| ppir_add_write_after_read_deps(comp); |
| |
| ppir_node_print_prog(comp); |
| |
| if (!ppir_node_to_instr(comp)) |
| goto err_out0; |
| |
| if (!ppir_schedule_prog(comp)) |
| goto err_out0; |
| |
| if (!ppir_regalloc_prog(comp)) |
| goto err_out0; |
| |
| if (!ppir_codegen_prog(comp)) |
| goto err_out0; |
| |
| ppir_print_shader_db(nir, comp, debug); |
| |
| _mesa_hash_table_u64_destroy(comp->blocks, NULL); |
| ralloc_free(comp); |
| return true; |
| |
| err_out0: |
| _mesa_hash_table_u64_destroy(comp->blocks, NULL); |
| ralloc_free(comp); |
| return false; |
| } |
| |