| /* |
| * Copyright (C) 2020 Collabora, Ltd. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "compiler.h" |
| |
| /* NIR creates vectors as vecN ops, which we represent by a synthetic |
| * BI_COMBINE instruction, e.g.: |
| * |
| * v = combine x, y, z, w |
| * |
| * These combines need to be lowered by the pass in this file. Fix a given |
| * source at component c. |
| * |
| * First suppose the source is SSA. If it is also scalar, then we may rewrite |
| * the destination of the generating instruction (unique by SSA+scalar) to |
| * write to v.c, and rewrite each of its uses to swizzle out .c instead of .x |
| * (the original by scalar). If it is vector, there are two cases. If the |
| * component c is `x`, we are accessing v.x, and each of the succeeding |
| * components y, z... up to the last component of the vector are accessed |
| * sequentially, then we may perform the same rewrite. If this is not the case, |
| * rewriting would require more complex vector features, so we fallback on a |
| * move. |
| * |
 * Otherwise, if the source is not SSA, we also fallback on a move. We could
| * probably do better. |
| */ |
| |
| static void |
| bi_combine_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R) |
| { |
| bi_instruction move = { |
| .type = BI_MOV, |
| .dest = R, |
| .dest_type = nir_type_uint32, |
| .dest_offset = comp, |
| .src = { parent->src[comp] }, |
| .src_types = { nir_type_uint32 }, |
| .swizzle = { { parent->swizzle[comp][0] } } |
| }; |
| |
| bi_emit_before(ctx, parent, move); |
| } |
| |
| static void |
| bi_combine_sel16(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R) |
| { |
| bi_instruction sel = { |
| .type = BI_SELECT, |
| .dest = R, |
| .dest_type = nir_type_uint32, |
| .dest_offset = comp >> 1, |
| .src = { parent->src[comp], parent->src[comp + 1] }, |
| .src_types = { nir_type_uint16, nir_type_uint16 }, |
| .swizzle = { |
| { parent->swizzle[comp][0] }, |
| { parent->swizzle[comp + 1][0] }, |
| } |
| }; |
| |
| /* In case we have a combine from a vec3 */ |
| if (!sel.src[1]) |
| sel.src[1] = BIR_INDEX_ZERO; |
| |
| bi_emit_before(ctx, parent, sel); |
| } |
| |
| /* Copies result of combine from the temp R to the instruction destination, |
| * given a bitsize sz */ |
| |
| static void |
| bi_combine_copy(bi_context *ctx, bi_instruction *ins, unsigned R, unsigned sz) |
| { |
| bi_foreach_src(ins, s) { |
| if (!ins->src[s]) |
| continue; |
| |
| /* Iterate by 32-bits */ |
| unsigned shift = (sz == 8) ? 2 : |
| (sz == 16) ? 1 : 0; |
| |
| if (s & ((1 << shift) - 1)) |
| continue; |
| |
| bi_instruction copy = { |
| .type = BI_MOV, |
| .dest = ins->dest, |
| .dest_type = nir_type_uint32, |
| .dest_offset = s >> shift, |
| .src = { R }, |
| .src_types = { nir_type_uint32 }, |
| .swizzle = { { s >> shift } } |
| }; |
| |
| bi_emit_before(ctx, ins, copy); |
| } |
| } |
| |
/* Lowers every BI_COMBINE in the block: each combine is expanded into
 * per-word writes (32-bit moves, or 16-bit selects packing pairs) into a
 * destination register, then the combine itself is removed.
 *
 * SSA destinations are staged through a fresh temporary R whose uses are
 * rewritten afterwards; register destinations are written in place unless
 * the combine reads its own output, in which case we write R and copy back
 * (see bi_combine_copy). */
void
bi_lower_combine(bi_context *ctx, bi_block *block)
{
        bi_foreach_instr_in_block_safe(block, ins) {
                if (ins->type != BI_COMBINE) continue;

                /* If a register COMBINE reads its own output, we need a
                 * temporary move to allow for swapping. TODO: Could do a bit
                 * better for pairwise swaps of 16-bit vectors */
                bool reads_self = false;

                bi_foreach_src(ins, s) {
                        if(ins->src[s] == ins->dest)
                                reads_self = true;
                }

                /* SSA destinations (no PAN_IS_REG bit) cannot be written
                 * piecewise in place, so they always go through the temp */
                bool needs_rewrite = !(ins->dest & PAN_IS_REG);
                bool needs_copy = (ins->dest & PAN_IS_REG) && reads_self;
                bool needs_temp = needs_rewrite || needs_copy;

                unsigned R = needs_temp ? bi_make_temp_reg(ctx) : ins->dest;
                unsigned sz = nir_alu_type_get_type_size(ins->dest_type);

                bi_foreach_src(ins, s) {
                        /* We're done early for vec2/3 */
                        if (!ins->src[s])
                                continue;

                        if (sz == 32)
                                bi_combine_mov32(ctx, ins, s, R);
                        else if (sz == 16) {
                                /* A select consumes components s and s + 1,
                                 * so skip the odd component next iteration */
                                bi_combine_sel16(ctx, ins, s, R);
                                s++;
                        } else {
                                unreachable("Unknown COMBINE size");
                        }
                }

                if (needs_rewrite)
                        bi_rewrite_uses(ctx, ins->dest, 0, R, 0);
                else if (needs_copy)
                        bi_combine_copy(ctx, ins, R, sz);

                bi_remove_instruction(ins);
        }
}