| /* |
| * Copyright (C) 2019 Google. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "util/ralloc.h" |
| |
| #include "ir3.h" |
| |
| static bool |
| is_fp16_conv(struct ir3_instruction *instr) |
| { |
| if (instr->opc != OPC_MOV) |
| return false; |
| |
| struct ir3_register *dst = instr->regs[0]; |
| struct ir3_register *src = instr->regs[1]; |
| |
| /* disallow conversions that cannot be folded into |
| * alu instructions: |
| */ |
| if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF)) |
| return false; |
| |
| if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) |
| return false; |
| if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) |
| return false; |
| |
| if (instr->cat1.src_type == TYPE_F32 && |
| instr->cat1.dst_type == TYPE_F16) |
| return true; |
| |
| if (instr->cat1.src_type == TYPE_F16 && |
| instr->cat1.dst_type == TYPE_F32) |
| return true; |
| |
| return false; |
| } |
| |
| static bool |
| all_uses_fp16_conv(struct ir3_instruction *conv_src) |
| { |
| foreach_ssa_use (use, conv_src) |
| if (!is_fp16_conv(use)) |
| return false; |
| return true; |
| } |
| |
| /* For an instruction which has a conversion folded in, re-write the |
| * uses of *all* conv's that used that src to be a simple mov that |
| * cp can eliminate. This avoids invalidating the SSA uses, it just |
| * shifts the use to a simple mov. |
| */ |
| static void |
| rewrite_src_uses(struct ir3_instruction *src) |
| { |
| foreach_ssa_use (use, src) { |
| assert(is_fp16_conv(use)); |
| |
| if (is_half(src)) { |
| use->regs[1]->flags |= IR3_REG_HALF; |
| } else { |
| use->regs[1]->flags &= ~IR3_REG_HALF; |
| } |
| |
| use->cat1.src_type = use->cat1.dst_type; |
| } |
| } |
| |
| static bool |
| try_conversion_folding(struct ir3_instruction *conv) |
| { |
| struct ir3_instruction *src; |
| |
| if (!is_fp16_conv(conv)) |
| return false; |
| |
| /* NOTE: we can have non-ssa srcs after copy propagation: */ |
| src = ssa(conv->regs[1]); |
| if (!src) |
| return false; |
| |
| if (!is_alu(src)) |
| return false; |
| |
| /* avoid folding f2f32(f2f16) together, in cases where this is legal to |
| * do (glsl) nir should have handled that for us already: |
| */ |
| if (is_fp16_conv(src)) |
| return false; |
| |
| switch (src->opc) { |
| case OPC_SEL_B32: |
| case OPC_SEL_B16: |
| case OPC_MAX_F: |
| case OPC_MIN_F: |
| case OPC_SIGN_F: |
| case OPC_ABSNEG_F: |
| return false; |
| case OPC_MOV: |
| /* if src is a "cov" and type doesn't match, then it can't be folded |
| * for example cov.u32u16+cov.f16f32 can't be folded to cov.u32f32 |
| */ |
| if (src->cat1.dst_type != src->cat1.src_type && |
| conv->cat1.src_type != src->cat1.dst_type) |
| return false; |
| break; |
| default: |
| break; |
| } |
| |
| if (!all_uses_fp16_conv(src)) |
| return false; |
| |
| if (src->opc == OPC_MOV) { |
| if (src->cat1.dst_type == src->cat1.src_type) { |
| /* If we're folding a conversion into a bitwise move, we need to |
| * change the dst type to F32 to get the right behavior, since we |
| * could be moving a float with a u32.u32 move. |
| */ |
| src->cat1.dst_type = conv->cat1.dst_type; |
| src->cat1.src_type = conv->cat1.src_type; |
| } else { |
| /* Otherwise, for typechanging movs, we can just change the dst |
| * type to F16 to collaps the two conversions. For example |
| * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16. |
| */ |
| src->cat1.dst_type = conv->cat1.dst_type; |
| } |
| } |
| |
| ir3_set_dst_type(src, is_half(conv)); |
| rewrite_src_uses(src); |
| |
| return true; |
| } |
| |
| bool |
| ir3_cf(struct ir3 *ir) |
| { |
| void *mem_ctx = ralloc_context(NULL); |
| bool progress = false; |
| |
| ir3_find_ssa_uses(ir, mem_ctx, false); |
| |
| foreach_block (block, &ir->block_list) { |
| foreach_instr (instr, &block->instr_list) { |
| progress |= try_conversion_folding(instr); |
| } |
| } |
| |
| ralloc_free(mem_ctx); |
| |
| return progress; |
| } |