| /* |
| * Copyright (C) 2020 Collabora, Ltd. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| /* Autogenerated file, do not edit */ |
| |
| #ifndef _BI_GENERATED_PACK_H |
| #define _BI_GENERATED_PACK_H |
| |
| #include "compiler.h" |
| #include "bi_pack_helpers.h" |
| |
| static inline unsigned |
| pan_pack_fma_rshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 32) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (lanes1 == 0) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); |
| } else if ((lanes1 == 1) || (lanes1 == 2)) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 3) derived_9 = 0; |
| else if (lanes1 == 4) derived_9 = 1; |
| else if (lanes1 == 5) derived_9 = 2; |
| else if (lanes1 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_u32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_var_flat(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float32) register_format_temp = 0; |
| else if (ins->format == nir_type_float16) register_format_temp = 1; |
| else if (ins->format == nir_type_uint32) register_format_temp = 2; |
| else if (ins->format == nir_type_int32) register_format_temp = 3; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 8); |
| |
| unsigned function = 3; |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 4) { |
| unsigned derived_10 = 0; |
| if ((register_format == 0) || (register_format == 1)) derived_10 = 0; |
| else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| unsigned derived_19 = 0; |
| if ((register_format == 0) || (register_format == 2)) derived_19 = 0; |
| else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; |
| else unreachable("No pattern match at pos 19"); |
| |
| return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19); |
| } else if (register_format == 4) { |
| return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0); |
| } else { |
| unreachable("No matching state found in add_ld_var_flat"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_clz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned mask = 0; |
| |
| return 0x701fd0 | (src0 << 0) | (mask << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_clz_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned mask = 0; |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_popcount_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| return 0x73c6d8 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fatan_table_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 2); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x67900 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned result_word = 0; |
| |
| return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_isubb_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcbrt_approx_b_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x67ab0 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); |
| } else { |
| unreachable("No matching state found in fma_lshift_xor_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_texs_cube_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned sampler_index = ins->texture.sampler_index; |
| unsigned texture_index = ins->texture.texture_index; |
| bi_write_staging_register(clause, ins); |
| return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| return 0x66ac0 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_doorbell(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0xd7860 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_logb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x3d980 | (src0 << 0) | (swz0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_arshift_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0x8); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } else { |
| unreachable("No matching state found in fma_arshift_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_vn_asst1_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ldexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; |
| else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 2); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 0) derived_9 = 0; |
| else if (lanes1 == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (lanes0 == 0) derived_10 = 0; |
| else if (lanes0 == 1) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 2) derived_9 = 0; |
| else if (lanes1 == 3) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 4) derived_9 = 0; |
| else if (lanes1 == 5) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_v2u16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchc_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned combine = 0; |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| unsigned derived_9 = 0; |
| if (lane0 == 0) derived_9 = 0; |
| else if (lane0 == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_3 = 0; |
| if (lane0 == 1) derived_3 = 0; |
| else if (lane0 == 0) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) | (derived_9 << 9) | (derived_3 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_9 = 0; |
| if (round == 0) derived_9 = 0; |
| else if (round == 1) derived_9 = 1; |
| else if (round == 2) derived_9 = 2; |
| else if (round == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9); |
| } else if (round == 4) { |
| return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3); |
| } else { |
| unreachable("No matching state found in fma_fround_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_vn_asst2_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| return 0x3dfa0 | (src0 << 0) | (neg0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmpi_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fma_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| unsigned swz2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned swz2_temp = 0; |
| if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) swz2_temp = 0; |
| else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swz2_temp = 1; |
| else if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swz2_temp = 2; |
| else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) swz2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz2 = swz2_temp; |
| assert(swz2 < 4); |
| |
| unsigned derived_17 = 0; |
| if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; |
| else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; |
| else unreachable("No pattern match at pos 17"); |
| |
| return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_arshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned result_word = 0; |
| |
| return 0x33e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fmul_slice_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| return 0x70cb40 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_var_flat_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float32) register_format_temp = 0; |
| else if (ins->format == nir_type_float16) register_format_temp = 1; |
| else if (ins->format == nir_type_uint32) register_format_temp = 2; |
| else if (ins->format == nir_type_int32) register_format_temp = 3; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 8); |
| |
| unsigned function = 3; |
| |
| unsigned index = bi_get_immediate(ins, 0); |
| bi_write_staging_register(clause, ins); |
| if (register_format != 4) { |
| unsigned derived_10 = 0; |
| if ((register_format == 0) || (register_format == 1)) derived_10 = 0; |
| else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| unsigned derived_19 = 0; |
| if ((register_format == 0) || (register_format == 2)) derived_19 = 0; |
| else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; |
| else unreachable("No pattern match at pos 19"); |
| |
| return 0x53800 | (vecsize << 8) | (function << 0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19); |
| } else if (register_format == 4) { |
| return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3); |
| } else { |
| unreachable("No matching state found in add_ld_var_flat_imm"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| if ((cmpf == 4) || (cmpf == 5)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| } |
| |
| if (cmpf == 3) { |
| { unsigned temp = src2; src2 = src3; src3 = temp; } |
| if (cmpf == 3) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else if (cmpf == 2) derived_12 = 2; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| if (cmpf == 1) { |
| { unsigned temp = src2; src2 = src3; src3 = temp; } |
| if (cmpf == 1) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 3; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane_temp = 0; |
| if (lane_sz == 32) lane_temp = 0; |
| else if (lane_sz == 64) lane_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane = lane_temp; |
| assert(lane < 2); |
| |
| ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; |
| bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; |
| unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0; |
| assert(extend < 4); |
| |
| bi_write_staging_register(clause, ins); |
| if ((extend == 0) && (lane == 0)) { |
| return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } else if ((extend != 0) && (lane == 1)) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_load_i32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_st_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| unsigned src2 = bi_get_src(ins, regs, 3); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_read_staging_register(clause, ins); |
| return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); |
| } else { |
| unreachable("No matching state found in fma_rshift_or_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 2); |
| |
| return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_pre_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_shaddxl_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else if (lane1_sz == 32) lane1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 4); |
| |
| unsigned shift = 0; |
| return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((src0 == src1) && (cmpf == 0)) derived_9 = 1; |
| else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x701900 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); |
| } else { |
| unreachable("No matching state found in fma_lshift_or_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_var_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned update = (ins->constant.u64 >= 20) ? 3 : 0; |
| assert(update < 4); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float32) register_format_temp = 0; |
| else if (ins->format == nir_type_float16) register_format_temp = 1; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 4); |
| |
| unsigned sample = ins->load_vary.interp_mode; |
| assert(sample < 8); |
| |
| unsigned index = bi_get_immediate(ins, 0); |
| bi_write_staging_register(clause, ins); |
| if (register_format != 2) { |
| unsigned derived_19 = 0; |
| if (register_format == 0) derived_19 = 0; |
| else if (register_format == 1) derived_19 = 1; |
| else unreachable("No pattern match at pos 19"); |
| |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10); |
| } else if (register_format == 2) { |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10); |
| } else { |
| unreachable("No matching state found in add_ld_var_imm"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_barrier(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| return 0xd7874; |
| } |
| |
| static inline unsigned |
| pan_pack_add_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0x3d970 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2s8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 16); |
| |
| return 0x3c800 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fsincos_offset_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned scale = 0; |
| |
| return 0x67aa0 | (src0 << 0) | (scale << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_lea_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_11 = 0; |
| if (register_format == 0) derived_11 = 0; |
| else if (register_format == 1) derived_11 = 1; |
| else if (register_format == 2) derived_11 = 2; |
| else if (register_format == 3) derived_11 = 3; |
| else if (register_format == 4) derived_11 = 4; |
| else if (register_format == 5) derived_11 = 5; |
| else if (register_format == 6) derived_11 = 6; |
| else if (register_format == 7) derived_11 = 7; |
| else unreachable("No pattern match at pos 11"); |
| |
| return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); |
| } else if (register_format == 8) { |
| return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } else { |
| unreachable("No matching state found in add_lea_attr"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| } |
| |
| if (round != 4) { |
| unsigned derived_13 = 0; |
| if (round == 0) derived_13 = 0; |
| else if (round == 1) derived_13 = 1; |
| else if (round == 2) derived_13 = 2; |
| else if (round == 3) derived_13 = 3; |
| else unreachable("No pattern match at pos 13"); |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; |
| else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9); |
| } else if ((round == 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) { |
| return 0x75200 | (src0 << 0) | (src1 << 3); |
| } else { |
| unreachable("No matching state found in add_fadd_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_post_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned preserve_null = 0; |
| |
| return 0x701540 | (src0 << 0) | (seg << 3) | (preserve_null << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned preserve_null = 0; |
| |
| return 0x3d540 | (src0 << 0) | (seg << 3) | (preserve_null << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| if (log == 0) { |
| return 0x701c20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3); |
| } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { |
| return 0x701e20 | (src0 << 0) | (widen0 << 3); |
| } else { |
| unreachable("No matching state found in fma_frexpe_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_frsq_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x67280 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); |
| } else { |
| unreachable("No matching state found in fma_lshift_and_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; |
| else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3; |
| else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; |
| else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5; |
| else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; |
| else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_clper_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0x7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned lane_op = 0; |
| |
| unsigned subgroup = 1; |
| |
| unsigned inactive_result = 0; |
| |
| return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) | (subgroup << 8) | (inactive_result << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2s16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cb00 | (src0 << 0) | (swz0 << 4); |
| } else { |
| unreachable("No matching state found in add_v2s16_to_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c1_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f5e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_axchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_vn_asst1_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned h = 0; |
| |
| unsigned l = 0; |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fma_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ); |
| unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0; |
| assert(round < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| unsigned special = 0; |
| |
| unsigned derived_16 = 0; |
| if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; |
| else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; |
| else unreachable("No pattern match at pos 16"); |
| |
| unsigned derived_12 = 0; |
| if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; |
| else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; |
| else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; |
| else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; |
| else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; |
| else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; |
| else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6; |
| else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_imul_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_write_staging_register(clause, ins); |
| return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_imov_fma(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| unsigned threads = 0; |
| |
| return 0xd7820 | (threads << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmpi_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_jump_ex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned test_mode = 0; |
| |
| unsigned stack_mode = 2; |
| |
| return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 32) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (lanes1 == 0) { |
| return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8); |
| } else if ((lanes1 == 1) || (lanes1 == 2)) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 3) derived_9 = 0; |
| else if (lanes1 == 4) derived_9 = 1; |
| else if (lanes1 == 5) derived_9 = 2; |
| else if (lanes1 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_s32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_rshift_xor_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_shaddxh_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| return 0x3f8c0 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 8); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if ((lanes0 == 0) && (lanes1 == 0)) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); |
| } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else if (lanes1 == 3) derived_9 = 2; |
| else if (lanes1 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 5) derived_9 = 0; |
| else if (lanes1 == 6) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_v4u8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| if ((log == 0) && (neg0 == 0)) { |
| return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); |
| } else if ((log == 1) && (sqrt == 0)) { |
| return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); |
| } else { |
| unreachable("No matching state found in add_frexpm_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (log == 0) { |
| return 0x3dc00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3); |
| } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { |
| return 0x3de00 | (src0 << 0) | (swz0 << 3); |
| } else { |
| unreachable("No matching state found in add_frexpe_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_logb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| return 0x3d9a0 | (src0 << 0) | (widen0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_rshift_or_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_shaddxl_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else if (lane1_sz == 32) lane1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 4); |
| |
| unsigned shift = 0; |
| return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned result_type = 2; |
| |
| if ((neg0 == 0) && (neg1 == 1)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_13 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; |
| else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_clz_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned mask = 0; |
| |
| return 0x701f90 | (src0 << 0) | (mask << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| if (log == 0) { |
| return 0x3dc20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3); |
| } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { |
| return 0x3de20 | (src0 << 0) | (widen0 << 3); |
| } else { |
| unreachable("No matching state found in add_frexpe_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| return 0x701970 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fmin_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned sem = 0; |
| |
| if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (sem == 3) sem = 2; |
| else if (sem == 2) sem = 3; |
| } |
| |
| unsigned derived_6 = 0; |
| if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; |
| else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_var_tex_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| unsigned update = 0; |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned lod_mode = 1 - ins->texture.compute_lod; |
| assert(lod_mode < 2); |
| |
| unsigned sample = ins->load_vary.interp_mode; |
| assert(sample < 2); |
| |
| unsigned varying_index = bi_get_immediate(ins, 0); |
| unsigned texture_index = ins->texture.texture_index; |
| bi_write_staging_register(clause, ins); |
| unsigned derived_5 = 0; |
| if ((sample == 0) && (update == 0)) derived_5 = 0; |
| else if ((sample == 1) && (update == 1)) derived_5 = 1; |
| else unreachable("No pattern match at pos 5"); |
| |
| return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_lowbits_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| return 0x6fa38 | (src0 << 0) | (src1 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned result_word = 0; |
| |
| return 0x33c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_idp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| ASSERTED bool sign0_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; |
| bool sign0_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; |
| assert(sign0_small); |
| unsigned sign0 = sign0_signed ? 1 : 0; |
| assert(sign0 < 2); |
| |
| ASSERTED bool sign1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16; |
| bool sign1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int; |
| assert(sign1_small); |
| unsigned sign1 = sign1_signed ? 1 : 0; |
| assert(sign1 < 2); |
| |
| return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_6 = 0; |
| if (cmpf == 0) derived_6 = 0; |
| else if (cmpf == 1) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_acmpstore_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_cubeface2(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0x3de58 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x700cc0 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_9 = 0; |
| if (round == 0) derived_9 = 0; |
| else if (round == 1) derived_9 = 1; |
| else if (round == 2) derived_9 = 2; |
| else if (round == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9); |
| } else if (round == 4) { |
| return 0x707600 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3); |
| } else { |
| unreachable("No matching state found in fma_fround_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2u8_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 16); |
| |
| return 0x3c708 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fmax_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned sem = 0; |
| |
| if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (sem == 3) sem = 2; |
| else if (sem == 2) sem = 3; |
| } |
| |
| unsigned derived_6 = 0; |
| if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; |
| else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x700d10 | (src0 << 0) | (lane0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned combine = 0; |
| |
| return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_dtsel_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned table = 63; |
| return 0x70f200 | (src0 << 0) | (table << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 8); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if ((lanes0 == 0) && (lanes1 == 0)) { |
| return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8); |
| } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else if (lanes1 == 3) derived_9 = 2; |
| else if (lanes1 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 5) derived_9 = 0; |
| else if (lanes1 == 6) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_v4s8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_texs_2d_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned lod_mode = 1 - ins->texture.compute_lod; |
| assert(lod_mode < 2); |
| |
| unsigned texture_index = ins->texture.texture_index; |
| unsigned sampler_index = ins->texture.sampler_index; |
| bi_write_staging_register(clause, ins); |
| return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_vn_asst2_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned scale = 0; |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| if (scale == 0) { |
| return 0x3df80 | (src0 << 0) | (neg0 << 3); |
| } else if (scale == 1) { |
| return 0x3de80 | (src0 << 0) | (neg0 << 4); |
| } else { |
| unreachable("No matching state found in add_vn_asst2_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_texc(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| unsigned src2 = bi_get_src(ins, regs, 3); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_lea_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| unsigned attribute_index = bi_get_immediate(ins, 0); |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_11 = 0; |
| if (register_format == 0) derived_11 = 0; |
| else if (register_format == 1) derived_11 = 1; |
| else if (register_format == 2) derived_11 = 2; |
| else if (register_format == 3) derived_11 = 3; |
| else if (register_format == 4) derived_11 = 4; |
| else if (register_format == 5) derived_11 = 5; |
| else if (register_format == 6) derived_11 = 6; |
| else if (register_format == 7) derived_11 = 7; |
| else unreachable("No pattern match at pos 11"); |
| |
| return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11); |
| } else if (register_format == 8) { |
| return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6); |
| } else { |
| unreachable("No matching state found in add_lea_attr_imm"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_f16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cc40 | (src0 << 0) | (lane0 << 5); |
| } else { |
| unreachable("No matching state found in add_f16_to_s32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_st_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| unsigned src2 = bi_get_src(ins, regs, 3); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_read_staging_register(clause, ins); |
| return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_write_staging_register(clause, ins); |
| return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x700b40 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_cubeface1(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| unsigned derived_9 = 0; |
| if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0; |
| else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if (src0 > src1) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 0) cmpf = 2; |
| else if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| else if (cmpf == 1) cmpf = 3; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((src0 < src1) && (cmpf == 2)) derived_9 = 0; |
| else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; |
| else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2; |
| else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3; |
| else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iabs_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0x3dea0 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; |
| else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 2); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 0) derived_9 = 0; |
| else if (lanes1 == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (lanes0 == 0) derived_10 = 0; |
| else if (lanes0 == 1) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 2) derived_9 = 0; |
| else if (lanes1 == 3) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 4) derived_9 = 0; |
| else if (lanes1 == 5) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_v2u16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_6 = 0; |
| if (cmpf == 0) derived_6 = 0; |
| else if (cmpf == 1) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fsin_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned offset = 0; |
| |
| return 0x67a80 | (src0 << 0) | (offset << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_cube_ssel(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned derived_9 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0; |
| else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fatan_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| return 0x67a40 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; |
| else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 2); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 0) derived_9 = 0; |
| else if (lanes1 == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (lanes0 == 0) derived_10 = 0; |
| else if (lanes0 == 1) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 2) derived_9 = 0; |
| else if (lanes1 == 3) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 4) derived_9 = 0; |
| else if (lanes1 == 5) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_v2s16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c1_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 32) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (lanes1 == 0) { |
| return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8); |
| } else if ((lanes1 == 1) || (lanes1 == 2)) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 3) derived_9 = 0; |
| else if (lanes1 == 4) derived_9 = 1; |
| else if (lanes1 == 5) derived_9 = 2; |
| else if (lanes1 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_s32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned attribute_index = bi_get_immediate(ins, 0); |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_13 = 0; |
| if (register_format == 0) derived_13 = 0; |
| else if (register_format == 1) derived_13 = 1; |
| else if (register_format == 2) derived_13 = 2; |
| else if (register_format == 3) derived_13 = 3; |
| else if (register_format == 4) derived_13 = 4; |
| else if (register_format == 5) derived_13 = 5; |
| else if (register_format == 6) derived_13 = 6; |
| else if (register_format == 7) derived_13 = 7; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13); |
| } else if (register_format == 8) { |
| return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6); |
| } else { |
| unreachable("No matching state found in add_ld_attr_imm"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); |
| } else { |
| unreachable("No matching state found in fma_rshift_xor_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmpm_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 0) cmpf = 2; |
| else if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| else if (cmpf == 1) cmpf = 3; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; |
| else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3; |
| else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane_temp = 0; |
| if (lane_sz == 8 && ins->swizzle[0][0] == 0) lane_temp = 0; |
| else if (lane_sz == 8 && ins->swizzle[0][0] == 1) lane_temp = 1; |
| else if (lane_sz == 8 && ins->swizzle[0][0] == 2) lane_temp = 2; |
| else if (lane_sz == 8 && ins->swizzle[0][0] == 3) lane_temp = 3; |
| else if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 4; |
| else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 5; |
| else if (lane_sz == 32) lane_temp = 6; |
| else if (lane_sz == 64) lane_temp = 7; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane = lane_temp; |
| assert(lane < 8); |
| |
| ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; |
| bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; |
| unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0; |
| assert(extend < 4); |
| |
| bi_write_staging_register(clause, ins); |
| if ((extend == 0) && ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))) { |
| unsigned derived_9 = 0; |
| if (lane == 0) derived_9 = 0; |
| else if (lane == 1) derived_9 = 1; |
| else if (lane == 2) derived_9 = 2; |
| else if (lane == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else if ((extend != 0) && ((lane == 4) || (lane == 5))) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (lane == 4) derived_10 = 0; |
| else if (lane == 5) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((extend != 0) && (lane == 6)) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else if ((extend != 0) && (lane == 7)) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_load_i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| if ((cmpf == 4) || (cmpf == 5)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| } |
| |
| if (cmpf == 3) { |
| { unsigned temp = src2; src2 = src3; src3 = temp; } |
| if (cmpf == 3) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else if (cmpf == 2) derived_12 = 2; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frsq_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| if (widen0 == 0) { |
| return 0x67100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); |
| } else if (widen0 != 0) { |
| unsigned derived_7 = 0; |
| if (widen0 == 1) derived_7 = 0; |
| else if (widen0 == 2) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x67140 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); |
| } else { |
| unreachable("No matching state found in add_frsq_approx_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_iabs_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x3de88 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_13 = 0; |
| if (register_format == 0) derived_13 = 0; |
| else if (register_format == 1) derived_13 = 1; |
| else if (register_format == 2) derived_13 = 2; |
| else if (register_format == 3) derived_13 = 3; |
| else if (register_format == 4) derived_13 = 4; |
| else if (register_format == 5) derived_13 = 5; |
| else if (register_format == 6) derived_13 = 6; |
| else if (register_format == 7) derived_13 = 7; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); |
| } else if (register_format == 8) { |
| return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); |
| } else { |
| unreachable("No matching state found in add_ld_attr_tex"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_imuld(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0x33); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0x33); |
| |
| unsigned threads = 0; |
| |
| return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_var_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| |
| unsigned varying_name = ins->constant.u64 & 0x3; |
| assert(varying_name < 32); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned update = (ins->constant.u64 >= 20) ? 3 : 0; |
| assert(update < 4); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float32) register_format_temp = 0; |
| else if (ins->format == nir_type_float16) register_format_temp = 1; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 4); |
| |
| unsigned sample = ins->load_vary.interp_mode; |
| assert(sample < 8); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 2) { |
| unsigned derived_3 = 0; |
| if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; |
| else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; |
| else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; |
| else unreachable("No pattern match at pos 3"); |
| |
| unsigned derived_19 = 0; |
| if (register_format == 0) derived_19 = 0; |
| else if (register_format == 1) derived_19 = 1; |
| else unreachable("No pattern match at pos 19"); |
| |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10); |
| } else if (register_format == 2) { |
| unsigned derived_3 = 0; |
| if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; |
| else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; |
| else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; |
| else unreachable("No pattern match at pos 3"); |
| |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10); |
| } else { |
| unreachable("No matching state found in add_ld_var_special"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_fcos_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned offset = 0; |
| |
| return 0x67a88 | (src0 << 0) | (offset << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_write_staging_register(clause, ins); |
| return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_arshift_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0x8); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_arshift_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| if (cmpf == 1) { |
| { unsigned temp = src2; src2 = src3; src3 = temp; } |
| if (cmpf == 1) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 3; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_write_staging_register(clause, ins); |
| return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_write_staging_register(clause, ins); |
| return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ilogb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| return 0x3d9e0 | (src0 << 0) | (widen0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcp_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| if (widen0 == 0) { |
| return 0x67000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); |
| } else if (widen0 != 0) { |
| unsigned derived_7 = 0; |
| if (widen0 == 1) derived_7 = 0; |
| else if (widen0 == 2) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x67040 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); |
| } else { |
| unreachable("No matching state found in add_frcp_approx_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcp_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x67080 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); |
| } |
| |
| static inline unsigned |
| pan_pack_add_discard_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| if ((cmpf == 1) || (cmpf == 2)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_6 = 0; |
| if (cmpf == 0) derived_6 = 0; |
| else if (cmpf == 3) derived_6 = 1; |
| else if (cmpf == 4) derived_6 = 2; |
| else if (cmpf == 5) derived_6 = 3; |
| else unreachable("No pattern match at pos 6"); |
| |
| unsigned derived_8 = 0; |
| if ((widen0 == 1) && (widen1 == 1)) derived_8 = 0; |
| else if ((widen0 == 2) && (widen1 == 1)) derived_8 = 1; |
| else if ((widen0 == 1) && (widen1 == 2)) derived_8 = 2; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_8 = 3; |
| else if ((widen0 == 0) && (widen1 == 0)) derived_8 = 4; |
| else unreachable("No pattern match at pos 8"); |
| |
| return 0xc8800 | (src0 << 0) | (src1 << 3) | (derived_6 << 6) | (derived_8 << 8); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_iaddc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x3cd10 | (src0 << 0) | (lane0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fexp_table_u4(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned adj = 0; |
| |
| return 0x67ac0 | (src0 << 0) | (adj << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_no_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x6fa34 | (src0 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_acmpxchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| if ((widen0 == 2) && (widen1 == 1)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| } |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; |
| else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; |
| else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; |
| else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; |
| else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2; |
| else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3; |
| else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; |
| else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5; |
| else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; |
| else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_6 = 0; |
| if (cmpf == 0) derived_6 = 0; |
| else if (cmpf == 1) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_texs_cube_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned sampler_index = ins->texture.sampler_index; |
| unsigned texture_index = ins->texture.texture_index; |
| bi_write_staging_register(clause, ins); |
| return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| unsigned update = (ins->constant.u64 >= 20) ? 3 : 0; |
| assert(update < 4); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float32) register_format_temp = 0; |
| else if (ins->format == nir_type_float16) register_format_temp = 1; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 4); |
| |
| unsigned sample = ins->load_vary.interp_mode; |
| assert(sample < 8); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 2) { |
| unsigned derived_19 = 0; |
| if (register_format == 0) derived_19 = 0; |
| else if (register_format == 1) derived_19 = 1; |
| else unreachable("No pattern match at pos 19"); |
| |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10); |
| } else if (register_format == 2) { |
| unsigned derived_10 = 0; |
| if ((sample == 0) && (update == 0)) derived_10 = 0; |
| else if ((sample == 1) && (update == 0)) derived_10 = 1; |
| else if ((sample == 2) && (update == 0)) derived_10 = 2; |
| else if ((sample == 3) && (update == 0)) derived_10 = 3; |
| else if ((sample == 4) && (update == 1)) derived_10 = 4; |
| else if ((sample == 0) && (update == 2)) derived_10 = 8; |
| else if ((sample == 1) && (update == 2)) derived_10 = 9; |
| else if ((sample == 0) && (update == 3)) derived_10 = 10; |
| else if ((sample == 1) && (update == 3)) derived_10 = 11; |
| else if ((sample == 2) && (update == 3)) derived_10 = 12; |
| else if ((sample == 3) && (update == 3)) derived_10 = 13; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10); |
| } else { |
| unreachable("No matching state found in add_ld_var"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swap1_temp = 0; |
| if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0; |
| else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap1 = swap1_temp; |
| assert(swap1 < 2); |
| |
| unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swap0_temp = 0; |
| if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0; |
| else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap0 = swap0_temp; |
| assert(swap0 < 2); |
| |
| return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_swz_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x3d948 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_atest(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 2; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| bi_write_staging_register(clause, ins); |
| return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ldexp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_bitrev_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| return 0x701fc0 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmpi_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0x3d968 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| if ((log == 0) && (neg0 == 0)) { |
| return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); |
| } else if ((log == 1) && (sqrt == 0)) { |
| return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); |
| } else { |
| unreachable("No matching state found in fma_frexpm_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_atom_cx(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| unsigned src2 = bi_get_src(ins, regs, 3); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fadd_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT); |
| unsigned clamp = (ins->outmod == BIFROST_SAT) ? 1 : 0; |
| assert(clamp < 2); |
| |
| unsigned special = 0; |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned derived_9 = 0; |
| if ((clamp == 0) && (special == 0) && (round == 0)) derived_9 = 0; |
| else if ((clamp == 1) && (special == 0) && (round == 0)) derived_9 = 2; |
| else if ((clamp == 0) && (special == 1) && (round == 4)) derived_9 = 3; |
| else if ((clamp == 0) && (special == 1) && (round == 0)) derived_9 = 4; |
| else if ((clamp == 0) && (special == 1) && (round == 1)) derived_9 = 5; |
| else if ((clamp == 0) && (special == 1) && (round == 2)) derived_9 = 6; |
| else if ((clamp == 0) && (special == 1) && (round == 3)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_post_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_imul_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else if (widen1_sz == 8 && ins->swizzle[1][0] == 0) widen1_temp = 3; |
| else if (widen1_sz == 8 && ins->swizzle[1][0] == 1) widen1_temp = 4; |
| else if (widen1_sz == 8 && ins->swizzle[1][0] == 2) widen1_temp = 5; |
| else if (widen1_sz == 8 && ins->swizzle[1][0] == 3) widen1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 8); |
| |
| ASSERTED bool extend1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16; |
| bool extend1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int; |
| unsigned extend1 = extend1_small ? (extend1_signed ? 1 : 2) : 0; |
| assert(extend1 < 4); |
| |
| if ((extend1 == 0) && (widen1 == 0)) { |
| return 0x73c0c0 | (src0 << 0) | (src1 << 3); |
| } else if ((extend1 != 0) && ((widen1 == 1) || (widen1 == 2))) { |
| unsigned derived_9 = 0; |
| if (widen1 == 1) derived_9 = 0; |
| else if (widen1 == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (extend1 == 2) derived_10 = 0; |
| else if (extend1 == 1) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((extend1 != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) { |
| unsigned derived_9 = 0; |
| if (widen1 == 3) derived_9 = 0; |
| else if (widen1 == 4) derived_9 = 1; |
| else if (widen1 == 5) derived_9 = 2; |
| else if (widen1 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_11 = 0; |
| if (extend1 == 2) derived_11 = 0; |
| else if (extend1 == 1) derived_11 = 1; |
| else unreachable("No pattern match at pos 11"); |
| |
| return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11); |
| } else { |
| unreachable("No matching state found in fma_imul_i32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_flogd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x66340 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| if ((log == 0) && (neg0 == 0)) { |
| return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); |
| } else if ((log == 1) && (sqrt == 0)) { |
| return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); |
| } else { |
| unreachable("No matching state found in fma_frexpm_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_s8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x3cb80 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_zs_emit(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned stencil = (ins->src[1] != 0); |
| assert(stencil < 2); |
| |
| unsigned z = (ins->src[0] != 0); |
| assert(z < 2); |
| |
| bi_write_staging_register(clause, ins); |
| unsigned derived_9 = 0; |
| if ((stencil == 1) && (z == 0)) derived_9 = 1; |
| else if ((stencil == 0) && (z == 1)) derived_9 = 2; |
| else if ((stencil == 1) && (z == 1)) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_write_staging_register(clause, ins); |
| return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| unsigned derived_9 = 0; |
| if (cmpf == 2) derived_9 = 0; |
| else if (cmpf == 3) derived_9 = 1; |
| else if (cmpf == 1) derived_9 = 2; |
| else if (cmpf == 0) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned result_type = 2; |
| |
| if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; |
| else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_13 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; |
| else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c1_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swap1_temp = 0; |
| if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0; |
| else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap1 = swap1_temp; |
| assert(swap1 < 2); |
| |
| unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swap0_temp = 0; |
| if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0; |
| else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap0 = swap0_temp; |
| assert(swap0 < 2); |
| |
| return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_acmpstore_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned derived_6 = 0; |
| if (widen0 == 0) derived_6 = 0; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x66000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| } |
| |
| unsigned derived_6 = 0; |
| if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; |
| else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_var_tex_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| unsigned update = 0; |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned lod_mode = 1 - ins->texture.compute_lod; |
| assert(lod_mode < 2); |
| |
| unsigned sample = ins->load_vary.interp_mode; |
| assert(sample < 2); |
| |
| unsigned varying_index = bi_get_immediate(ins, 0); |
| unsigned texture_index = ins->texture.texture_index; |
| bi_write_staging_register(clause, ins); |
| unsigned derived_5 = 0; |
| if ((sample == 0) && (update == 0)) derived_5 = 0; |
| else if ((sample == 1) && (update == 1)) derived_5 = 1; |
| else unreachable("No pattern match at pos 5"); |
| |
| return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_lshift_and_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x3d900 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_iabs_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0x3deb0 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x3ccc8 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_csel_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_12 = 0; |
| if (cmpf == 0) derived_12 = 0; |
| else if (cmpf == 1) derived_12 = 1; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_shaddxl_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned shift = 0; |
| return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_s32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3cbc0 | (src0 << 0) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cd00 | (src0 << 0); |
| } else { |
| unreachable("No matching state found in add_s32_to_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_fmax_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned sem = 0; |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); |
| } |
| |
| static inline unsigned |
| pan_pack_add_shift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_jump(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x6fe34 | (src0 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| unsigned derived_9 = 0; |
| if (cmpf == 2) derived_9 = 0; |
| else if (cmpf == 3) derived_9 = 1; |
| else if (cmpf == 1) derived_9 = 2; |
| else if (cmpf == 0) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if (src0 < src1) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 0) cmpf = 2; |
| else if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| else if (cmpf == 1) cmpf = 3; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((src0 >= src1) && (cmpf == 2)) derived_9 = 0; |
| else if ((src0 >= src1) && (cmpf == 3)) derived_9 = 1; |
| else if ((src0 >= src1) && (cmpf == 1)) derived_9 = 2; |
| else if ((src0 >= src1) && (cmpf == 0)) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_mux_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned mux = 1; |
| |
| return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_texs_2d_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned skip = ins->skip; |
| assert(skip < 2); |
| |
| unsigned lod_mode = 1 - ins->texture.compute_lod; |
| assert(lod_mode < 2); |
| |
| unsigned texture_index = ins->texture.texture_index; |
| unsigned sampler_index = ins->texture.sampler_index; |
| bi_write_staging_register(clause, ins); |
| return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x3cb40 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x700b48 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_cube_tsel(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned derived_9 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0; |
| else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fpow_sc_det_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned func = 0; |
| |
| return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_mkvec_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 8 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 8 && ins->swizzle[1][0] == 2) lane1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned lane3_sz = nir_alu_type_get_type_size(ins->src_types[3]); |
| unsigned lane3_temp = 0; |
| if (lane3_sz == 8 && ins->swizzle[3][0] == 0) lane3_temp = 0; |
| else if (lane3_sz == 8 && ins->swizzle[3][0] == 2) lane3_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane3 = lane3_temp; |
| assert(lane3 < 2); |
| |
| return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fmin_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned sem = 0; |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned result_type = 2; |
| |
| if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_6 = 0; |
| if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; |
| else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| unsigned derived_13 = 0; |
| if (cmpf == 0) derived_13 = 0; |
| else if (cmpf == 1) derived_13 = 1; |
| else if (cmpf == 2) derived_13 = 2; |
| else if (cmpf == 3) derived_13 = 3; |
| else if (cmpf == 4) derived_13 = 4; |
| else if (cmpf == 5) derived_13 = 5; |
| else if (cmpf == 6) derived_13 = 6; |
| else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13); |
| } |
| |
| static inline unsigned |
| pan_pack_add_acmpxchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_rshift_and_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_fpow_sc_det_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned func = 0; |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else if (lane1_sz == 32) lane1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 4); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| if ((func == 0) || (func == 1)) { |
| unsigned derived_6 = 0; |
| if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0; |
| else if (lane1 == 1) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| unsigned derived_8 = 0; |
| if (func == 0) derived_8 = 0; |
| else if (func == 1) derived_8 = 1; |
| else unreachable("No pattern match at pos 8"); |
| |
| return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8); |
| } else if (((func == 2) || (func == 3)) && (lane1 == 2)) { |
| unsigned derived_8 = 0; |
| if (func == 2) derived_8 = 0; |
| else if (func == 3) derived_8 = 1; |
| else unreachable("No pattern match at pos 8"); |
| |
| return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8); |
| } else { |
| unreachable("No matching state found in add_fpow_sc_det_f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; |
| else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 2); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 0) derived_9 = 0; |
| else if (lanes1 == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_10 = 0; |
| if (lanes0 == 0) derived_10 = 0; |
| else if (lanes0 == 1) derived_10 = 1; |
| else unreachable("No pattern match at pos 10"); |
| |
| return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); |
| } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 2) derived_9 = 0; |
| else if (lanes1 == 3) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 4) derived_9 = 0; |
| else if (lanes1 == 5) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_v2s16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_arshift_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0x8); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fpclass_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x67c40 | (src0 << 0) | (lane0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_u8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x3cb88 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| return 0x701963; |
| } |
| |
| static inline unsigned |
| pan_pack_add_lea_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_11 = 0; |
| if (register_format == 0) derived_11 = 0; |
| else if (register_format == 1) derived_11 = 1; |
| else if (register_format == 2) derived_11 = 2; |
| else if (register_format == 3) derived_11 = 3; |
| else if (register_format == 4) derived_11 = 4; |
| else if (register_format == 5) derived_11 = 5; |
| else if (register_format == 6) derived_11 = 6; |
| else if (register_format == 7) derived_11 = 7; |
| else unreachable("No pattern match at pos 11"); |
| |
| return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); |
| } else if (register_format == 8) { |
| return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } else { |
| unreachable("No matching state found in add_lea_attr_tex"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 2); |
| |
| return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fadd_lscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2f16_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3ca88 | (src0 << 0) | (swz0 << 4); |
| } else { |
| unreachable("No matching state found in add_v2f16_to_v2u16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned result_type = 2; |
| |
| if ((widen0 == 2) && (widen1 == 1)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| if (cmpf == 1) cmpf = 4; |
| else if (cmpf == 5) cmpf = 2; |
| else if (cmpf == 4) cmpf = 1; |
| else if (cmpf == 2) cmpf = 5; |
| } |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; |
| else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; |
| else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; |
| else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; |
| else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fpclass_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x67c50 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned register_format_temp = 0; |
| if (ins->format == nir_type_float16) register_format_temp = 0; |
| else if (ins->format == nir_type_float32) register_format_temp = 1; |
| else if (ins->format == nir_type_int32) register_format_temp = 2; |
| else if (ins->format == nir_type_uint32) register_format_temp = 3; |
| else if (ins->format == nir_type_int16) register_format_temp = 4; |
| else if (ins->format == nir_type_uint16) register_format_temp = 5; |
| else if (ins->format == nir_type_float64) register_format_temp = 6; |
| else if (ins->format == nir_type_int64) register_format_temp = 7; |
| else unreachable("Could not pattern match register format"); |
| unsigned register_format = register_format_temp; |
| assert(register_format < 16); |
| |
| unsigned vecsize = ins->vector_channels - 1; |
| assert(vecsize < 4); |
| |
| bi_write_staging_register(clause, ins); |
| if (register_format != 8) { |
| unsigned derived_13 = 0; |
| if (register_format == 0) derived_13 = 0; |
| else if (register_format == 1) derived_13 = 1; |
| else if (register_format == 2) derived_13 = 2; |
| else if (register_format == 3) derived_13 = 3; |
| else if (register_format == 4) derived_13 = 4; |
| else if (register_format == 5) derived_13 = 5; |
| else if (register_format == 6) derived_13 = 6; |
| else if (register_format == 7) derived_13 = 7; |
| else unreachable("No pattern match at pos 13"); |
| |
| return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); |
| } else if (register_format == 8) { |
| return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); |
| } else { |
| unreachable("No matching state found in add_ld_attr"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned result_word = 0; |
| |
| return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| unsigned derived_4 = 0; |
| if (widen0 == 2) derived_4 = 1; |
| else if (widen0 == 1) derived_4 = 2; |
| else unreachable("No pattern match at pos 4"); |
| |
| unsigned derived_9 = 0; |
| if (cmpf == 2) derived_9 = 0; |
| else if (cmpf == 3) derived_9 = 1; |
| else if (cmpf == 1) derived_9 = 2; |
| else if (cmpf == 0) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c1_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| if (lanes2 != 0) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 1) derived_9 = 0; |
| else if (lanes2 == 2) derived_9 = 1; |
| else if (lanes2 == 3) derived_9 = 2; |
| else if (lanes2 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if (lanes2 == 0) { |
| return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); |
| } else { |
| unreachable("No matching state found in fma_rshift_and_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_frsq_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned derived_6 = 0; |
| if (widen0 == 0) derived_6 = 0; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x66100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmpf_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_lea_tex_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned format = 1; |
| |
| unsigned texture_index = ins->texture.texture_index; |
| bi_write_staging_register(clause, ins); |
| return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_f16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c508 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cc48 | (src0 << 0) | (lane0 << 5); |
| } else { |
| unreachable("No matching state found in add_f16_to_u32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 32) lanes1_temp = 0; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if (lanes1 == 0) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); |
| } else if ((lanes1 == 1) || (lanes1 == 2)) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { |
| unsigned derived_7 = 0; |
| if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; |
| else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 3) derived_9 = 0; |
| else if (lanes1 == 4) derived_9 = 1; |
| else if (lanes1 == 5) derived_9 = 2; |
| else if (lanes1 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_u32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned derived_6 = 0; |
| if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; |
| else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| unsigned derived_7 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; |
| else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; |
| else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 4); |
| |
| return 0x3cb48 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_kaboom(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0xd7858 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| return 0x701968 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| return 0x3d964; |
| } |
| |
| static inline unsigned |
| pan_pack_fma_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (log == 0) { |
| return 0x701c00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3); |
| } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { |
| return 0x701e00 | (src0 << 0) | (swz0 << 3); |
| } else { |
| unreachable("No matching state found in fma_frexpe_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned sqrt = 0; |
| |
| unsigned log = 1; |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| if ((log == 0) && (neg0 == 0)) { |
| return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); |
| } else if ((log == 1) && (sqrt == 0)) { |
| return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); |
| } else { |
| unreachable("No matching state found in add_frexpm_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| unsigned derived_3 = 0; |
| if (cmpf == 1) derived_3 = 0; |
| else if (cmpf == 0) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_3 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_swz_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 0 && ins->swizzle[0][3] == 0) swz0_temp = 0; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 1; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 2) swz0_temp = 2; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 3; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 4; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 5; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 2) swz0_temp = 6; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 0) swz0_temp = 7; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 8); |
| |
| return 0x3df40 | (src0 << 0) | (swz0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned derived_4 = 0; |
| if (widen0 == 2) derived_4 = 1; |
| else if (widen0 == 1) derived_4 = 2; |
| else unreachable("No pattern match at pos 4"); |
| |
| unsigned derived_3 = 0; |
| if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0; |
| else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| unsigned derived_9 = 0; |
| if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5; |
| else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6; |
| else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_u16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x3cce8 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if ((cmpf == 2) || (cmpf == 3)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| } |
| |
| unsigned derived_6 = 0; |
| if (cmpf == 0) derived_6 = 0; |
| else if (cmpf == 1) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_frshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcbrt_approx_c_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x67ab8 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_hadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); |
| unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; |
| assert(round < 2); |
| |
| return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_s16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x3cce0 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2u8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 16); |
| |
| return 0x3c808 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| unsigned derived_4 = 0; |
| if (widen0 == 2) derived_4 = 1; |
| else if (widen0 == 1) derived_4 = 2; |
| else unreachable("No pattern match at pos 4"); |
| |
| unsigned derived_9 = 0; |
| if (cmpf == 2) derived_9 = 0; |
| else if (cmpf == 3) derived_9 = 1; |
| else if (cmpf == 1) derived_9 = 2; |
| else if (cmpf == 0) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_imul_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned replicate0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned replicate0_temp = 0; |
| if (replicate0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) replicate0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned replicate0 = replicate0_temp; |
| assert(replicate0 < 8); |
| |
| unsigned replicate1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned replicate1_temp = 0; |
| if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) replicate1_temp = 0; |
| else if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) replicate1_temp = 1; |
| else if (replicate1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) replicate1_temp = 2; |
| else if (replicate1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) replicate1_temp = 3; |
| else if (replicate1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) replicate1_temp = 4; |
| else unreachable("Could not pattern match widen"); |
| unsigned replicate1 = replicate1_temp; |
| assert(replicate1 < 8); |
| |
| if ((replicate0 == 0) && (replicate1 == 0)) { |
| return 0x73e0c0 | (src0 << 0) | (src1 << 3); |
| } else if ((replicate0 == 0) && (replicate1 != 0)) { |
| unsigned derived_9 = 0; |
| if (replicate1 == 1) derived_9 = 0; |
| else if (replicate1 == 2) derived_9 = 1; |
| else if (replicate1 == 3) derived_9 = 2; |
| else if (replicate1 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_imul_v4i8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x3ccc0 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_f32_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c980 | (src0 << 0) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cca0 | (src0 << 0); |
| } else { |
| unreachable("No matching state found in add_f32_to_s32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_rshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fatan_assist_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| return 0x67a00 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_mux_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned mux = 1; |
| |
| return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x324800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_lshift_xor_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_write_staging_register(clause, ins); |
| return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lanes2_temp = 0; |
| if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; |
| else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes2 = lanes2_temp; |
| assert(lanes2 < 8); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; |
| assert(not_result < 2); |
| |
| if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 0) derived_9 = 0; |
| else if (lanes2 == 1) derived_9 = 1; |
| else if (lanes2 == 2) derived_9 = 2; |
| else if (lanes2 == 3) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x312800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { |
| unsigned derived_9 = 0; |
| if (lanes2 == 4) derived_9 = 1; |
| else if (lanes2 == 5) derived_9 = 2; |
| else if (lanes2 == 6) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in fma_lshift_or_v2i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_ld_gclk_u64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| |
| unsigned source = 7; |
| |
| bi_write_staging_register(clause, ins); |
| return 0xd7800 | (source << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned preserve_null = 0; |
| |
| return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_axchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); |
| unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; |
| assert(seg < 2); |
| |
| bi_read_staging_register(clause, ins); |
| assert(ins->src[0] == ins->dest); |
| return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_isub_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 8); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if ((lanes0 == 0) && (lanes1 == 0)) { |
| return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8); |
| } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else if (lanes1 == 3) derived_9 = 2; |
| else if (lanes1 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { |
| unsigned derived_9 = 0; |
| if (lanes1 == 5) derived_9 = 0; |
| else if (lanes1 == 6) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_isub_v4s8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fma_rscale_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| unsigned src3 = bi_get_src(ins, regs, 3); |
| |
| assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ); |
| unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0; |
| assert(round < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| unsigned special = 0; |
| |
| unsigned derived_16 = 0; |
| if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; |
| else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; |
| else unreachable("No pattern match at pos 16"); |
| |
| unsigned derived_12 = 0; |
| if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; |
| else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; |
| else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; |
| else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; |
| else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; |
| else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; |
| else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; |
| else unreachable("No pattern match at pos 12"); |
| |
| return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fpow_sc_apply(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| return 0x75080 | (src0 << 0) | (src1 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2f16_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c200 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3ca80 | (src0 << 0) | (swz0 << 4); |
| } else { |
| unreachable("No matching state found in add_v2f16_to_v2s16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_icmp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned result_type = 1; |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_eureka(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| return 0xd7850 | (src0 << 0); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, 2, 3, 1, 0, ~0, ~0 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 4); |
| |
| if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| if (cmpf == 0) cmpf = 2; |
| else if (cmpf == 3) cmpf = 1; |
| else if (cmpf == 2) cmpf = 0; |
| else if (cmpf == 1) cmpf = 3; |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; |
| else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2; |
| else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned derived_6 = 0; |
| if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; |
| else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; |
| else unreachable("No pattern match at pos 6"); |
| |
| unsigned derived_7 = 0; |
| if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; |
| else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_frcbrt_approx_a_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| if (widen0 == 0) { |
| return 0x67200 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); |
| } else if (widen0 != 0) { |
| unsigned derived_7 = 0; |
| if (widen0 == 1) derived_7 = 0; |
| else if (widen0 == 2) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x67240 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); |
| } else { |
| unreachable("No matching state found in add_frcbrt_approx_a_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_fma_atom_c_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf3); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf3); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned atom_opc = 2; |
| |
| return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned preserve_null = 0; |
| |
| return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fatan_assist_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lane1_temp = 0; |
| if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; |
| else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane1 = lane1_temp; |
| assert(lane1 < 2); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x67800 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2u16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cb08 | (src0 << 0) | (swz0 << 4); |
| } else { |
| unreachable("No matching state found in add_v2u16_to_v2f16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_iadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned saturate = 0; |
| |
| unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lanes0_temp = 0; |
| if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes0 = lanes0_temp; |
| assert(lanes0 < 8); |
| |
| unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned lanes1_temp = 0; |
| if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; |
| else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; |
| else unreachable("Could not pattern match widen"); |
| unsigned lanes1 = lanes1_temp; |
| assert(lanes1 < 8); |
| |
| if ((lanes0 == 0) && (lanes1 == 0)) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); |
| } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 1) derived_9 = 0; |
| else if (lanes1 == 2) derived_9 = 1; |
| else if (lanes1 == 3) derived_9 = 2; |
| else if (lanes1 == 4) derived_9 = 3; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { |
| unsigned derived_7 = 0; |
| if (saturate == 0) derived_7 = 0; |
| else if (saturate == 1) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| unsigned derived_9 = 0; |
| if (lanes1 == 5) derived_9 = 0; |
| else if (lanes1 == 6) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_iadd_v4u8"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_store_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| bi_read_staging_register(clause, ins); |
| return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 4); |
| |
| unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; |
| assert(not1 < 2); |
| |
| unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; |
| assert(not_result < 2); |
| |
| return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| return 0x700cc8 | (src0 << 0) | (lane0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_wmask(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned subgroup = 1; |
| |
| unsigned fill = 0; |
| return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swz1_temp = 0; |
| if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; |
| else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz1 = swz1_temp; |
| assert(swz1 < 4); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15); |
| } |
| |
| static inline unsigned |
| pan_pack_add_flog_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned mode = 0; |
| |
| unsigned precision = 0; |
| |
| unsigned neg = ins->src_neg[0]; |
| assert(neg < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned divzero = 0; |
| |
| if ((mode == 0) && (widen0 == 0) && (precision == 0)) { |
| return 0x67300 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); |
| } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) { |
| unsigned derived_7 = 0; |
| if (widen0 == 1) derived_7 = 0; |
| else if (widen0 == 2) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x67340 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); |
| } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) { |
| unsigned derived_5 = 0; |
| if (mode == 1) derived_5 = 0; |
| else if (mode == 2) derived_5 = 1; |
| else unreachable("No pattern match at pos 5"); |
| |
| return 0x67b00 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5); |
| } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) { |
| unsigned derived_5 = 0; |
| if (mode == 1) derived_5 = 0; |
| else if (mode == 2) derived_5 = 1; |
| else unreachable("No pattern match at pos 5"); |
| |
| unsigned derived_7 = 0; |
| if (widen0 == 1) derived_7 = 0; |
| else if (widen0 == 2) derived_7 = 1; |
| else unreachable("No pattern match at pos 7"); |
| |
| return 0x67b40 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5) | (derived_7 << 7); |
| } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg == 0)) { |
| unsigned derived_3 = 0; |
| if (mode == 2) derived_3 = 0; |
| else if (mode == 1) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| unsigned derived_4 = 0; |
| if (precision == 1) derived_4 = 0; |
| else if (precision == 2) derived_4 = 1; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4); |
| } else { |
| unreachable("No matching state found in add_flog_table_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| unsigned derived_4 = 0; |
| if (widen0 == 2) derived_4 = 1; |
| else if (widen0 == 1) derived_4 = 2; |
| else unreachable("No pattern match at pos 4"); |
| |
| unsigned derived_3 = 0; |
| if (cmpf == 1) derived_3 = 0; |
| else if (cmpf == 0) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_ilogb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; |
| else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 4); |
| |
| return 0x3d9c0 | (src0 << 0) | (swz0 << 3); |
| } |
| |
| static inline unsigned |
| pan_pack_add_v2s8_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swz0_temp = 0; |
| if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; |
| else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; |
| else unreachable("Could not pattern match widen"); |
| unsigned swz0 = swz0_temp; |
| assert(swz0 < 16); |
| |
| return 0x3c700 | (src0 << 0) | (swz0 << 4); |
| } |
| |
| static inline unsigned |
| pan_pack_add_u32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3cbc8 | (src0 << 0) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cd08 | (src0 << 0); |
| } else { |
| unreachable("No matching state found in add_u32_to_f32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_blend(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 1); |
| unsigned src1 = bi_get_src(ins, regs, 2); |
| assert((1 << src1) & 0xf7); |
| unsigned src2 = bi_get_src(ins, regs, 3); |
| assert((1 << src2) & 0xf7); |
| |
| bi_read_staging_register(clause, ins); |
| return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fma_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| unsigned neg1 = ins->src_neg[1]; |
| assert(neg1 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 4); |
| |
| unsigned clamp = ins->outmod; |
| assert(clamp < 4); |
| |
| unsigned abs1 = ins->src_abs[1]; |
| assert(abs1 < 2); |
| |
| unsigned neg2 = ins->src_neg[2]; |
| assert(neg2 < 2); |
| |
| unsigned abs2 = ins->src_abs[2]; |
| assert(abs2 < 2); |
| |
| if ((widen0 == 2) && (widen1 == 1)) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } |
| { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } |
| } |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; |
| else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; |
| else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; |
| else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; |
| else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; |
| else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; |
| else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| unsigned derived_17 = 0; |
| if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; |
| else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; |
| else unreachable("No pattern match at pos 17"); |
| |
| return 0x0 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branchz_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xf7); |
| |
| unsigned cmpf_table[] = { |
| ~0, 4, 5, 2, 1, 0, 3 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 8); |
| |
| unsigned derived_3 = 0; |
| if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0; |
| else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1; |
| else unreachable("No pattern match at pos 3"); |
| |
| unsigned derived_9 = 0; |
| if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5; |
| else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6; |
| else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_3 << 3) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_lea_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned format = 1; |
| |
| bi_write_staging_register(clause, ins); |
| return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xf7); |
| |
| return 0x6f83c | (src0 << 6); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_lrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| unsigned result_word = 0; |
| |
| return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_flshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned bytes2 = 0; |
| |
| unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned lane2_temp = 0; |
| if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; |
| else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane2 = lane2_temp; |
| assert(lane2 < 2); |
| |
| return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); |
| } |
| |
| static inline unsigned |
| pan_pack_fma_fmul_cslice(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| assert((1 << src0) & 0xfb); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| assert((1 << src1) & 0xfb); |
| |
| unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane0_temp = 0; |
| if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; |
| else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane0 = lane0_temp; |
| assert(lane0 < 2); |
| |
| unsigned abs0 = ins->src_abs[0]; |
| assert(abs0 < 2); |
| |
| unsigned neg0 = ins->src_neg[0]; |
| assert(neg0 < 2); |
| |
| return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 7) | (neg0 << 8); |
| } |
| |
| static inline unsigned |
| pan_pack_add_branch_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| assert((1 << src2) & 0xf7); |
| |
| unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned widen0_temp = 0; |
| if (widen0_sz == 32) widen0_temp = 0; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; |
| else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen0 = widen0_temp; |
| assert(widen0 < 4); |
| |
| unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned widen1_temp = 0; |
| if (widen1_sz == 32) widen1_temp = 0; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; |
| else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; |
| else unreachable("Could not pattern match widen"); |
| unsigned widen1 = widen1_temp; |
| assert(widen1 < 4); |
| |
| unsigned cmpf_table[] = { |
| ~0, ~0, ~0, ~0, ~0, 0, 1 |
| }; |
| unsigned cmpf = cmpf_table[ins->cond]; |
| assert(cmpf < 2); |
| |
| if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) { |
| { unsigned temp = src0; src0 = src1; src1 = temp; } |
| { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } |
| } |
| |
| unsigned derived_12 = 0; |
| if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; |
| else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; |
| else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3; |
| else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4; |
| else unreachable("No pattern match at pos 12"); |
| |
| unsigned derived_9 = 0; |
| if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1; |
| else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); |
| } |
| |
| static inline unsigned |
| pan_pack_add_f32_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| |
| unsigned round = ins->roundmode; |
| assert(round < 8); |
| |
| if (round != 4) { |
| unsigned derived_4 = 0; |
| if (round == 0) derived_4 = 0; |
| else if (round == 1) derived_4 = 1; |
| else if (round == 2) derived_4 = 2; |
| else if (round == 3) derived_4 = 3; |
| else unreachable("No pattern match at pos 4"); |
| |
| return 0x3c988 | (src0 << 0) | (derived_4 << 4); |
| } else if (round == 4) { |
| return 0x3cca8 | (src0 << 0); |
| } else { |
| unreachable("No matching state found in add_f32_to_u32"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_load_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| |
| assert(ins->segment); |
| unsigned seg = ins->segment; |
| assert(seg < 8); |
| |
| unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned lane_temp = 0; |
| if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 0; |
| else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 1; |
| else if (lane_sz == 32) lane_temp = 2; |
| else if (lane_sz == 64) lane_temp = 3; |
| else unreachable("Could not pattern match widen"); |
| unsigned lane = lane_temp; |
| assert(lane < 4); |
| |
| ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; |
| bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; |
| unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0; |
| assert(extend < 4); |
| |
| bi_write_staging_register(clause, ins); |
| if ((extend == 0) && ((lane == 0) || (lane == 1))) { |
| unsigned derived_9 = 0; |
| if (lane == 0) derived_9 = 0; |
| else if (lane == 1) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else if ((extend != 0) && (lane == 2)) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else if ((extend != 0) && (lane == 3)) { |
| unsigned derived_9 = 0; |
| if (extend == 1) derived_9 = 0; |
| else if (extend == 2) derived_9 = 1; |
| else unreachable("No pattern match at pos 9"); |
| |
| return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); |
| } else { |
| unreachable("No matching state found in add_load_i16"); |
| } |
| } |
| |
| static inline unsigned |
| pan_pack_add_mux_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) |
| { |
| unsigned src0 = bi_get_src(ins, regs, 0); |
| unsigned src1 = bi_get_src(ins, regs, 1); |
| unsigned src2 = bi_get_src(ins, regs, 2); |
| |
| unsigned mux = 1; |
| |
| unsigned swap2_sz = nir_alu_type_get_type_size(ins->src_types[2]); |
| unsigned swap2_temp = 0; |
| if (swap2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swap2_temp = 0; |
| else if (swap2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swap2_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap2 = swap2_temp; |
| assert(swap2 < 2); |
| |
| unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]); |
| unsigned swap1_temp = 0; |
| if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0; |
| else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap1 = swap1_temp; |
| assert(swap1 < 2); |
| |
| unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]); |
| unsigned swap0_temp = 0; |
| if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0; |
| else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1; |
| else unreachable("Could not pattern match widen"); |
| unsigned swap0 = swap0_temp; |
| assert(swap0 < 2); |
| |
| return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13); |
| } |
| |
| #endif |