| /* |
| * Copyright © 2015 Intel Corporation |
| * Copyright © 2019 Valve Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * Authors: |
| * Jason Ekstrand (jason@jlekstrand.net) |
| * Samuel Pitoiset (samuel.pitoiset@gmail.com) |
| */ |
| |
| #include "nir.h" |
| #include "nir_builder.h" |
| |
| static nir_ssa_def * |
| lower_frexp_sig(nir_builder *b, nir_ssa_def *x) |
| { |
| nir_ssa_def *abs_x = nir_fabs(b, x); |
| nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); |
| nir_ssa_def *sign_mantissa_mask, *exponent_value; |
| nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); |
| |
| switch (x->bit_size) { |
| case 16: |
| /* Half-precision floating-point values are stored as |
| * 1 sign bit; |
| * 5 exponent bits; |
| * 10 mantissa bits. |
| * |
| * An exponent shift of 10 will shift the mantissa out, leaving only the |
| * exponent and sign bit (which itself may be zero, if the absolute value |
| * was taken before the bitcast and shift). |
| */ |
| sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16); |
| /* Exponent of floating-point values in the range [0.5, 1.0). */ |
| exponent_value = nir_imm_intN_t(b, 0x3800u, 16); |
| break; |
| case 32: |
| /* Single-precision floating-point values are stored as |
| * 1 sign bit; |
| * 8 exponent bits; |
| * 23 mantissa bits. |
| * |
| * An exponent shift of 23 will shift the mantissa out, leaving only the |
| * exponent and sign bit (which itself may be zero, if the absolute value |
| * was taken before the bitcast and shift. |
| */ |
| sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); |
| /* Exponent of floating-point values in the range [0.5, 1.0). */ |
| exponent_value = nir_imm_int(b, 0x3f000000u); |
| break; |
| case 64: |
| /* Double-precision floating-point values are stored as |
| * 1 sign bit; |
| * 11 exponent bits; |
| * 52 mantissa bits. |
| * |
| * An exponent shift of 20 will shift the remaining mantissa bits out, |
| * leaving only the exponent and sign bit (which itself may be zero, if |
| * the absolute value was taken before the bitcast and shift. |
| */ |
| sign_mantissa_mask = nir_imm_int(b, 0x800fffffu); |
| /* Exponent of floating-point values in the range [0.5, 1.0). */ |
| exponent_value = nir_imm_int(b, 0x3fe00000u); |
| break; |
| default: |
| unreachable("Invalid bitsize"); |
| } |
| |
| if (x->bit_size == 64) { |
| /* We only need to deal with the exponent so first we extract the upper |
| * 32 bits using nir_unpack_64_2x32_split_y. |
| */ |
| nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x); |
| nir_ssa_def *zero32 = nir_imm_int(b, 0); |
| |
| nir_ssa_def *new_upper = |
| nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask), |
| nir_bcsel(b, is_not_zero, exponent_value, zero32)); |
| |
| nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x); |
| |
| return nir_pack_64_2x32_split(b, lower_x, new_upper); |
| } else { |
| return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), |
| nir_bcsel(b, is_not_zero, exponent_value, zero)); |
| } |
| } |
| |
| static nir_ssa_def * |
| lower_frexp_exp(nir_builder *b, nir_ssa_def *x) |
| { |
| nir_ssa_def *abs_x = nir_fabs(b, x); |
| nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); |
| nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); |
| nir_ssa_def *exponent; |
| |
| switch (x->bit_size) { |
| case 16: { |
| nir_ssa_def *exponent_shift = nir_imm_int(b, 10); |
| nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16); |
| |
| /* Significand return must be of the same type as the input, but the |
| * exponent must be a 32-bit integer. |
| */ |
| exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), |
| nir_bcsel(b, is_not_zero, exponent_bias, zero))); |
| break; |
| } |
| case 32: { |
| nir_ssa_def *exponent_shift = nir_imm_int(b, 23); |
| nir_ssa_def *exponent_bias = nir_imm_int(b, -126); |
| |
| exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), |
| nir_bcsel(b, is_not_zero, exponent_bias, zero)); |
| break; |
| } |
| case 64: { |
| nir_ssa_def *exponent_shift = nir_imm_int(b, 20); |
| nir_ssa_def *exponent_bias = nir_imm_int(b, -1022); |
| |
| nir_ssa_def *zero32 = nir_imm_int(b, 0); |
| nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x); |
| |
| exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift), |
| nir_bcsel(b, is_not_zero, exponent_bias, zero32)); |
| break; |
| } |
| default: |
| unreachable("Invalid bitsize"); |
| } |
| |
| return exponent; |
| } |
| |
| static bool |
| lower_frexp_impl(nir_function_impl *impl) |
| { |
| bool progress = false; |
| |
| nir_builder b; |
| nir_builder_init(&b, impl); |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr_safe(instr, block) { |
| if (instr->type != nir_instr_type_alu) |
| continue; |
| |
| nir_alu_instr *alu_instr = nir_instr_as_alu(instr); |
| nir_ssa_def *lower; |
| |
| b.cursor = nir_before_instr(instr); |
| |
| switch (alu_instr->op) { |
| case nir_op_frexp_sig: |
| lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); |
| break; |
| case nir_op_frexp_exp: |
| lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); |
| break; |
| default: |
| continue; |
| } |
| |
| nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, |
| nir_src_for_ssa(lower)); |
| nir_instr_remove(instr); |
| progress = true; |
| } |
| } |
| |
| if (progress) { |
| nir_metadata_preserve(impl, nir_metadata_block_index | |
| nir_metadata_dominance); |
| } |
| |
| return progress; |
| } |
| |
| bool |
| nir_lower_frexp(nir_shader *shader) |
| { |
| bool progress = false; |
| |
| nir_foreach_function(function, shader) { |
| if (function->impl) |
| progress |= lower_frexp_impl(function->impl); |
| } |
| |
| return progress; |
| } |