src/compiler/nir/nir_lower_frexp.c - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2015 Intel Corporation
  * Copyright © 2019 Valve Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  *
  * Authors:
  *    Jason Ekstrand (jason@jlekstrand.net)
  *    Samuel Pitoiset (samuel.pitoiset@gmail.com)
  */

 #include "nir.h"
 #include "nir_builder.h"

 /*
  * Build the replacement for frexp_sig(x).
  *
  * The sign and mantissa bits of x are kept as they are, and the exponent
  * field is overwritten with the exponent of values in the range [0.5, 1.0).
  * When x compares equal to zero the exponent field is left cleared, so the
  * result is the masked input.
  *
  * b: builder positioned where the new instructions should be emitted.
  * x: 16-, 32- or 64-bit source value.
  *
  * Returns an SSA value with the same bit size as x.
  */
 static nir_ssa_def *
 lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
 {
    nir_ssa_def *magnitude = nir_fabs(b, x);
    nir_ssa_def *fzero = nir_imm_floatN_t(b, 0, x->bit_size);
    nir_ssa_def *keep_mask;
    nir_ssa_def *half_exponent;
    nir_ssa_def *nonzero = nir_fne(b, magnitude, fzero);

    if (x->bit_size == 16) {
       /* Half precision: 1 sign bit, 5 exponent bits, 10 mantissa bits.
        * The mask selects the sign bit and the 10 mantissa bits.
        */
       keep_mask = nir_imm_intN_t(b, 0x83ffu, 16);
       /* Exponent field of values in [0.5, 1.0). */
       half_exponent = nir_imm_intN_t(b, 0x3800u, 16);
    } else if (x->bit_size == 32) {
       /* Single precision: 1 sign bit, 8 exponent bits, 23 mantissa bits.
        * The mask selects the sign bit and the 23 mantissa bits.
        */
       keep_mask = nir_imm_int(b, 0x807fffffu);
       /* Exponent field of values in [0.5, 1.0). */
       half_exponent = nir_imm_int(b, 0x3f000000u);
    } else if (x->bit_size == 64) {
       /* Double precision: 1 sign bit, 11 exponent bits, 52 mantissa bits.
        * Both constants are 32 bits wide because they are applied to the
        * upper half of the value only (sign, exponent and the top 20
        * mantissa bits).
        */
       keep_mask = nir_imm_int(b, 0x800fffffu);
       /* Exponent field of values in [0.5, 1.0). */
       half_exponent = nir_imm_int(b, 0x3fe00000u);
    } else {
       unreachable("Invalid bitsize");
    }

    if (x->bit_size != 64) {
       nir_ssa_def *sign_and_mantissa = nir_iand(b, x, keep_mask);
       nir_ssa_def *exponent_bits =
          nir_bcsel(b, nonzero, half_exponent, fzero);

       return nir_ior(b, sign_and_mantissa, exponent_bits);
    }

    /* 64-bit: the exponent sits entirely in the upper 32 bits, so only that
     * half is rewritten and the lower half is passed through untouched.
     */
    nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, x);
    nir_ssa_def *izero = nir_imm_int(b, 0);
    nir_ssa_def *hi_sign_and_mantissa = nir_iand(b, hi, keep_mask);
    nir_ssa_def *hi_exponent_bits =
       nir_bcsel(b, nonzero, half_exponent, izero);
    nir_ssa_def *new_hi = nir_ior(b, hi_sign_and_mantissa, hi_exponent_bits);
    nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, x);

    return nir_pack_64_2x32_split(b, lo, new_hi);
 }

 /*
  * Build the replacement for frexp_exp(x).
  *
  * The biased exponent is extracted by shifting the bits of |x| right past
  * the mantissa, and a per-format bias is then added. The bias is only
  * applied when x compares unequal to zero, so a zero input yields zero.
  *
  * b: builder positioned where the new instructions should be emitted.
  * x: 16-, 32- or 64-bit source value.
  *
  * Returns a 32-bit integer SSA value.
  */
 static nir_ssa_def *
 lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
 {
    nir_ssa_def *magnitude = nir_fabs(b, x);
    nir_ssa_def *fzero = nir_imm_floatN_t(b, 0, x->bit_size);
    nir_ssa_def *nonzero = nir_fne(b, magnitude, fzero);

    switch (x->bit_size) {
    case 16: {
       nir_ssa_def *shift = nir_imm_int(b, 10);
       nir_ssa_def *bias = nir_imm_intN_t(b, -14, 16);
       nir_ssa_def *biased = nir_ushr(b, magnitude, shift);
       nir_ssa_def *adjust = nir_bcsel(b, nonzero, bias, fzero);

       /* The arithmetic is done at 16 bits; the exponent result has to be
        * a 32-bit integer, so sign-extend the sum.
        */
       return nir_i2i32(b, nir_iadd(b, biased, adjust));
    }
    case 32: {
       nir_ssa_def *shift = nir_imm_int(b, 23);
       nir_ssa_def *bias = nir_imm_int(b, -126);
       nir_ssa_def *biased = nir_ushr(b, magnitude, shift);
       nir_ssa_def *adjust = nir_bcsel(b, nonzero, bias, fzero);

       return nir_iadd(b, biased, adjust);
    }
    case 64: {
       /* Only the upper 32 bits hold the exponent; 20 mantissa bits remain
        * below it in that half.
        */
       nir_ssa_def *shift = nir_imm_int(b, 20);
       nir_ssa_def *bias = nir_imm_int(b, -1022);
       nir_ssa_def *izero = nir_imm_int(b, 0);
       nir_ssa_def *magnitude_hi = nir_unpack_64_2x32_split_y(b, magnitude);
       nir_ssa_def *biased = nir_ushr(b, magnitude_hi, shift);
       nir_ssa_def *adjust = nir_bcsel(b, nonzero, bias, izero);

       return nir_iadd(b, biased, adjust);
    }
    default:
       unreachable("Invalid bitsize");
    }
 }

 /*
  * Replace every frexp_sig / frexp_exp ALU instruction in one function
  * implementation with the sequences built by lower_frexp_sig() and
  * lower_frexp_exp().
  *
  * Returns true if at least one instruction was replaced.
  */
 static bool
 lower_frexp_impl(nir_function_impl *impl)
 {
    nir_builder b;
    bool progress = false;

    nir_builder_init(&b, impl);

    nir_foreach_block(block, impl) {
       /* The _safe iterator is needed because the current instruction is
        * removed inside the loop.
        */
       nir_foreach_instr_safe(instr, block) {
          if (instr->type != nir_instr_type_alu)
             continue;

          nir_alu_instr *alu = nir_instr_as_alu(instr);
          const bool is_sig = alu->op == nir_op_frexp_sig;
          const bool is_exp = alu->op == nir_op_frexp_exp;

          if (!is_sig && !is_exp)
             continue;

          /* Emit the replacement right in front of the old instruction. */
          b.cursor = nir_before_instr(instr);

          nir_ssa_def *src = nir_ssa_for_alu_src(&b, alu, 0);
          nir_ssa_def *replacement = is_sig ? lower_frexp_sig(&b, src)
                                            : lower_frexp_exp(&b, src);

          nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                                   nir_src_for_ssa(replacement));
          nir_instr_remove(instr);
          progress = true;
       }
    }

    if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
    }

    return progress;
 }

 /*
  * Run the frexp lowering over every function of the shader that has an
  * implementation.
  *
  * Returns true if any function was changed.
  */
 bool
 nir_lower_frexp(nir_shader *shader)
 {
    bool any_lowered = false;

    nir_foreach_function(function, shader) {
       if (!function->impl)
          continue;

       if (lower_frexp_impl(function->impl))
          any_lowered = true;
    }

    return any_lowered;
 }
	/*
	* Copyright © 2015 Intel Corporation
	* Copyright © 2019 Valve Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*
	* Authors:
	* Jason Ekstrand (jason@jlekstrand.net)
	* Samuel Pitoiset (samuel.pitoiset@gmail.com)
	*/

	#include "nir.h"
	#include "nir_builder.h"

	/*
	 * Build the replacement for frexp_sig(x).
	 *
	 * The sign and mantissa bits of x are kept, and the exponent field is
	 * replaced by the exponent of values in the range [0.5, 1.0). When x
	 * compares equal to zero the exponent field is left cleared.
	 *
	 * b: builder positioned where the new instructions should be emitted.
	 * x: 16-, 32- or 64-bit source value.
	 *
	 * Returns an SSA value with the same bit size as x.
	 */
	static nir_ssa_def *
	lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
	{
	   nir_ssa_def *abs_x = nir_fabs(b, x);
	   nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
	   nir_ssa_def *sign_mantissa_mask, *exponent_value;
	   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);

	   switch (x->bit_size) {
	   case 16:
	      /* Half-precision floating-point values are stored as
	       *   1 sign bit;
	       *   5 exponent bits;
	       *   10 mantissa bits.
	       *
	       * The mask keeps the sign bit and the 10 mantissa bits.
	       */
	      sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16);
	      /* Exponent of floating-point values in the range [0.5, 1.0). */
	      exponent_value = nir_imm_intN_t(b, 0x3800u, 16);
	      break;
	   case 32:
	      /* Single-precision floating-point values are stored as
	       *   1 sign bit;
	       *   8 exponent bits;
	       *   23 mantissa bits.
	       *
	       * The mask keeps the sign bit and the 23 mantissa bits.
	       */
	      sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
	      /* Exponent of floating-point values in the range [0.5, 1.0). */
	      exponent_value = nir_imm_int(b, 0x3f000000u);
	      break;
	   case 64:
	      /* Double-precision floating-point values are stored as
	       *   1 sign bit;
	       *   11 exponent bits;
	       *   52 mantissa bits.
	       *
	       * Both constants are 32 bits wide: they are applied to the upper
	       * half of the value only, which holds the sign, the exponent and
	       * the top 20 mantissa bits.
	       */
	      sign_mantissa_mask = nir_imm_int(b, 0x800fffffu);
	      /* Exponent of floating-point values in the range [0.5, 1.0). */
	      exponent_value = nir_imm_int(b, 0x3fe00000u);
	      break;
	   default:
	      unreachable("Invalid bitsize");
	   }

	   if (x->bit_size == 64) {
	      /* We only need to deal with the exponent so first we extract the
	       * upper 32 bits using nir_unpack_64_2x32_split_y.
	       */
	      nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x);
	      nir_ssa_def *zero32 = nir_imm_int(b, 0);

	      nir_ssa_def *new_upper =
	         nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask),
	                    nir_bcsel(b, is_not_zero, exponent_value, zero32));

	      /* The lower 32 bits are pure mantissa and pass through as-is. */
	      nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x);

	      return nir_pack_64_2x32_split(b, lower_x, new_upper);
	   } else {
	      return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
	                        nir_bcsel(b, is_not_zero, exponent_value, zero));
	   }
	}

	/*
	 * Build the replacement for frexp_exp(x).
	 *
	 * The biased exponent is extracted by shifting the bits of |x| right
	 * past the mantissa, then a per-format bias is added. The bias is only
	 * applied when x compares unequal to zero, so a zero input yields zero.
	 *
	 * b: builder positioned where the new instructions should be emitted.
	 * x: 16-, 32- or 64-bit source value.
	 *
	 * Returns a 32-bit integer SSA value.
	 */
	static nir_ssa_def *
	lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
	{
	   nir_ssa_def *abs_x = nir_fabs(b, x);
	   nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
	   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
	   nir_ssa_def *exponent;

	   switch (x->bit_size) {
	   case 16: {
	      nir_ssa_def *exponent_shift = nir_imm_int(b, 10);
	      nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16);

	      /* Significand return must be of the same type as the input, but
	       * the exponent must be a 32-bit integer.
	       */
	      exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
	                              nir_bcsel(b, is_not_zero, exponent_bias, zero)));
	      break;
	   }
	   case 32: {
	      nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
	      nir_ssa_def *exponent_bias = nir_imm_int(b, -126);

	      exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
	                             nir_bcsel(b, is_not_zero, exponent_bias, zero));
	      break;
	   }
	   case 64: {
	      /* Only the upper 32 bits hold the exponent; 20 mantissa bits
	       * remain below it in that half.
	       */
	      nir_ssa_def *exponent_shift = nir_imm_int(b, 20);
	      nir_ssa_def *exponent_bias = nir_imm_int(b, -1022);

	      nir_ssa_def *zero32 = nir_imm_int(b, 0);
	      nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x);

	      exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift),
	                             nir_bcsel(b, is_not_zero, exponent_bias, zero32));
	      break;
	   }
	   default:
	      unreachable("Invalid bitsize");
	   }

	   return exponent;
	}

	/*
	 * Replace every frexp_sig / frexp_exp ALU instruction in one function
	 * implementation with the sequences built by lower_frexp_sig() and
	 * lower_frexp_exp().
	 *
	 * Returns true if at least one instruction was replaced.
	 */
	static bool
	lower_frexp_impl(nir_function_impl *impl)
	{
	   bool progress = false;

	   nir_builder b;
	   nir_builder_init(&b, impl);

	   nir_foreach_block(block, impl) {
	      /* The _safe iterator is needed because the current instruction is
	       * removed inside the loop.
	       */
	      nir_foreach_instr_safe(instr, block) {
	         if (instr->type != nir_instr_type_alu)
	            continue;

	         nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
	         nir_ssa_def *lower;

	         /* Emit the replacement right in front of the old instruction. */
	         b.cursor = nir_before_instr(instr);

	         switch (alu_instr->op) {
	         case nir_op_frexp_sig:
	            lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
	            break;
	         case nir_op_frexp_exp:
	            lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
	            break;
	         default:
	            continue;
	         }

	         nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa,
	                                  nir_src_for_ssa(lower));
	         nir_instr_remove(instr);
	         progress = true;
	      }
	   }

	   if (progress) {
	      nir_metadata_preserve(impl, nir_metadata_block_index |
	                                  nir_metadata_dominance);
	   }

	   return progress;
	}

	/*
	 * Run the frexp lowering over every function of the shader that has an
	 * implementation.
	 *
	 * Returns true if any function was changed.
	 */
	bool
	nir_lower_frexp(nir_shader *shader)
	{
	   bool progress = false;

	   nir_foreach_function(function, shader) {
	      if (function->impl)
	         progress |= lower_frexp_impl(function->impl);
	   }

	   return progress;
	}