src/compiler/glsl/lower_precision.cpp - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2019 Google, Inc
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */

 /**
  * \file lower_precision.cpp
  */

 #include "main/macros.h"
 #include "main/mtypes.h"
 #include "compiler/glsl_types.h"
 #include "ir.h"
 #include "ir_builder.h"
 #include "ir_optimization.h"
 #include "ir_rvalue_visitor.h"
 #include "util/half_float.h"
 #include "util/set.h"
 #include "util/hash_table.h"
 #include <string.h>
 #include <vector>

 namespace {

 /**
  * Driver visitor of the lowering pass.
  *
  * handle_rvalue() looks every rvalue up in lowerable_rvalues and rewrites
  * the ones found there with lower_precision_visitor. visit_enter(ir_call)
  * swaps calls to builtins whose return temporary was marked mediump/lowp
  * for a lowered clone of the builtin and inlines it.
  */
 class find_precision_visitor : public ir_rvalue_enter_visitor {
 public:
    /* Creates the (initially empty) lowerable_rvalues set. The builtin caches
     * below stay NULL until map_builtin() is first called.
     */
    find_precision_visitor(const struct gl_shader_compiler_options *options);
    /* Destroys lowerable_rvalues and, if they were created, the builtin
     * caches and their memory context.
     */
    ~find_precision_visitor();

    /* Lowers *rvalue in place if it is a root recorded in lowerable_rvalues. */
    virtual void handle_rvalue(ir_rvalue **rvalue);
    /* Replaces eligible builtin calls with an inlined, lowered version. */
    virtual ir_visitor_status visit_enter(ir_call *ir);

    /* Returns the lowered clone of the builtin signature sig, creating and
     * caching it in lowered_builtins on first use.
     */
    ir_function_signature *map_builtin(ir_function_signature *sig);

    /* Set of rvalues that can be lowered. This will be filled in by
     * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
     * will be added to this set.
     */
    struct set *lowerable_rvalues;

    /**
     * A mapping of builtin signature functions to lowered versions. This is
     * filled in lazily when a lowered version is needed.
     */
    struct hash_table *lowered_builtins;
    /**
     * A temporary hash table only used in order to clone functions.
     */
    struct hash_table *clone_ht;

    /* ralloc context that the lowered builtin clones are allocated from. */
    void *lowered_builtin_mem_ctx;

    /* Compiler options handed to lower_precision() for the cloned builtins. */
    const struct gl_shader_compiler_options *options;
 };

 /**
  * Analysis visitor that collects the root nodes of expression trees which
  * can be evaluated at reduced precision.
  *
  * A stack entry is kept for each instruction currently being visited. When
  * an entry is popped, its state is merged into its parent and the node is
  * either queued on the parent, added to lowerable_rvalues, or dropped (see
  * pop_stack_entry()).
  */
 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
 public:
    enum can_lower_state {
       /* Nothing seen so far decides whether the node can be lowered. */
       UNKNOWN,
       /* The node, or something combined into it, must stay at full precision. */
       CANT_LOWER,
       /* The node has reduced precision and is a candidate for lowering. */
       SHOULD_LOWER,
    };

    enum parent_relation {
       /* The parent performs a further operation involving the result from the
        * child and can be lowered along with it.
        */
       COMBINED_OPERATION,
       /* The parent instruction's operation is independent of the child type so
        * the child should be lowered separately.
        */
       INDEPENDENT_OPERATION,
    };

    struct stack_entry {
       /* Instruction this entry was pushed for. */
       ir_instruction *instr;
       /* Lowering decision accumulated for instr so far. */
       enum can_lower_state state;
       /* List of child rvalues that can be lowered. When this stack entry is
        * popped, if this node itself can't be lowered then all of the children
        * are root nodes to lower so we will add them to lowerable_rvalues.
        * Otherwise if this node can also be lowered then we won't add the
        * children because we only want to add the topmost lowerable nodes to
        * lowerable_rvalues and the children will be lowered as part of lowering
        * this node.
        */
       std::vector<ir_instruction *> lowerable_children;
    };

    /* result receives the lowerable root rvalues; the visitor registers
     * stack_enter/stack_leave as its enter/leave callbacks.
     */
    find_lowerable_rvalues_visitor(struct set *result,
                                   const struct gl_shader_compiler_options *options);

    /* Callbacks; data is the find_lowerable_rvalues_visitor instance. */
    static void stack_enter(class ir_instruction *ir, void *data);
    static void stack_leave(class ir_instruction *ir, void *data);

    virtual ir_visitor_status visit(ir_constant *ir);
    virtual ir_visitor_status visit(ir_dereference_variable *ir);

    virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
    virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
    virtual ir_visitor_status visit_enter(ir_texture *ir);
    virtual ir_visitor_status visit_enter(ir_expression *ir);

    virtual ir_visitor_status visit_leave(ir_assignment *ir);
    virtual ir_visitor_status visit_leave(ir_call *ir);

    /* Maps a type and a GLSL_PRECISION_* value to a can_lower_state. */
    can_lower_state handle_precision(const glsl_type *type,
                                     int precision) const;

    /* Tells whether a child's state should be merged into its parent. */
    static parent_relation get_parent_relation(ir_instruction *parent,
                                               ir_instruction *child);

    /* Entries for the instructions currently being visited, innermost last. */
    std::vector<stack_entry> stack;
    /* Output set; not created or destroyed by this class. */
    struct set *lowerable_rvalues;
    const struct gl_shader_compiler_options *options;

    /* Merges the top entry into its parent, records results, and pops it. */
    void pop_stack_entry();
    /* Adds every queued child of entry to lowerable_rvalues. */
    void add_lowerable_children(const stack_entry &entry);
 };

 /**
  * Rewrites one lowerable expression tree to 16-bit types.
  *
  * handle_rvalue() wraps dereferences in a down-conversion and retypes other
  * 32-bit rvalues (converting constant data). The visit_enter() overrides for
  * dereferences, calls and textures stop the traversal from descending into
  * their operands. visit_leave() fixes up bool<->float conversion opcodes.
  */
 class lower_precision_visitor : public ir_rvalue_visitor {
 public:
    virtual void handle_rvalue(ir_rvalue **rvalue);
    virtual ir_visitor_status visit_enter(ir_dereference_array *);
    virtual ir_visitor_status visit_enter(ir_dereference_record *);
    virtual ir_visitor_status visit_enter(ir_call *ir);
    virtual ir_visitor_status visit_enter(ir_texture *ir);
    virtual ir_visitor_status visit_leave(ir_expression *);
 };

 static bool
 can_lower_type(const struct gl_shader_compiler_options *options,
                const glsl_type *type)
 {
    /* Don’t lower any expressions involving non-float types except bool and
     * texture samplers. This will rule out operations that change the type such
     * as conversion to ints. Instead it will end up lowering the arguments
     * instead and adding a final conversion to float32. We want to handle
     * boolean types so that it will do comparisons as 16-bit.
     */

    switch (type->without_array()->base_type) {
    /* TODO: should we do anything for these two with regard to Int16 vs FP16
     * support?
     */
    case GLSL_TYPE_BOOL:
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
       return true;

    case GLSL_TYPE_FLOAT:
       return options->LowerPrecisionFloat16;

    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
       return options->LowerPrecisionInt16;

    default:
       return false;
    }
 }

 /* Stores the output set and options, and registers stack_enter/stack_leave
  * (with this object as their data) as the visitor's enter/leave callbacks.
  */
 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                  const struct gl_shader_compiler_options *opts)
    : lowerable_rvalues(res),
      options(opts)
 {
    callback_enter = stack_enter;
    data_enter = this;

    callback_leave = stack_leave;
    data_leave = this;
 }

 void
 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                             void *data)
 {
    find_lowerable_rvalues_visitor *state =
       (find_lowerable_rvalues_visitor *) data;

    /* Add a new stack entry for this instruction */
    stack_entry entry;

    entry.instr = ir;
    entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

    state->stack.push_back(entry);
 }

 void
 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
 {
    /* We can’t lower this node so if there were any pending children then they
     * are all root lowerable nodes and we should add them to the set.
     */
    for (auto &it : entry.lowerable_children)
       _mesa_set_add(lowerable_rvalues, it);
 }

 void
 find_lowerable_rvalues_visitor::pop_stack_entry()
 {
    const stack_entry &entry = stack.back();

    if (stack.size() >= 2) {
       /* Combine this state into the parent state, unless the parent operation
        * doesn’t have any relation to the child operations
        */
       stack_entry &parent = stack.end()[-2];
       parent_relation rel = get_parent_relation(parent.instr, entry.instr);

       if (rel == COMBINED_OPERATION) {
          switch (entry.state) {
          case CANT_LOWER:
             parent.state = CANT_LOWER;
             break;
          case SHOULD_LOWER:
             if (parent.state == UNKNOWN)
                parent.state = SHOULD_LOWER;
             break;
          case UNKNOWN:
             break;
          }
       }
    }

    if (entry.state == SHOULD_LOWER) {
       ir_rvalue *rv = entry.instr->as_rvalue();

       if (rv == NULL) {
          add_lowerable_children(entry);
       } else if (stack.size() >= 2) {
          stack_entry &parent = stack.end()[-2];

          switch (get_parent_relation(parent.instr, rv)) {
          case COMBINED_OPERATION:
             /* We only want to add the toplevel lowerable instructions to the
              * lowerable set. Therefore if there is a parent then instead of
              * adding this instruction to the set we will queue depending on
              * the result of the parent instruction.
              */
             parent.lowerable_children.push_back(entry.instr);
             break;
          case INDEPENDENT_OPERATION:
             _mesa_set_add(lowerable_rvalues, rv);
             break;
          }
       } else {
          /* This is a toplevel node so add it directly to the lowerable
           * set.
           */
          _mesa_set_add(lowerable_rvalues, rv);
       }
    } else if (entry.state == CANT_LOWER) {
       add_lowerable_children(entry);
    }

    stack.pop_back();
 }

 void
 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                             void *data)
 {
    find_lowerable_rvalues_visitor *state =
       (find_lowerable_rvalues_visitor *) data;

    state->pop_stack_entry();
 }

 /* Translates a type and a GLSL_PRECISION_* value into a lowering state.
  *
  * Types rejected by can_lower_type() are CANT_LOWER regardless of
  * precision. Otherwise no precision gives UNKNOWN, mediump/lowp gives
  * SHOULD_LOWER, and highp (or any unexpected value) gives CANT_LOWER.
  */
 enum find_lowerable_rvalues_visitor::can_lower_state
 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                  int precision) const
 {
    if (!can_lower_type(options, type))
       return CANT_LOWER;

    if (precision == GLSL_PRECISION_NONE)
       return UNKNOWN;

    if (precision == GLSL_PRECISION_MEDIUM ||
        precision == GLSL_PRECISION_LOW)
       return SHOULD_LOWER;

    return CANT_LOWER;
 }

 /* Classifies how \p parent relates to its \p child. Only the kind of the
  * parent is inspected.
  *
  * - Dereference parents: the only child could be, for example, an array
  *   index, which should be lowered independently of the parent.
  * - Texture parents: the precision of the sampling depends on the sampler
  *   only, so the remaining arguments are independent too.
  * - Everything else operates on the child's result and is combined with it.
  */
 enum find_lowerable_rvalues_visitor::parent_relation
 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                     ir_instruction *child)
 {
    const bool independent = parent->as_dereference() || parent->as_texture();

    return independent ? INDEPENDENT_OPERATION : COMBINED_OPERATION;
 }

 /* Constants are leaves, so the stack entry is pushed and popped by hand.
  * A constant only blocks lowering when its type cannot be lowered;
  * otherwise its state is left as set by stack_enter().
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
 {
    stack_enter(ir, this);

    stack_entry &top = stack.back();
    if (!can_lower_type(options, ir->type))
       top.state = CANT_LOWER;

    pop_stack_entry();

    return visit_continue;
 }

 /* Variable dereferences are leaves, so the stack entry is pushed and popped
  * by hand. Unless stack_enter() already decided the state, it is derived
  * from the type and precision of the dereference.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
 {
    stack_enter(ir, this);

    stack_entry &top = stack.back();
    if (top.state == UNKNOWN)
       top.state = handle_precision(ir->type, ir->precision());

    pop_stack_entry();

    return visit_continue;
 }

 /* After the base class has entered the node, seeds the state of a record
  * dereference from its type and precision unless it is already decided.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
 {
    ir_hierarchical_visitor::visit_enter(ir);

    stack_entry &top = stack.back();
    if (top.state == UNKNOWN)
       top.state = handle_precision(ir->type, ir->precision());

    return visit_continue;
 }

 /* After the base class has entered the node, seeds the state of an array
  * dereference from its type and precision unless it is already decided.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
 {
    ir_hierarchical_visitor::visit_enter(ir);

    stack_entry &top = stack.back();
    if (top.state == UNKNOWN)
       top.state = handle_precision(ir->type, ir->precision());

    return visit_continue;
 }

 /* The sampled value takes its precision from the sampler, so the state of a
  * texture node is always overwritten from the sampler's precision.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
 {
    ir_hierarchical_visitor::visit_enter(ir);

    const int sampler_precision = ir->sampler->precision();
    stack.back().state = handle_precision(ir->type, sampler_precision);

    return visit_continue;
 }

 /* Marks an expression as CANT_LOWER when its result type cannot be lowered,
  * or when it is a derivative and LowerPrecisionDerivatives is off.
  * Otherwise the state is left for the operands to decide.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
 {
    ir_hierarchical_visitor::visit_enter(ir);

    bool blocked = !can_lower_type(options, ir->type);

    if (!blocked && !options->LowerPrecisionDerivatives) {
       /* Don't lower precision for derivative calculations */
       switch (ir->operation) {
       case ir_unop_dFdx:
       case ir_unop_dFdx_coarse:
       case ir_unop_dFdx_fine:
       case ir_unop_dFdy:
       case ir_unop_dFdy_coarse:
       case ir_unop_dFdy_fine:
          blocked = true;
          break;
       default:
          break;
       }
    }

    if (blocked)
       stack.back().state = CANT_LOWER;

    return visit_continue;
 }

 /* Returns true if \p name is one of the builtins listed here as always
  * returning a mediump or lowp value. The comparison is an exact,
  * case-sensitive match.
  */
 static bool
 function_always_returns_mediump_or_lowp(const char *name)
 {
    static const char *const reduced_precision_builtins[] = {
       "bitCount",
       "findLSB",
       "findMSB",
       "unpackHalf2x16",
       "unpackUnorm4x8",
       "unpackSnorm4x8",
    };

    for (const char *candidate : reduced_precision_builtins) {
       if (strcmp(name, candidate) == 0)
          return true;
    }

    return false;
 }

 /**
  * Decides whether the return value of the call \p ir may be treated as
  * mediump.
  *
  * \param ir                 the call being inspected
  * \param lowerable_rvalues  set of rvalues already found to be lowerable
  *
  * The decision is made in this order:
  *  - image loads (the intrinsic or its "imageLoad" wrapper) depend on the
  *    channel size of the image format only;
  *  - builtins whose first argument references a sampler depend on the
  *    precision of that sampler, except textureSize which is never lowered;
  *  - non-builtins and a fixed list of builtins with highp parameters or
  *    outputs are never lowered;
  *  - any other builtin is lowered if each parameter that matters is either
  *    a constant or a member of \p lowerable_rvalues.
  */
 static bool
 is_lowerable_builtin(ir_call *ir,
                      const struct set *lowerable_rvalues)
 {
    /* The intrinsic call is inside the wrapper imageLoad function that will
     * be inlined. We have to handle both of them.
     */
    if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
        (ir->callee->is_builtin() &&
         !strcmp(ir->callee_name(), "imageLoad"))) {
       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
       ir_variable *resource = param->variable_referenced();

       assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
       assert(resource->type->without_array()->is_image());

       /* GLSL ES 3.20 requires that images have a precision modifier, but if
        * you set one, it doesn't do anything, because all intrinsics are
        * defined with highp. This seems to be a spec bug.
        *
        * In theory we could set the return value to mediump if the image
        * format has a lower precision. This appears to be the most sensible
        * thing to do.
        */
       const struct util_format_description *desc =
          util_format_description(resource->data.image_format);
       int i =
          util_format_get_first_non_void_channel(resource->data.image_format);
       assert(i >= 0);

       if (desc->channel[i].pure_integer ||
           desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
          return desc->channel[i].size <= 16;
       else
          return desc->channel[i].size <= 10; /* unorm/snorm */
    }

    /* Handle special calls. */
    if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
       ir_variable *var = param->variable_referenced();

       /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
        * be inlined by lower_precision() if we return true here, so that we can
        * get to ir_texture later and do proper lowering.
        *
        * We should lower the type of the return value if the sampler type
        * uses lower precision. The function parameters don't matter.
        */
       if (var && var->type->without_array()->is_sampler()) {
          /* textureSize always returns highp. */
          if (!strcmp(ir->callee_name(), "textureSize"))
             return false;

          return var->data.precision == GLSL_PRECISION_MEDIUM ||
                 var->data.precision == GLSL_PRECISION_LOW;
       }
    }

    if (!ir->callee->is_builtin() ||
        /* Parameters are always highp: */
        !strcmp(ir->callee_name(), "floatBitsToInt") ||
        !strcmp(ir->callee_name(), "floatBitsToUint") ||
        !strcmp(ir->callee_name(), "intBitsToFloat") ||
        !strcmp(ir->callee_name(), "uintBitsToFloat") ||
        !strcmp(ir->callee_name(), "bitfieldReverse") ||
        !strcmp(ir->callee_name(), "frexp") ||
        !strcmp(ir->callee_name(), "ldexp") ||
        /* Parameters and outputs are always highp: */
        /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
        !strcmp(ir->callee_name(), "uaddCarry") ||
        !strcmp(ir->callee_name(), "usubBorrow") ||
        !strcmp(ir->callee_name(), "imulExtended") ||
        !strcmp(ir->callee_name(), "umulExtended") ||
        !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
        !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
        /* Outputs are highp: */
        !strcmp(ir->callee_name(), "packUnorm2x16") ||
        !strcmp(ir->callee_name(), "packSnorm2x16") ||
        /* Parameters are mediump and outputs are highp. The parameters should
         * be optimized in NIR, not here, e.g:
         * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
         * - Other opcodes don't have to convert parameters to highp if the hw
         *   has f16 versions. Optimize in NIR accordingly.
         */
        !strcmp(ir->callee_name(), "packHalf2x16") ||
        !strcmp(ir->callee_name(), "packUnorm4x8") ||
        !strcmp(ir->callee_name(), "packSnorm4x8"))
       return false;

    assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

    /* Number of parameters to check if they are lowerable. */
    unsigned check_parameters = ir->actual_parameters.length();

    /* Interpolation functions only consider the precision of the interpolant. */
    /* Bitfield functions ignore the precision of "offset" and "bits". */
    if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
        !strcmp(ir->callee_name(), "interpolateAtSample") ||
        !strcmp(ir->callee_name(), "bitfieldExtract")) {
       check_parameters = 1;
    } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
       check_parameters = 2;
    } else if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
       /* These only lower the return value. Parameters keep their precision,
        * which is preserved in map_builtin.
        */
       check_parameters = 0;
    }

    /* Only the leading check_parameters arguments decide the result. */
    foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
       if (!check_parameters)
          break;

       if (!param->as_constant() &&
           _mesa_set_search(lowerable_rvalues, param) == NULL)
          return false;

       --check_parameters;
    }

    return true;
 }

 /* Assigns a precision to the compiler-generated temporary that receives the
  * return value of a call: mediump if the returned value can be lowered,
  * highp otherwise. Calls without a return value are left alone.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 {
    ir_hierarchical_visitor::visit_leave(ir);

    /* Nothing to mark for functions returning void. */
    if (ir->return_deref == NULL)
       return visit_continue;

    ir_variable *ret_tmp = ir->return_deref->variable_referenced();

    assert(ret_tmp->data.mode == ir_var_temporary);

    /* Builtins carry no return precision of their own, so a lowerable
     * builtin is treated as returning mediump. Everything else uses the
     * precision declared on the callee.
     */
    unsigned precision;
    if (is_lowerable_builtin(ir, lowerable_rvalues))
       precision = GLSL_PRECISION_MEDIUM;
    else
       precision = ir->callee->return_precision;

    if (handle_precision(ret_tmp->type, precision) != SHOULD_LOWER) {
       ret_tmp->data.precision = GLSL_PRECISION_HIGH;
       return visit_continue;
    }

    /* The temporary is expected to be written by this call only, so it
     * should not have been given a precision yet.
     */
    assert(ret_tmp->data.precision == GLSL_PRECISION_NONE);
    ret_tmp->data.precision = GLSL_PRECISION_MEDIUM;

    return visit_continue;
 }

 /* Derives the precision of compiler-generated temporaries from what gets
  * assigned to them. Assignments to any other kind of variable are ignored.
  */
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
 {
    ir_hierarchical_visitor::visit_leave(ir);

    ir_variable *dst = ir->lhs->variable_referenced();

    if (dst->data.mode != ir_var_temporary)
       return visit_continue;

    const bool rhs_lowerable =
       _mesa_set_search(lowerable_rvalues, ir->rhs) != NULL;

    if (rhs_lowerable) {
       /* Only the first assignment may pick mediump. Temporaries such as
        * the ones made for the ?: operator are assigned several times with
        * possibly different precisions, and a later lowerable value must
        * not undo a highp decision.
        */
       if (dst->data.precision == GLSL_PRECISION_NONE)
          dst->data.precision = GLSL_PRECISION_MEDIUM;
    } else if (!ir->rhs->as_constant()) {
       /* A non-lowerable, non-constant value forces highp. Constants leave
        * the precision untouched.
        */
       dst->data.precision = GLSL_PRECISION_HIGH;
    }

    return visit_continue;
 }

 void
 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                        exec_list *instructions,
                        struct set *result)
 {
    find_lowerable_rvalues_visitor v(result, options);

    visit_list_elements(&v, instructions);

    assert(v.stack.empty());
 }

 /* Returns the 16-bit counterpart of a 32-bit float/int/uint type when
  * \p up is false, or the 32-bit counterpart of a 16-bit type when \p up is
  * true. Arrays are converted element-wise, keeping size and stride. Any
  * other base type is a programming error.
  */
 static const glsl_type *
 convert_type(bool up, const glsl_type *type)
 {
    if (type->is_array()) {
       const glsl_type *element = convert_type(up, type->fields.array);

       return glsl_type::get_array_instance(element,
                                            type->array_size(),
                                            type->explicit_stride);
    }

    const glsl_base_type source = type->base_type;
    glsl_base_type target;

    if (!up && source == GLSL_TYPE_FLOAT) {
       target = GLSL_TYPE_FLOAT16;
    } else if (!up && source == GLSL_TYPE_INT) {
       target = GLSL_TYPE_INT16;
    } else if (!up && source == GLSL_TYPE_UINT) {
       target = GLSL_TYPE_UINT16;
    } else if (up && source == GLSL_TYPE_FLOAT16) {
       target = GLSL_TYPE_FLOAT;
    } else if (up && source == GLSL_TYPE_INT16) {
       target = GLSL_TYPE_INT;
    } else if (up && source == GLSL_TYPE_UINT16) {
       target = GLSL_TYPE_UINT;
    } else {
       unreachable("invalid type");
       return NULL;
    }

    return glsl_type::get_instance(target,
                                   type->vector_elements,
                                   type->matrix_columns,
                                   type->explicit_stride,
                                   type->interface_row_major);
 }

 /* Shorthand for the 32-bit to 16-bit direction of convert_type(). */
 static const glsl_type *
 lower_glsl_type(const glsl_type *type)
 {
    const bool up = false;

    return convert_type(up, type);
 }

 /* Wraps \p ir in a conversion expression: 32 to 16 bits when \p up is
  * false, 16 to 32 bits when \p up is true. The opcode is picked from the
  * base type of \p ir (looking through arrays), and the new expression is
  * allocated from the ralloc parent of \p ir.
  */
 static ir_rvalue *
 convert_precision(bool up, ir_rvalue *ir)
 {
    const glsl_base_type source = ir->type->without_array()->base_type;
    unsigned op;

    if (!up && source == GLSL_TYPE_FLOAT) {
       op = ir_unop_f2fmp;
    } else if (!up && source == GLSL_TYPE_INT) {
       op = ir_unop_i2imp;
    } else if (!up && source == GLSL_TYPE_UINT) {
       op = ir_unop_u2ump;
    } else if (up && source == GLSL_TYPE_FLOAT16) {
       op = ir_unop_f162f;
    } else if (up && source == GLSL_TYPE_INT16) {
       op = ir_unop_i2i;
    } else if (up && source == GLSL_TYPE_UINT16) {
       op = ir_unop_u2u;
    } else {
       unreachable("invalid type");
       return NULL;
    }

    const glsl_type *result_type = convert_type(up, ir->type);
    void *ctx = ralloc_parent(ir);

    return new(ctx) ir_expression(op, result_type, ir, NULL);
 }

 /* Lowers a single rvalue of a lowerable tree.
  *
  * - Non-boolean dereferences are left untouched themselves and wrapped in a
  *   down-conversion expression instead.
  * - Any other 32-bit rvalue has its type replaced by the 16-bit one. For
  *   constants the stored data is converted as well.
  * - Everything else (including NULL) is left alone.
  */
 void
 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 {
    ir_rvalue *ir = *rvalue;

    if (ir == NULL)
       return;

    if (ir->as_dereference()) {
       if (!ir->type->is_boolean())
          *rvalue = convert_precision(false, ir);
    } else if (ir->type->is_32bit()) {
       ir->type = lower_glsl_type(ir->type);

       ir_constant *const_ir = ir->as_constant();

       if (const_ir) {
          /* Only the 16-bit lanes are written below, but the whole union is
           * copied into the constant afterwards. Zero it first so that no
           * indeterminate bytes end up in the IR.
           */
          ir_constant_data value;
          memset(&value, 0, sizeof(value));

          if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
             for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
                value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
          } else if (ir->type->base_type == GLSL_TYPE_INT16) {
             for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
                value.i16[i] = const_ir->value.i[i];
          } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
             for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
                value.u16[i] = const_ir->value.u[i];
          } else {
             unreachable("invalid type");
          }

          const_ir->value = value;
       }
    }
 }

 /* Stops the traversal at a record dereference so that the variable behind
  * it is not lowered.
  */
 ir_visitor_status
 lower_precision_visitor::visit_enter(ir_dereference_record *)
 {
    return visit_continue_with_parent;
 }

 /* Stops the traversal at an array dereference: neither the variable nor the
  * index is converted here. A lowerable index is handled separately.
  */
 ir_visitor_status
 lower_precision_visitor::visit_enter(ir_dereference_array *)
 {
    return visit_continue_with_parent;
 }

 /* Stops the traversal at a call: its arguments are not converted here and
  * are handled separately.
  */
 ir_visitor_status
 lower_precision_visitor::visit_enter(ir_call *)
 {
    return visit_continue_with_parent;
 }

 /* Stops the traversal at a texture operation: its arguments are not
  * converted here and are handled separately.
  */
 ir_visitor_status
 lower_precision_visitor::visit_enter(ir_texture *)
 {
    return visit_continue_with_parent;
 }

 /* After the operands have been handled, replaces bool<->float conversion
  * opcodes with their 16-bit float variants. b2i and i2b are kept as they
  * are because both support int16, and all other opcodes are untouched.
  */
 ir_visitor_status
 lower_precision_visitor::visit_leave(ir_expression *ir)
 {
    ir_rvalue_visitor::visit_leave(ir);

    if (ir->operation == ir_unop_b2f)
       ir->operation = ir_unop_b2f16;
    else if (ir->operation == ir_unop_f2b)
       ir->operation = ir_unop_f162b;

    return visit_continue;
 }

 /* Lowers *rvalue if find_lowerable_rvalues_visitor recorded it as the root
  * of a lowerable tree: the tree is rewritten to 16 bits by
  * lower_precision_visitor and, unless the result is a bool, converted back
  * to 32 bits at the top. The set entry is consumed either way.
  */
 void
 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 {
    if (*rvalue == NULL)
       return;

    struct set_entry *hit = _mesa_set_search(lowerable_rvalues, *rvalue);

    if (hit == NULL)
       return;

    _mesa_set_remove(lowerable_rvalues, hit);

    /* A bare dereference would only gain a pointless down- and
     * up-conversion pair with no operation in between. Those would be
     * optimised away eventually, but skipping them here also avoids
     * breaking inout parameters of functions.
     */
    if ((*rvalue)->as_dereference() != NULL)
       return;

    lower_precision_visitor lowering;

    /* First the operands, then the root node itself. */
    (*rvalue)->accept(&lowering);
    lowering.handle_rvalue(rvalue);

    /* A result that ended up as bool needs no final conversion. */
    if ((*rvalue)->type->base_type == GLSL_TYPE_BOOL)
       return;

    *rvalue = convert_precision(true, *rvalue);
 }

 /* Replaces a call to a builtin whose return temporary was marked
  * mediump/lowp by find_lowerable_rvalues_visitor with an inlined copy of
  * the lowered builtin. All other calls are left in place.
  */
 ir_visitor_status
 find_precision_visitor::visit_enter(ir_call *ir)
 {
    ir_rvalue_enter_visitor::visit_enter(ir);

    /* Nothing is done for image_load here. Only its return variable was
     * changed to mediump/lowp so that following instructions can use
     * reduced precision. The return type of the intrinsic itself stays as
     * it is, but can be changed in NIR if all users use the *2*mp opcode.
     */
    if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
       return visit_continue;

    if (!ir->callee->is_builtin())
       return visit_continue;

    ir_variable *ret_tmp = NULL;
    if (ir->return_deref)
       ret_tmp = ir->return_deref->variable_referenced();

    if (ret_tmp == NULL)
       return visit_continue;

    const bool reduced =
       ret_tmp->data.precision == GLSL_PRECISION_MEDIUM ||
       ret_tmp->data.precision == GLSL_PRECISION_LOW;

    if (!reduced)
       return visit_continue;

    /* Swap in the lowered implementation, inline it, and drop the call. */
    ir->callee = map_builtin(ir->callee);
    ir->generate_inline(ir);
    ir->remove();

    return visit_continue_with_parent;
 }

 /* Returns a lowered clone of the builtin signature \p sig.
  *
  * The clone is created on first request, has lower_precision() run over
  * its body, and is cached in lowered_builtins under \p sig. The caches and
  * the memory context the clones live in are created by the first call.
  */
 ir_function_signature *
 find_precision_visitor::map_builtin(ir_function_signature *sig)
 {
    if (lowered_builtins != NULL) {
       struct hash_entry *cached =
          _mesa_hash_table_search(lowered_builtins, sig);

       if (cached != NULL)
          return static_cast<ir_function_signature *>(cached->data);
    } else {
       lowered_builtins = _mesa_pointer_hash_table_create(NULL);
       clone_ht = _mesa_pointer_hash_table_create(NULL);
       lowered_builtin_mem_ctx = ralloc_context(NULL);
    }

    ir_function_signature *clone =
       sig->clone(lowered_builtin_mem_ctx, clone_ht);

    /* Builtins that always return mediump or lowp keep their parameters as
     * they are because those can be highp. NIR can lower the up-conversion
     * of the parameters if needed. For all others the parameters become
     * mediump.
     */
    const bool keep_parameters =
       function_always_returns_mediump_or_lowp(sig->function_name());

    if (!keep_parameters) {
       foreach_in_list(ir_variable, param, &clone->parameters) {
          param->data.precision = GLSL_PRECISION_MEDIUM;
       }
    }

    lower_precision(options, &clone->body);

    /* clone_ht is scratch space for a single clone only. */
    _mesa_hash_table_clear(clone_ht, NULL);

    _mesa_hash_table_insert(lowered_builtins, sig, clone);

    return clone;
 }

 /* Starts with an empty set of lowerable rvalues and no builtin caches; the
  * caches are set up by map_builtin() when first needed.
  */
 find_precision_visitor::find_precision_visitor(
    const struct gl_shader_compiler_options *options)
    : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
      lowered_builtins(NULL),
      clone_ht(NULL),
      lowered_builtin_mem_ctx(NULL),
      options(options)
 {
 }

 /* Releases the set and, if map_builtin() ever created them, both hash
  * tables and the memory context holding the lowered builtins.
  */
 find_precision_visitor::~find_precision_visitor()
 {
    _mesa_set_destroy(lowerable_rvalues, NULL);

    /* The three builtin resources are always created together. */
    if (lowered_builtins == NULL)
       return;

    _mesa_hash_table_destroy(lowered_builtins, NULL);
    _mesa_hash_table_destroy(clone_ht, NULL);
    ralloc_free(lowered_builtin_mem_ctx);
 }

 /* Lowering opcodes to 16 bits is not enough for programs with control flow
  * (and the ?: operator, which is represented by if-then-else in the IR),
  * because temporary variables, which are used for passing values between
  * code blocks, are not lowered, resulting in 32-bit phis in NIR.
  *
  * First change the variable types to 16 bits, then change all ir_dereference
  * types to 16 bits.
  */
 class lower_variables_visitor : public ir_rvalue_enter_visitor {
 public:
    /* Keeps the compiler options and creates the empty lower_vars set. */
    lower_variables_visitor(const struct gl_shader_compiler_options *options)
       : options(options) {
       lower_vars = _mesa_pointer_set_create(NULL);
    }

    /* Destroys the lower_vars set. */
    virtual ~lower_variables_visitor()
    {
       _mesa_set_destroy(lower_vars, NULL);
    }

    virtual ir_visitor_status visit(ir_variable *var);
    virtual ir_visitor_status visit_enter(ir_assignment *ir);
    virtual ir_visitor_status visit_enter(ir_return *ir);
    virtual ir_visitor_status visit_enter(ir_call *ir);
    virtual void handle_rvalue(ir_rvalue **rvalue);

    void fix_types_in_deref_chain(ir_dereference *ir);
    void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
                                  bool insert_before);

    const struct gl_shader_compiler_options *options;
    /* Pointer set owned by this visitor. NOTE(review): presumably holds the
     * variables whose type was lowered -- confirm against visit().
     */
    set *lower_vars;
 };

 /**
  * Rewrite a constant in place so that it has the type returned by
  * lower_glsl_type() and stores its components in the matching 16-bit lanes.
  *
  * Arrays are handled by recursing into every element and then lowering the
  * array type itself. For non-array constants, the lowered base type must be
  * float16, int16 or uint16; anything else hits unreachable().
  *
  * \param ir  constant to modify; both ir->type and ir->value are updated.
  */
 static void
 lower_constant(ir_constant *ir)
 {
    if (ir->type->is_array()) {
       for (int i = 0; i < ir->type->array_size(); i++)
          lower_constant(ir->get_array_element(i));

       ir->type = lower_glsl_type(ir->type);
       return;
    }

    ir->type = lower_glsl_type(ir->type);

    /* Start from a copy of the current storage rather than an uninitialized
     * union: only the 16-bit lanes are written below, so this keeps the
     * remaining bytes well defined when the union is copied back. The source
     * components are still read from ir->value, which is left untouched until
     * the final assignment.
     */
    ir_constant_data value = ir->value;

    if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
       for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
          value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
    } else if (ir->type->base_type == GLSL_TYPE_INT16) {
       for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
          value.i16[i] = ir->value.i[i];
    } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
       for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
          value.u16[i] = ir->value.u[i];
    } else {
       unreachable("invalid type");
    }

    ir->value = value;
 }

 /**
  * Switch a variable declaration to its lowered type when it qualifies.
  *
  * A variable qualifies when its mode is ir_var_temporary or ir_var_auto, its
  * (array-stripped) type is 32-bit, its precision is mediump or lowp and
  * can_lower_type() accepts its type. Constant values / initializers of the
  * same type are cloned and lowered as well, unless LowerPrecisionConstants
  * is disabled, in which case the variable is left alone. Lowered variables
  * are recorded in lower_vars.
  */
 ir_visitor_status
 lower_variables_visitor::visit(ir_variable *var)
 {
    const bool candidate_mode = var->data.mode == ir_var_temporary ||
                                var->data.mode == ir_var_auto;
    if (!candidate_mode)
       return visit_continue;

    if (!var->type->without_array()->is_32bit())
       return visit_continue;

    const bool reduced_precision =
       var->data.precision == GLSL_PRECISION_MEDIUM ||
       var->data.precision == GLSL_PRECISION_LOW;
    if (!reduced_precision)
       return visit_continue;

    if (!can_lower_type(options, var->type))
       return visit_continue;

    /* Constants attached to the variable only need lowering when they have
     * exactly the variable's (still 32-bit) type.
     */
    const bool lower_value =
       var->constant_value && var->type == var->constant_value->type;
    const bool lower_initializer =
       var->constant_initializer &&
       var->type == var->constant_initializer->type;

    /* Leave the variable untouched if its constants may not be lowered. */
    if ((lower_value || lower_initializer) &&
        !options->LowerPrecisionConstants)
       return visit_continue;

    /* Lower a private clone of each constant. */
    if (lower_value) {
       ir_constant *lowered =
          var->constant_value->clone(ralloc_parent(var), NULL);
       var->constant_value = lowered;
       lower_constant(var->constant_value);
    }

    if (lower_initializer) {
       ir_constant *lowered =
          var->constant_initializer->clone(ralloc_parent(var), NULL);
       var->constant_initializer = lowered;
       lower_constant(var->constant_initializer);
    }

    var->type = lower_glsl_type(var->type);
    _mesa_set_add(lower_vars, var);

    return visit_continue;
 }

 /**
  * Replace the 32-bit type of a dereference with its lowered type.
  *
  * The dereference must still be 32-bit and must refer to a variable in
  * lower_vars (both are asserted). When the dereference is an array
  * dereference, the type of every nested indexed array is lowered as well.
  */
 void
 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
 {
    assert(ir->type->without_array()->is_32bit());
    assert(_mesa_set_search(lower_vars, ir->variable_referenced()));

    /* The dereference node itself. */
    ir->type = lower_glsl_type(ir->type);

    /* Walk inwards through array dereferences, fixing each indexed value. */
    ir_dereference_array *link = ir->as_dereference_array();
    while (link != NULL) {
       assert(link->array->type->without_array()->is_32bit());
       link->array->type = lower_glsl_type(link->array->type);

       link = link->array->as_dereference_array();
    }
 }

 /**
  * Emit "lhs = convert(rhs)" next to base_ir, where exactly one side is
  * 16-bit and the other is 32-bit.
  *
  * Array operands are split: for every element index, both sides are cloned,
  * wrapped in an array dereference with a constant index and handled
  * recursively.
  *
  * \param lhs            destination dereference
  * \param rhs            source value
  * \param insert_before  insert before base_ir when true, after it otherwise
  */
 void
 lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
                                                   ir_rvalue *rhs,
                                                   bool insert_before)
 {
    void *mem_ctx = ralloc_parent(lhs);

    if (lhs->type->is_array()) {
       const unsigned num_elements = lhs->type->length;

       for (unsigned elem = 0; elem != num_elements; ++elem) {
          ir_dereference *dst_elem =
             new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(elem));
          ir_dereference *src_elem =
             new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(elem));

          convert_split_assignment(dst_elem, src_elem, insert_before);
       }
       return;
    }

    /* Each side is 16 or 32 bits wide, and the two widths must differ. */
    assert(lhs->type->is_16bit() || lhs->type->is_32bit());
    assert(rhs->type->is_16bit() || rhs->type->is_32bit());
    assert(lhs->type->is_16bit() != rhs->type->is_16bit());

    /* Convert up when the destination is 32-bit, down otherwise. */
    ir_assignment *converted =
       new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));

    if (insert_before) {
       base_ir->insert_before(converted);
       return;
    }

    base_ir->insert_after(converted);
 }

 /**
  * Fix up an assignment whose LHS and/or RHS refers to a variable recorded in
  * lower_vars.
  *
  * Whole-array copies between a 16-bit and a 32-bit side are replaced by
  * converting assignments emitted through convert_split_assignment(), and the
  * original assignment is removed. For any other assignment to a lowered
  * variable, the dereference types are updated and a 32-bit non-array RHS is
  * narrowed to match the LHS.
  */
 ir_visitor_status
 lower_variables_visitor::visit_enter(ir_assignment *ir)
 {
    ir_dereference *lhs = ir->lhs;
    ir_variable *var = lhs->variable_referenced();
    /* rhs_deref/rhs_var are NULL unless the RHS is a dereference; rhs_const is
     * NULL unless the RHS is a constant.
     */
    ir_dereference *rhs_deref = ir->rhs->as_dereference();
    ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
    ir_constant *rhs_const = ir->rhs->as_constant();

    /* Legalize array assignments between lowered and non-lowered variables.
     *
     * This applies when the LHS is an array and the RHS is either a variable
     * dereference whose variable differs from the LHS variable in being
     * 16-bit, or a 32-bit constant assigned to a 16-bit LHS variable.
     */
    if (lhs->type->is_array() &&
        (rhs_var || rhs_const) &&
        (!rhs_var ||
         var->type->without_array()->is_16bit() !=
         rhs_var->type->without_array()->is_16bit()) &&
        (!rhs_const ||
         (var->type->without_array()->is_16bit() &&
          rhs_const->type->without_array()->is_32bit()))) {
       assert(ir->rhs->type->is_array());

       /* Fix array assignments from lowered to non-lowered. */
       if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
          fix_types_in_deref_chain(rhs_deref);
          /* Convert to 32 bits for LHS. */
          convert_split_assignment(lhs, rhs_deref, true);
          /* The split assignments inserted above replace this one. */
          ir->remove();
          return visit_continue;
       }

       /* Fix array assignments from non-lowered to lowered. */
       if (_mesa_set_search(lower_vars, var) &&
           ir->rhs->type->without_array()->is_32bit()) {
          fix_types_in_deref_chain(lhs);
          /* Convert to 16 bits for LHS. */
          convert_split_assignment(lhs, ir->rhs, true);
          /* The split assignments inserted above replace this one. */
          ir->remove();
          return visit_continue;
       }
    }

    /* Fix assignment types. */
    if (_mesa_set_search(lower_vars, var)) {
       /* Fix the LHS type. */
       if (lhs->type->without_array()->is_32bit())
          fix_types_in_deref_chain(lhs);

       /* Fix the RHS type if it's a lowered variable. */
       if (rhs_var &&
           _mesa_set_search(lower_vars, rhs_var) &&
           rhs_deref->type->without_array()->is_32bit())
          fix_types_in_deref_chain(rhs_deref);

       /* Fix the RHS type if it's a non-array expression.
        *
        * This is evaluated after the fix-up above, so an RHS dereference that
        * was just lowered is no longer 32-bit here.
        */
       if (ir->rhs->type->is_32bit()) {
          ir_expression *expr = ir->rhs->as_expression();

          /* Convert the RHS to the LHS type. */
          if (expr &&
              (expr->operation == ir_unop_f162f ||
               expr->operation == ir_unop_i2i ||
               expr->operation == ir_unop_u2u) &&
              expr->operands[0]->type->is_16bit()) {
             /* If there is an "up" conversion, just remove it.
              * This is optional. We could as well execute the else statement and
              * let NIR eliminate the up+down conversions.
              */
             ir->rhs = expr->operands[0];
          } else {
             /* Add a "down" conversion operation to fix the type of RHS. */
             ir->rhs = convert_precision(false, ir->rhs);
          }
       }
    }

    /* Continue with the base-class traversal of the assignment. */
    return ir_rvalue_enter_visitor::visit_enter(ir);
 }

 /**
  * Keep the type of a returned value at 32 bits when the return statement
  * directly dereferences a lowered variable.
  *
  * A 32-bit temporary is declared before base_ir, filled from the lowered
  * variable through converting assignments, and returned in its place.
  */
 ir_visitor_status
 lower_variables_visitor::visit_enter(ir_return *ir)
 {
    void *mem_ctx = ralloc_parent(ir);

    ir_dereference *returned =
       ir->value != NULL ? ir->value->as_dereference() : NULL;

    const bool returns_lowered_var =
       returned != NULL &&
       _mesa_set_search(lower_vars, returned->variable_referenced()) &&
       returned->type->without_array()->is_32bit();

    if (returns_lowered_var) {
       /* Declare the temporary with the original (32-bit) type, before the
        * dereference type is changed below.
        */
       ir_variable *wide_tmp =
          new(mem_ctx) ir_variable(returned->type, "lowerp", ir_var_temporary);
       base_ir->insert_before(wide_tmp);

       /* The dereference now reads a 16-bit variable. */
       fix_types_in_deref_chain(returned);

       /* wide_tmp = convert-to-32-bit(lowered variable) */
       convert_split_assignment(new(mem_ctx) ir_dereference_variable(wide_tmp),
                                returned, true);

       /* Return the temporary instead. */
       ir->value = new(mem_ctx) ir_dereference_variable(wide_tmp);
    }

    return ir_rvalue_enter_visitor::visit_enter(ir);
 }

 /**
  * Fix rvalues that read lowered variables.
  *
  * Two patterns are handled:
  *  - a narrowing conversion applied directly to a dereference of a lowered
  *    variable is dropped, since the dereference is 16-bit after its type is
  *    fixed;
  *  - any other still-32-bit dereference of a lowered variable has its type
  *    fixed and is wrapped in an up-conversion.
  *
  * Nothing is done for NULL rvalues or while in_assignee is set.
  */
 void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
 {
    ir_rvalue *ir = *rvalue;

    if (ir == NULL || in_assignee)
       return;

    if (ir_expression *expr = ir->as_expression()) {
       ir_dereference *operand = expr->operands[0]->as_dereference();

       /* Opcodes whose result may be a 16-bit type. */
       bool is_conversion;
       switch (expr->operation) {
       case ir_unop_f2fmp:
       case ir_unop_i2imp:
       case ir_unop_u2ump:
       case ir_unop_f2f16:
       case ir_unop_i2i:
       case ir_unop_u2u:
          is_conversion = true;
          break;
       default:
          is_conversion = false;
          break;
       }

       /* Remove f2fmp(float16). Same for int16 and uint16. */
       if (operand != NULL &&
           is_conversion &&
           expr->type->without_array()->is_16bit() &&
           operand->type->without_array()->is_32bit() &&
           _mesa_set_search(lower_vars, operand->variable_referenced())) {
          fix_types_in_deref_chain(operand);

          /* The conversion is redundant now; use the operand directly. */
          *rvalue = operand;
          return;
       }
    }

    ir_dereference *deref = ir->as_dereference();
    if (deref == NULL)
       return;

    ir_variable *var = deref->variable_referenced();
    assert(var);

    if (!_mesa_set_search(lower_vars, var) ||
        !deref->type->without_array()->is_32bit())
       return;

    fix_types_in_deref_chain(deref);

    /* Then convert the type up. Optimizations should eliminate this. */
    *rvalue = convert_precision(true, deref);
 }

 /**
  * Legalize a call whose 32-bit parameters or return value are tied to
  * lowered variables.
  *
  * Each affected actual parameter is replaced by a 32-bit temporary; the
  * value is converted into the temporary before the call for in/inout
  * parameters and converted back after the call for out/inout parameters.
  * A lowered return variable is likewise replaced by a 32-bit temporary that
  * is converted down after the call.
  */
 ir_visitor_status
 lower_variables_visitor::visit_enter(ir_call *ir)
 {
    void *mem_ctx = ralloc_parent(ir);

    /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
    foreach_two_lists(formal_node, &ir->callee->parameters,
                      actual_node, &ir->actual_parameters) {
       ir_variable *formal = (ir_variable *)formal_node;
       ir_dereference *actual =
          ((ir_rvalue *)actual_node)->as_dereference();

       /* Only dereferences can refer to a lowered variable. */
       if (actual == NULL)
          continue;

       ir_variable *actual_var = actual->variable_referenced();

       if (!_mesa_set_search(lower_vars, actual_var) ||
           !formal->type->without_array()->is_32bit())
          continue;

       fix_types_in_deref_chain(actual);

       /* Create a 32-bit temporary variable for the parameter. */
       ir_variable *wide_tmp =
          new(mem_ctx) ir_variable(formal->type, "lowerp", ir_var_temporary);
       base_ir->insert_before(wide_tmp);

       /* Pass the temporary instead of the lowered variable. */
       actual_node->replace_with(new(mem_ctx) ir_dereference_variable(wide_tmp));

       const bool is_inout = formal->data.mode == ir_var_function_inout;
       const bool copy_in =
          formal->data.mode == ir_var_function_in || is_inout;
       const bool copy_out =
          formal->data.mode == ir_var_function_out || is_inout;

       if (copy_in) {
          /* Convert to 32 bits for passing in. */
          convert_split_assignment(new(mem_ctx) ir_dereference_variable(wide_tmp),
                                   actual->clone(mem_ctx, NULL), true);
       }

       if (copy_out) {
          /* Convert to 16 bits after returning. */
          convert_split_assignment(actual,
                                   new(mem_ctx) ir_dereference_variable(wide_tmp),
                                   false);
       }
    }

    /* Fix the type of return value dereferences. */
    ir_dereference_variable *ret_deref = ir->return_deref;
    ir_variable *ret_var =
       ret_deref != NULL ? ret_deref->variable_referenced() : NULL;

    const bool fix_return =
       ret_var != NULL &&
       _mesa_set_search(lower_vars, ret_var) &&
       ret_deref->type->without_array()->is_32bit();

    if (fix_return) {
       /* Create a 32-bit temporary variable. */
       ir_variable *wide_ret =
          new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                   ir_var_temporary);
       base_ir->insert_before(wide_ret);

       /* Let the call write its result to the temporary. */
       ret_deref->var = wide_ret;

       /* Convert to 16 bits after returning. */
       convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                                new(mem_ctx) ir_dereference_variable(wide_ret),
                                false);
    }

    return ir_rvalue_enter_visitor::visit_enter(ir);
 }

 }

 void
 lower_precision(const struct gl_shader_compiler_options *options,
                 exec_list *instructions)
 {
    find_precision_visitor v(options);
    find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
    visit_list_elements(&v, instructions);

    lower_variables_visitor vars(options);
    visit_list_elements(&vars, instructions);
 }