| /* |
| * Copyright © 2018 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "nir_instr_set.h" |
| #include "nir_search_helpers.h" |
| #include "nir_builder.h" |
| #include "util/u_vector.h" |
| |
| /* Partial redundancy elimination of compares |
| * |
 * Searches for comparisons of the form 'a cmp b' that dominate arithmetic
| * instructions like 'b - a'. The comparison is replaced by the arithmetic |
| * instruction, and the result is compared with zero. For example, |
| * |
| * vec1 32 ssa_111 = flt 0.37, ssa_110.w |
| * if ssa_111 { |
| * block block_1: |
| * vec1 32 ssa_112 = fadd ssa_110.w, -0.37 |
| * ... |
| * |
| * becomes |
| * |
| * vec1 32 ssa_111 = fadd ssa_110.w, -0.37 |
| * vec1 32 ssa_112 = flt 0.0, ssa_111 |
| * if ssa_112 { |
| * block block_1: |
| * ... |
| */ |
| |
struct block_queue {
   /**
    * Stack of blocks from the current location in the CFG to the entry point
    * of the function.
    *
    * This is sort of a poor man's dominator tree: comparison_pre_block
    * pushes a node on entry to each block and pops it after recursing into
    * the block's dominated children, so every node on the stack dominates
    * the block currently being processed.
    */
   struct exec_list blocks;

   /** List of freed block_instructions structures that can be reused. */
   struct exec_list reusable_blocks;
};
| |
struct block_instructions {
   /* Link in either block_queue::blocks or block_queue::reusable_blocks. */
   struct exec_node node;

   /**
    * Set of comparison instructions from the block that are candidates for
    * being replaced by add instructions.
    *
    * Stores nir_alu_instr* elements; a slot is set to NULL once its
    * comparison has been rewritten so it is skipped on later scans.
    */
   struct u_vector instructions;
};
| |
| static void |
| block_queue_init(struct block_queue *bq) |
| { |
| exec_list_make_empty(&bq->blocks); |
| exec_list_make_empty(&bq->reusable_blocks); |
| } |
| |
| static void |
| block_queue_finish(struct block_queue *bq) |
| { |
| struct block_instructions *n; |
| |
| while ((n = (struct block_instructions *) exec_list_pop_head(&bq->blocks)) != NULL) { |
| u_vector_finish(&n->instructions); |
| free(n); |
| } |
| |
| while ((n = (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks)) != NULL) { |
| free(n); |
| } |
| } |
| |
| static struct block_instructions * |
| push_block(struct block_queue *bq) |
| { |
| struct block_instructions *bi = |
| (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks); |
| |
| if (bi == NULL) { |
| bi = calloc(1, sizeof(struct block_instructions)); |
| |
| if (bi == NULL) |
| return NULL; |
| } |
| |
| if (!u_vector_init(&bi->instructions, |
| sizeof(nir_alu_instr *), |
| 8 * sizeof(nir_alu_instr *))) { |
| free(bi); |
| return NULL; |
| } |
| |
| exec_list_push_tail(&bq->blocks, &bi->node); |
| |
| return bi; |
| } |
| |
| static void |
| pop_block(struct block_queue *bq, struct block_instructions *bi) |
| { |
| u_vector_finish(&bi->instructions); |
| exec_node_remove(&bi->node); |
| exec_list_push_head(&bq->reusable_blocks, &bi->node); |
| } |
| |
| static void |
| add_instruction_for_block(struct block_instructions *bi, |
| nir_alu_instr *alu) |
| { |
| nir_alu_instr **data = |
| u_vector_add(&bi->instructions); |
| |
| *data = alu; |
| } |
| |
/**
 * Replace a dominating comparison and a dominated add with a single add
 * followed by a comparison with zero.
 *
 * \param orig_cmp      The dominating comparison (e.g., flt a, b).
 * \param orig_add      The dominated fadd that matches the comparison's
 *                      operands (possibly with negations).
 * \param zero_on_left  True when the rewritten comparison should be
 *                      (cmp 0, add); false for (cmp add, 0).
 *
 * Both original instructions are removed after all of their uses are
 * rewritten to the new instructions.
 */
static void
rewrite_compare_instruction(nir_builder *bld, nir_alu_instr *orig_cmp,
                            nir_alu_instr *orig_add, bool zero_on_left)
{
   void *const mem_ctx = ralloc_parent(orig_cmp);

   /* All new instructions are inserted at the comparison's location, which
    * dominates the original add, so the add's uses remain valid.
    */
   bld->cursor = nir_before_instr(&orig_cmp->instr);

   /* This is somewhat tricky.  The compare instruction may be something like
    * (fcmp, a, b) while the add instruction is something like (fadd, fneg(a),
    * b).  This is problematic because the SSA value for the fneg(a) may not
    * exist yet at the compare instruction.
    *
    * We fabricate the operands of the new add.  This is done using
    * information provided by zero_on_left.  If zero_on_left is true, we know
    * the resulting compare instruction is (fcmp, 0.0, (fadd, x, y)).  If the
    * original compare instruction was (fcmp, a, b), x = b and y = -a.  If
    * zero_on_left is false, the resulting compare instruction is (fcmp,
    * (fadd, x, y), 0.0) and x = a and y = -b.
    */
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, orig_cmp, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, orig_cmp, 1);

   nir_ssa_def *const fadd = zero_on_left
      ? nir_fadd(bld, b, nir_fneg(bld, a))
      : nir_fadd(bld, a, nir_fneg(bld, b));

   nir_ssa_def *const zero =
      nir_imm_floatN_t(bld, 0.0, orig_add->dest.dest.ssa.bit_size);

   nir_ssa_def *const cmp = zero_on_left
      ? nir_build_alu(bld, orig_cmp->op, zero, fadd, NULL, NULL)
      : nir_build_alu(bld, orig_cmp->op, fadd, zero, NULL, NULL);

   /* Generating extra moves of the results is the easy way to make sure the
    * writemasks match the original instructions.  Later optimization passes
    * will clean these up.  This is similar to nir_replace_instr (in
    * nir_search.c).
    */
   nir_alu_instr *mov_add = nir_alu_instr_create(mem_ctx, nir_op_mov);
   mov_add->dest.write_mask = orig_add->dest.write_mask;
   nir_ssa_dest_init(&mov_add->instr, &mov_add->dest.dest,
                     orig_add->dest.dest.ssa.num_components,
                     orig_add->dest.dest.ssa.bit_size, NULL);
   mov_add->src[0].src = nir_src_for_ssa(fadd);

   nir_builder_instr_insert(bld, &mov_add->instr);

   nir_alu_instr *mov_cmp = nir_alu_instr_create(mem_ctx, nir_op_mov);
   mov_cmp->dest.write_mask = orig_cmp->dest.write_mask;
   nir_ssa_dest_init(&mov_cmp->instr, &mov_cmp->dest.dest,
                     orig_cmp->dest.dest.ssa.num_components,
                     orig_cmp->dest.dest.ssa.bit_size, NULL);
   mov_cmp->src[0].src = nir_src_for_ssa(cmp);

   nir_builder_instr_insert(bld, &mov_cmp->instr);

   nir_ssa_def_rewrite_uses(&orig_cmp->dest.dest.ssa,
                            nir_src_for_ssa(&mov_cmp->dest.dest.ssa));
   nir_ssa_def_rewrite_uses(&orig_add->dest.dest.ssa,
                            nir_src_for_ssa(&mov_add->dest.dest.ssa));

   /* We know these have no more uses because we just rewrote them all, so we
    * can remove them.
    */
   nir_instr_remove(&orig_cmp->instr);
   nir_instr_remove(&orig_add->instr);
}
| |
/**
 * Process one block and, recursively, all blocks it dominates.
 *
 * \return true if any compare/add pair was rewritten in this block or any
 *         dominated block.
 */
static bool
comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld)
{
   bool progress = false;

   /* Push a node for this block; it records this block's candidate
    * comparisons while the dominated children are processed.
    */
   struct block_instructions *bi = push_block(bq);
   if (bi == NULL)
      return false;

   /* Starting with the current block, examine each instruction.  If the
    * instruction is a comparison that matches the '±a cmp ±b' pattern, add it
    * to the block_instructions::instructions set.  If the instruction is an
    * add instruction, walk up the block queue looking at the stored
    * instructions.  If a matching comparison is found, move the addition and
    * replace the comparison with a different comparison based on the result
    * of the addition.  All of the blocks in the queue are guaranteed to be
    * dominators of the current block.
    *
    * After processing the current block, recurse into the blocks dominated by
    * the current block.
    */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *const alu = nir_instr_as_alu(instr);

      /* Only scalar, unsaturated results can be matched against the
       * comparison pattern.
       */
      if (alu->dest.dest.ssa.num_components != 1)
         continue;

      if (alu->dest.saturate)
         continue;

      /* Identity swizzle used for the const-zero source checks below. */
      static const uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = {0};

      switch (alu->op) {
      case nir_op_fadd: {
         /* If the instruction is fadd, check it against comparison
          * instructions that dominate it.
          */
         struct block_instructions *b =
            (struct block_instructions *) exec_list_get_head_raw(&bq->blocks);

         /* Walk from the outermost dominator toward the current block; the
          * loop ends when the list sentinel (next == NULL) is reached.
          */
         while (b->node.next != NULL) {
            nir_alu_instr **a;
            bool rewrote_compare = false;

            u_vector_foreach(a, &b->instructions) {
               nir_alu_instr *const cmp = *a;

               /* NULL marks a comparison that was already rewritten. */
               if (cmp == NULL)
                  continue;

               /* The operands of both instructions are, with some liberty,
                * commutative.  Check all four permutations.  The third and
                * fourth permutations are negations of the first two.
                */
               if ((nir_alu_srcs_equal(cmp, alu, 0, 0) &&
                    nir_alu_srcs_negative_equal(cmp, alu, 1, 1)) ||
                   (nir_alu_srcs_equal(cmp, alu, 0, 1) &&
                    nir_alu_srcs_negative_equal(cmp, alu, 1, 0))) {
                  /* These are the cases where (A cmp B) matches either (A +
                   * -B) or (-B + A)
                   *
                   *    A cmp B <=> A + -B cmp 0
                   */
                  rewrite_compare_instruction(bld, cmp, alu, false);

                  *a = NULL;
                  rewrote_compare = true;
                  break;
               } else if ((nir_alu_srcs_equal(cmp, alu, 1, 0) &&
                           nir_alu_srcs_negative_equal(cmp, alu, 0, 1)) ||
                          (nir_alu_srcs_equal(cmp, alu, 1, 1) &&
                           nir_alu_srcs_negative_equal(cmp, alu, 0, 0))) {
                  /* This is the case where (A cmp B) matches (B + -A) or (-A
                   * + B).
                   *
                   *    A cmp B <=> 0 cmp B + -A
                   */
                  rewrite_compare_instruction(bld, cmp, alu, true);

                  *a = NULL;
                  rewrote_compare = true;
                  break;
               }
            }

            /* Bail after a compare in the most dominating block is found.
             * This is necessary because 'alu' has been removed from the
             * instruction stream.  Should there be a matching compare in
             * another block, calling rewrite_compare_instruction again will
             * try to operate on a node that is not in the list as if it were
             * in the list.
             *
             * FINISHME: There may be opportunity for additional optimization
             * here.  I discovered this problem due to a shader in Guacamelee.
             * It may be possible to rewrite the matching compares that are
             * encountered later to reuse the result from the compare that was
             * first rewritten.  It's also possible that this is just taken
             * care of by calling the optimization pass repeatedly.
             */
            if (rewrote_compare) {
               progress = true;
               break;
            }

            b = (struct block_instructions *) b->node.next;
         }

         break;
      }

      case nir_op_flt:
      case nir_op_fge:
      case nir_op_fneu:
      case nir_op_feq:
         /* If the instruction is a comparison that is used by an if-statement
          * and neither operand is immediate value 0, add it to the set.
          */
         if (is_used_by_if(alu) &&
             is_not_const_zero(NULL, alu, 0, 1, swizzle) &&
             is_not_const_zero(NULL, alu, 1, 1, swizzle))
            add_instruction_for_block(bi, alu);

         break;

      default:
         break;
      }
   }

   /* Recurse into the dominator-tree children; this block's candidates stay
    * on the stack and are visible to them.
    */
   for (unsigned i = 0; i < block->num_dom_children; i++) {
      nir_block *child = block->dom_children[i];

      if (comparison_pre_block(child, bq, bld))
         progress = true;
   }

   pop_block(bq, bi);

   return progress;
}
| |
| bool |
| nir_opt_comparison_pre_impl(nir_function_impl *impl) |
| { |
| struct block_queue bq; |
| nir_builder bld; |
| |
| block_queue_init(&bq); |
| nir_builder_init(&bld, impl); |
| |
| nir_metadata_require(impl, nir_metadata_dominance); |
| |
| const bool progress = |
| comparison_pre_block(nir_start_block(impl), &bq, &bld); |
| |
| block_queue_finish(&bq); |
| |
| if (progress) { |
| nir_metadata_preserve(impl, nir_metadata_block_index | |
| nir_metadata_dominance); |
| } else { |
| nir_metadata_preserve(impl, nir_metadata_all); |
| } |
| |
| return progress; |
| } |
| |
| bool |
| nir_opt_comparison_pre(nir_shader *shader) |
| { |
| bool progress = false; |
| |
| nir_foreach_function(function, shader) { |
| if (function->impl) |
| progress |= nir_opt_comparison_pre_impl(function->impl); |
| } |
| |
| return progress; |
| } |