src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2016 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 #include "compiler/nir/nir_builder.h"
 #include "brw_nir.h"

 /**
  * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
  *
  * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
  * definition of the patch header layouts):
  *
  *    "HW Bug: The Tessellation stage will incorrectly add domain points
  *     along patch edges under the following conditions, which may result
  *     in conformance failures and/or cracking artifacts:
  *
  *       * QUAD domain
  *       * INTEGER partitioning
  *       * All three TessFactors in a given U or V direction (e.g., V
  *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
  *       * All three TessFactors in the other direction are > 1.0 and all
  *         round up to the same integer value (e.g, U direction:
  *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
  *
  *     The suggested workaround (to be implemented as part of the postamble
  *     to the HS shader in the HS kernel) is:
  *
  *     if (
  *        (TF[UEQ0] > 1.0) ||
  *        (TF[VEQ0] > 1.0) ||
  *        (TF[UEQ1] > 1.0) ||
  *        (TF[VEQ1] > 1.0) ||
  *        (TF[INSIDE_U] > 1.0) ||
  *        (TF[INSIDE_V] > 1.0) )
  *     {
  *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
  *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
  *     }"
  *
  * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
  * that the above workaround fails to fix certain GL/ES CTS tests which
  * have inside tessellation factors of -1.0.  This can be explained by
  * a quote from the ARB_tessellation_shader specification:
  *
  *    "If "equal_spacing" is used, the floating-point tessellation level is
  *     first clamped to the range [1,<max>], where <max> is implementation-
  *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
  *
  * In other words, the actual inner tessellation factor used is
  * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
  * value against 1.0.  To accomplish this, we change the comparison from
  * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
  */

 /**
  * Build a load_output intrinsic and insert it through the builder.
  *
  * The load produces \p num_components 32-bit components, takes an immediate
  * integer zero as its only source, and has \p offset as its constant base.
  *
  * \return the SSA value defined by the newly created load.
  */
 static inline nir_ssa_def *
 load_output(nir_builder *b, int num_components, int offset)
 {
    nir_intrinsic_instr *intrin =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);

    /* The destination is a fresh, unnamed 32-bit SSA value. */
    nir_ssa_dest_init(&intrin->instr, &intrin->dest, num_components, 32, NULL);
    intrin->num_components = num_components;

    /* The source is a constant zero; the location comes from the base. */
    nir_ssa_def *zero = nir_imm_int(b, 0);
    intrin->src[0] = nir_src_for_ssa(zero);
    nir_intrinsic_set_base(intrin, offset);

    nir_builder_instr_insert(b, &intrin->instr);

    return &intrin->dest.ssa;
 }

 /**
  * Build a store_output intrinsic and insert it through the builder.
  *
  * \param b       builder used to create and insert the instruction
  * \param value   SSA value to store (source 0)
  * \param offset  constant base of the store; it mirrors the base that
  *                load_output() sets on its loads
  * \param comps   number of components to write; the write mask enables
  *                exactly the low \p comps channels
  *
  * Source 1 is an immediate integer zero.
  */
 static inline void
 store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
 {
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
    store->num_components = comps;
    /* Honor the requested location instead of silently ignoring 'offset'. */
    nir_intrinsic_set_base(store, offset);
    nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
    store->src[0] = nir_src_for_ssa(value);
    store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
    nir_builder_instr_insert(b, &store->instr);
 }

 /**
  * Emit the tessellation-level fixup at the end of \p block, before any
  * jump instruction that terminates it.
  *
  * The generated code loads the inner levels (2 components, base 0) and the
  * outer levels (4 components, base 1).  If any of those six values is
  * greater than 1.0, every inner level that is <= 1.0 is replaced with 2.0
  * and the inner levels are stored back to base 0.
  */
 static void
 emit_quads_workaround(nir_builder *b, nir_block *block)
 {
    b->cursor = nir_after_block_before_jump(block);

    nir_ssa_def *inner_levels = load_output(b, 2, 0);
    nir_ssa_def *outer_levels = load_output(b, 4, 1);

    /* True when at least one outer or inner level exceeds 1.0. */
    nir_ssa_def *needs_fixup =
       nir_ior(b,
               nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer_levels)),
               nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner_levels)));

    nir_if *guard = nir_if_create(b->shader);
    guard->condition = nir_src_for_ssa(needs_fixup);
    nir_builder_cf_insert(b, &guard->cf_node);

    /* Everything emitted from here on lands in the then-branch. */
    b->cursor = nir_after_cf_list(&guard->then_list);

    /* Per component: inner <= 1.0 ? 2.0 : inner */
    nir_ssa_def *fixed_inner =
       nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner_levels),
                    nir_imm_float(b, 2.0f), inner_levels);

    store_output(b, fixed_inner, 0, 2);
 }

 /**
  * Apply the quads tessellation-level workaround to a tessellation control
  * shader.
  *
  * emit_quads_workaround() is run on every block that is a predecessor of
  * the entrypoint's end block at the time of the call.  No metadata is
  * preserved afterwards.
  */
 void
 brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
 {
    assert(nir->stage == MESA_SHADER_TESS_CTRL);

    nir_function_impl *impl = nir_shader_get_entrypoint(nir);

    nir_builder b;
    nir_builder_init(&b, impl);

    /* Inserting an if statement splits the block it is placed in, which
     * alters the end block's predecessor set while we would be walking it.
     * Take a snapshot of the original predecessors first and iterate over
     * that copy instead.
     */
    const unsigned pred_count = impl->end_block->predecessors->entries;
    nir_block *saved_preds[pred_count];
    unsigned filled = 0;
    struct set_entry *pred_entry;

    set_foreach(impl->end_block->predecessors, pred_entry) {
       saved_preds[filled] = (nir_block *) pred_entry->key;
       filled++;
    }

    for (unsigned n = 0; n < pred_count; n++) {
       emit_quads_workaround(&b, saved_preds[n]);
    }

    nir_metadata_preserve(impl, 0);
 }
	/*
	* Copyright © 2016 Intel Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	#include "compiler/nir/nir_builder.h"
	#include "brw_nir.h"

	/**
	* Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
	*
	* From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
	* definition of the patch header layouts):
	*
	* "HW Bug: The Tessellation stage will incorrectly add domain points
	* along patch edges under the following conditions, which may result
	* in conformance failures and/or cracking artifacts:
	*
	* * QUAD domain
	* * INTEGER partitioning
	* * All three TessFactors in a given U or V direction (e.g., V
	* direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
	* * All three TessFactors in the other direction are > 1.0 and all
	* round up to the same integer value (e.g, U direction:
	* VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
	*
	* The suggested workaround (to be implemented as part of the postamble
	* to the HS shader in the HS kernel) is:
	*
	* if (
	* (TF[UEQ0] > 1.0) ||
	* (TF[VEQ0] > 1.0) ||
	* (TF[UEQ1] > 1.0) ||
	* (TF[VEQ1] > 1.0) ||
	* (TF[INSIDE_U] > 1.0) ||
	* (TF[INSIDE_V] > 1.0) )
	* {
	* TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
	* TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
	* }"
	*
	* There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
	* that the above workaround fails to fix certain GL/ES CTS tests which
	* have inside tessellation factors of -1.0. This can be explained by
	* a quote from the ARB_tessellation_shader specification:
	*
	* "If "equal_spacing" is used, the floating-point tessellation level is
	* first clamped to the range [1,<max>], where <max> is implementation-
	* dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
	*
	* In other words, the actual inner tessellation factor used is
	* clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
	* value against 1.0. To accomplish this, we change the comparison from
	* (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
	*/

	/**
	 * Build a load_output intrinsic and insert it through the builder.
	 *
	 * The load produces \p num_components 32-bit components, takes an immediate
	 * integer zero as its only source, and has \p offset as its constant base.
	 *
	 * \return the SSA value defined by the newly created load.
	 */
	static inline nir_ssa_def *
	load_output(nir_builder *b, int num_components, int offset)
	{
	   nir_intrinsic_instr *intrin =
	      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);

	   /* The destination is a fresh, unnamed 32-bit SSA value. */
	   nir_ssa_dest_init(&intrin->instr, &intrin->dest, num_components, 32, NULL);
	   intrin->num_components = num_components;

	   /* The source is a constant zero; the location comes from the base. */
	   nir_ssa_def *zero = nir_imm_int(b, 0);
	   intrin->src[0] = nir_src_for_ssa(zero);
	   nir_intrinsic_set_base(intrin, offset);

	   nir_builder_instr_insert(b, &intrin->instr);

	   return &intrin->dest.ssa;
	}

	/**
	 * Build a store_output intrinsic and insert it through the builder.
	 *
	 * \param b       builder used to create and insert the instruction
	 * \param value   SSA value to store (source 0)
	 * \param offset  constant base of the store; it mirrors the base that
	 *                load_output() sets on its loads
	 * \param comps   number of components to write; the write mask enables
	 *                exactly the low \p comps channels
	 *
	 * Source 1 is an immediate integer zero.
	 */
	static inline void
	store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
	{
	   nir_intrinsic_instr *store =
	      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
	   store->num_components = comps;
	   /* Honor the requested location instead of silently ignoring 'offset'. */
	   nir_intrinsic_set_base(store, offset);
	   nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
	   store->src[0] = nir_src_for_ssa(value);
	   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
	   nir_builder_instr_insert(b, &store->instr);
	}

	/**
	 * Emit the tessellation-level fixup at the end of \p block, before any
	 * jump instruction that terminates it.
	 *
	 * The generated code loads the inner levels (2 components, base 0) and the
	 * outer levels (4 components, base 1).  If any of those six values is
	 * greater than 1.0, every inner level that is <= 1.0 is replaced with 2.0
	 * and the inner levels are stored back to base 0.
	 */
	static void
	emit_quads_workaround(nir_builder *b, nir_block *block)
	{
	   b->cursor = nir_after_block_before_jump(block);

	   nir_ssa_def *inner = load_output(b, 2, 0);
	   nir_ssa_def *outer = load_output(b, 4, 1);

	   /* True when at least one outer or inner level exceeds 1.0. */
	   nir_ssa_def *any_greater_than_1 =
	      nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
	                 nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));

	   nir_if *if_stmt = nir_if_create(b->shader);
	   if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
	   nir_builder_cf_insert(b, &if_stmt->cf_node);

	   /* Fill out the new then-block */
	   b->cursor = nir_after_cf_list(&if_stmt->then_list);

	   /* Per component: inner <= 1.0 ? 2.0 : inner */
	   store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
	                                nir_imm_float(b, 2.0f), inner), 0, 2);
	}

	/**
	 * Apply the quads tessellation-level workaround to a tessellation control
	 * shader.
	 *
	 * emit_quads_workaround() is run on every block that is a predecessor of
	 * the entrypoint's end block at the time of the call.  No metadata is
	 * preserved afterwards.
	 */
	void
	brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
	{
	   assert(nir->stage == MESA_SHADER_TESS_CTRL);

	   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

	   nir_builder b;
	   nir_builder_init(&b, impl);

	   /* Inserting an if statement splits the block it is placed in, which
	    * alters the end block's predecessor set while we would be walking it.
	    * Take a snapshot of the original predecessors first and iterate over
	    * that copy instead.
	    */
	   const unsigned pred_count = impl->end_block->predecessors->entries;
	   nir_block *saved_preds[pred_count];
	   unsigned filled = 0;
	   struct set_entry *pred_entry;

	   set_foreach(impl->end_block->predecessors, pred_entry) {
	      saved_preds[filled] = (nir_block *) pred_entry->key;
	      filled++;
	   }

	   for (unsigned n = 0; n < pred_count; n++) {
	      emit_quads_workaround(&b, saved_preds[n]);
	   }

	   nir_metadata_preserve(impl, 0);
	}