/*
* Copyright © 2018 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "nir.h"
/* This pass computes, for each ssa definition, whether it is uniform,
 * i.e. whether it is guaranteed to have the same value for all invocations
 * of the group.
*
* This divergence analysis pass expects the shader to be in LCSSA-form.
*
* This algorithm implements "The Simple Divergence Analysis" from
* Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
* Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
* ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
*/
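/* For example (an illustrative NIR-like sketch, not actual pass output):
 *
 *    %a = load_work_group_id()        -> always uniform
 *    %b = load_subgroup_invocation()  -> always divergent
 *    %c = iadd %a.x, %b               -> divergent, because an ALU result
 *                                        is divergent as soon as any of
 *                                        its sources is
 */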
struct divergence_state {
const nir_divergence_options options;
const gl_shader_stage stage;
/** current control flow state */
/* True if some loop-active invocations might take a different control-flow path.
* A divergent break does not cause subsequent control-flow to be considered
* divergent because those invocations are no longer active in the loop.
* For a divergent if, both sides are considered divergent flow because
* the other side is still loop-active. */
bool divergent_loop_cf;
/* True if a divergent continue happened since the loop header */
bool divergent_loop_continue;
/* True if a divergent break happened since the loop header */
bool divergent_loop_break;
   /* True if we visit the block for the first time */
bool first_visit;
};
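/* Illustrative example for the loop state above:
 *
 *    loop {
 *       if (%cond1) break;     <- a divergent break: the invocations taking
 *       ...                       it are no longer loop-active, so the code
 *                                 after it is not considered divergent CF
 *       if (%cond2) continue;  <- a divergent continue: the remaining
 *       ...                       invocations still execute this code, so
 *    }                            divergent_loop_cf becomes true
 */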
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
static bool
visit_alu(nir_alu_instr *instr)
{
if (instr->dest.dest.ssa.divergent)
return false;
unsigned num_src = nir_op_infos[instr->op].num_inputs;
for (unsigned i = 0; i < num_src; i++) {
if (instr->src[i].src.ssa->divergent) {
instr->dest.dest.ssa.divergent = true;
return true;
}
}
return false;
}
static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
return false;
if (instr->dest.ssa.divergent)
return false;
nir_divergence_options options = state->options;
gl_shader_stage stage = state->stage;
bool is_divergent = false;
switch (instr->intrinsic) {
/* Intrinsics which are always uniform */
case nir_intrinsic_shader_clock:
case nir_intrinsic_ballot:
case nir_intrinsic_read_invocation:
case nir_intrinsic_read_first_invocation:
case nir_intrinsic_vote_any:
case nir_intrinsic_vote_all:
case nir_intrinsic_vote_feq:
case nir_intrinsic_vote_ieq:
case nir_intrinsic_load_work_dim:
case nir_intrinsic_load_work_group_id:
case nir_intrinsic_load_num_work_groups:
case nir_intrinsic_load_local_group_size:
case nir_intrinsic_load_subgroup_id:
case nir_intrinsic_load_num_subgroups:
case nir_intrinsic_load_subgroup_size:
case nir_intrinsic_load_subgroup_eq_mask:
case nir_intrinsic_load_subgroup_ge_mask:
case nir_intrinsic_load_subgroup_gt_mask:
case nir_intrinsic_load_subgroup_le_mask:
case nir_intrinsic_load_subgroup_lt_mask:
case nir_intrinsic_first_invocation:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_first_vertex:
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_is_indexed_draw:
case nir_intrinsic_load_viewport_scale:
case nir_intrinsic_load_alpha_ref_float:
case nir_intrinsic_load_user_clip_plane:
case nir_intrinsic_load_viewport_x_scale:
case nir_intrinsic_load_viewport_y_scale:
case nir_intrinsic_load_viewport_z_scale:
case nir_intrinsic_load_viewport_offset:
case nir_intrinsic_load_viewport_z_offset:
case nir_intrinsic_load_blend_const_color_a_float:
case nir_intrinsic_load_blend_const_color_b_float:
case nir_intrinsic_load_blend_const_color_g_float:
case nir_intrinsic_load_blend_const_color_r_float:
case nir_intrinsic_load_blend_const_color_rgba:
case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
is_divergent = false;
break;
/* Intrinsics with divergence depending on shader stage and hardware */
case nir_intrinsic_load_input:
is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_FRAGMENT)
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
else if (stage == MESA_SHADER_TESS_EVAL)
is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
else
is_divergent = true;
break;
case nir_intrinsic_load_per_vertex_input:
is_divergent = instr->src[0].ssa->divergent ||
instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
break;
case nir_intrinsic_load_input_vertex:
is_divergent = instr->src[1].ssa->divergent;
assert(stage == MESA_SHADER_FRAGMENT);
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_output:
assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_TESS_CTRL)
is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
else
is_divergent = true;
break;
case nir_intrinsic_load_per_vertex_output:
assert(stage == MESA_SHADER_TESS_CTRL);
is_divergent = instr->src[0].ssa->divergent ||
instr->src[1].ssa->divergent ||
!(options & nir_divergence_single_patch_per_tcs_subgroup);
break;
case nir_intrinsic_load_layer_id:
case nir_intrinsic_load_front_face:
assert(stage == MESA_SHADER_FRAGMENT);
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_view_index:
assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
if (options & nir_divergence_view_index_uniform)
is_divergent = false;
else if (stage == MESA_SHADER_FRAGMENT)
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_fs_input_interp_deltas:
assert(stage == MESA_SHADER_FRAGMENT);
is_divergent = instr->src[0].ssa->divergent;
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break;
case nir_intrinsic_load_primitive_id:
if (stage == MESA_SHADER_FRAGMENT)
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
else if (stage == MESA_SHADER_TESS_CTRL)
is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
else if (stage == MESA_SHADER_TESS_EVAL)
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
else if (stage == MESA_SHADER_GEOMETRY)
is_divergent = true;
else
unreachable("Invalid stage for load_primitive_id");
break;
case nir_intrinsic_load_tess_level_inner:
case nir_intrinsic_load_tess_level_outer:
if (stage == MESA_SHADER_TESS_CTRL)
is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
else if (stage == MESA_SHADER_TESS_EVAL)
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
else
unreachable("Invalid stage for load_primitive_tess_level_*");
break;
case nir_intrinsic_load_patch_vertices_in:
if (stage == MESA_SHADER_TESS_EVAL)
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
else
assert(stage == MESA_SHADER_TESS_CTRL);
break;
/* Clustered reductions are uniform if cluster_size == subgroup_size or
* the source is uniform and the operation is invariant.
* Inclusive scans are uniform if
* the source is uniform and the operation is invariant
*/
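/* E.g. an inclusive-scan umin of a uniform value %x computes
 * min(%x, ..., %x) == %x in every invocation and thus stays uniform,
 * while an iadd scan of the same %x yields a different prefix sum
 * per invocation and is therefore divergent. */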
case nir_intrinsic_reduce:
if (nir_intrinsic_cluster_size(instr) == 0)
return false;
/* fallthrough */
case nir_intrinsic_inclusive_scan: {
nir_op op = nir_intrinsic_reduction_op(instr);
is_divergent = instr->src[0].ssa->divergent;
if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
op != nir_op_iand && op != nir_op_ior)
is_divergent = true;
break;
}
/* Intrinsics with divergence depending on sources */
case nir_intrinsic_ballot_bitfield_extract:
case nir_intrinsic_ballot_find_lsb:
case nir_intrinsic_ballot_find_msb:
case nir_intrinsic_ballot_bit_count_reduce:
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_load_deref:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_push_constant:
case nir_intrinsic_load_constant:
case nir_intrinsic_load_sample_pos_from_id:
case nir_intrinsic_load_kernel_input:
case nir_intrinsic_image_load:
case nir_intrinsic_image_deref_load:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_image_samples:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_bindless_image_samples:
case nir_intrinsic_get_buffer_size:
case nir_intrinsic_image_size:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_bindless_image_size:
case nir_intrinsic_copy_deref:
case nir_intrinsic_deref_buffer_array_length:
case nir_intrinsic_vulkan_resource_index:
case nir_intrinsic_vulkan_resource_reindex:
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_atomic_counter_read_deref:
case nir_intrinsic_quad_swizzle_amd:
case nir_intrinsic_masked_swizzle_amd: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
if (instr->src[i].ssa->divergent) {
is_divergent = true;
break;
}
}
break;
}
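   /* The shuffle result is only divergent if both inputs are:
    * with a uniform index, every invocation reads the same lane, and
    * uniform data yields the same value no matter which lane is read. */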
case nir_intrinsic_shuffle:
is_divergent = instr->src[0].ssa->divergent &&
instr->src[1].ssa->divergent;
break;
/* Intrinsics which are always divergent */
case nir_intrinsic_load_color0:
case nir_intrinsic_load_color1:
case nir_intrinsic_load_param:
case nir_intrinsic_load_sample_id:
case nir_intrinsic_load_sample_id_no_per_sample:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_centroid:
case nir_intrinsic_load_barycentric_sample:
case nir_intrinsic_load_barycentric_model:
case nir_intrinsic_load_barycentric_at_sample:
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_interp_deref_at_offset:
case nir_intrinsic_interp_deref_at_sample:
case nir_intrinsic_interp_deref_at_centroid:
case nir_intrinsic_interp_deref_at_vertex:
case nir_intrinsic_load_tess_coord:
case nir_intrinsic_load_point_coord:
case nir_intrinsic_load_line_coord:
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_sample_pos:
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_local_invocation_index:
case nir_intrinsic_load_global_invocation_id:
case nir_intrinsic_load_global_invocation_index:
case nir_intrinsic_load_subgroup_invocation:
case nir_intrinsic_load_helper_invocation:
case nir_intrinsic_is_helper_invocation:
case nir_intrinsic_load_scratch:
case nir_intrinsic_deref_atomic_add:
case nir_intrinsic_deref_atomic_imin:
case nir_intrinsic_deref_atomic_umin:
case nir_intrinsic_deref_atomic_imax:
case nir_intrinsic_deref_atomic_umax:
case nir_intrinsic_deref_atomic_and:
case nir_intrinsic_deref_atomic_or:
case nir_intrinsic_deref_atomic_xor:
case nir_intrinsic_deref_atomic_exchange:
case nir_intrinsic_deref_atomic_comp_swap:
case nir_intrinsic_deref_atomic_fadd:
case nir_intrinsic_deref_atomic_fmin:
case nir_intrinsic_deref_atomic_fmax:
case nir_intrinsic_deref_atomic_fcomp_swap:
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_fadd:
case nir_intrinsic_ssbo_atomic_fmax:
case nir_intrinsic_ssbo_atomic_fmin:
case nir_intrinsic_ssbo_atomic_fcomp_swap:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_image_deref_atomic_umin:
case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_atomic_fadd:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_fadd:
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_fadd:
case nir_intrinsic_shared_atomic_add:
case nir_intrinsic_shared_atomic_imin:
case nir_intrinsic_shared_atomic_umin:
case nir_intrinsic_shared_atomic_imax:
case nir_intrinsic_shared_atomic_umax:
case nir_intrinsic_shared_atomic_and:
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_shared_atomic_exchange:
case nir_intrinsic_shared_atomic_comp_swap:
case nir_intrinsic_shared_atomic_fadd:
case nir_intrinsic_shared_atomic_fmin:
case nir_intrinsic_shared_atomic_fmax:
case nir_intrinsic_shared_atomic_fcomp_swap:
case nir_intrinsic_global_atomic_add:
case nir_intrinsic_global_atomic_imin:
case nir_intrinsic_global_atomic_umin:
case nir_intrinsic_global_atomic_imax:
case nir_intrinsic_global_atomic_umax:
case nir_intrinsic_global_atomic_and:
case nir_intrinsic_global_atomic_or:
case nir_intrinsic_global_atomic_xor:
case nir_intrinsic_global_atomic_exchange:
case nir_intrinsic_global_atomic_comp_swap:
case nir_intrinsic_global_atomic_fadd:
case nir_intrinsic_global_atomic_fmin:
case nir_intrinsic_global_atomic_fmax:
case nir_intrinsic_global_atomic_fcomp_swap:
case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_min:
case nir_intrinsic_atomic_counter_max:
case nir_intrinsic_atomic_counter_and:
case nir_intrinsic_atomic_counter_or:
case nir_intrinsic_atomic_counter_xor:
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_pre_dec:
case nir_intrinsic_atomic_counter_post_dec:
case nir_intrinsic_atomic_counter_exchange:
case nir_intrinsic_atomic_counter_comp_swap:
case nir_intrinsic_atomic_counter_add_deref:
case nir_intrinsic_atomic_counter_min_deref:
case nir_intrinsic_atomic_counter_max_deref:
case nir_intrinsic_atomic_counter_and_deref:
case nir_intrinsic_atomic_counter_or_deref:
case nir_intrinsic_atomic_counter_xor_deref:
case nir_intrinsic_atomic_counter_inc_deref:
case nir_intrinsic_atomic_counter_pre_dec_deref:
case nir_intrinsic_atomic_counter_post_dec_deref:
case nir_intrinsic_atomic_counter_exchange_deref:
case nir_intrinsic_atomic_counter_comp_swap_deref:
case nir_intrinsic_exclusive_scan:
case nir_intrinsic_ballot_bit_count_exclusive:
case nir_intrinsic_ballot_bit_count_inclusive:
case nir_intrinsic_write_invocation_amd:
case nir_intrinsic_mbcnt_amd:
case nir_intrinsic_elect:
is_divergent = true;
break;
default:
#ifdef NDEBUG
is_divergent = true;
break;
#else
nir_print_instr(&instr->instr, stderr);
unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
}
instr->dest.ssa.divergent = is_divergent;
return is_divergent;
}
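/* Texture and sampler sources only propagate divergence if the instruction
 * is marked non-uniform; otherwise the descriptors are required by the APIs
 * to be dynamically uniform (e.g. in Vulkan, unless decorated NonUniform). */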
static bool
visit_tex(nir_tex_instr *instr)
{
if (instr->dest.ssa.divergent)
return false;
bool is_divergent = false;
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_sampler_deref:
case nir_tex_src_sampler_handle:
case nir_tex_src_sampler_offset:
is_divergent |= instr->src[i].src.ssa->divergent &&
instr->sampler_non_uniform;
break;
case nir_tex_src_texture_deref:
case nir_tex_src_texture_handle:
case nir_tex_src_texture_offset:
is_divergent |= instr->src[i].src.ssa->divergent &&
instr->texture_non_uniform;
break;
default:
is_divergent |= instr->src[i].src.ssa->divergent;
break;
}
}
instr->dest.ssa.divergent = is_divergent;
return is_divergent;
}
static bool
visit_load_const(nir_load_const_instr *instr)
{
return false;
}
static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
return false;
}
static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
switch (mode) {
case nir_var_uniform:
case nir_var_mem_ubo:
case nir_var_mem_ssbo:
case nir_var_mem_shared:
case nir_var_mem_global:
return true;
default:
return false;
}
}
static bool
nir_variable_is_uniform(nir_variable *var, struct divergence_state *state)
{
if (nir_variable_mode_is_uniform(var->data.mode))
return true;
if (state->stage == MESA_SHADER_FRAGMENT &&
(state->options & nir_divergence_single_prim_per_subgroup) &&
var->data.mode == nir_var_shader_in &&
var->data.interpolation == INTERP_MODE_FLAT)
return true;
if (state->stage == MESA_SHADER_TESS_CTRL &&
(state->options & nir_divergence_single_patch_per_tcs_subgroup) &&
var->data.mode == nir_var_shader_out && var->data.patch)
return true;
if (state->stage == MESA_SHADER_TESS_EVAL &&
(state->options & nir_divergence_single_patch_per_tes_subgroup) &&
var->data.mode == nir_var_shader_in && var->data.patch)
return true;
return false;
}
static bool
visit_deref(nir_deref_instr *deref, struct divergence_state *state)
{
if (deref->dest.ssa.divergent)
return false;
bool is_divergent = false;
switch (deref->deref_type) {
case nir_deref_type_var:
is_divergent = !nir_variable_is_uniform(deref->var, state);
break;
case nir_deref_type_array:
case nir_deref_type_ptr_as_array:
is_divergent = deref->arr.index.ssa->divergent;
/* fallthrough */
case nir_deref_type_struct:
case nir_deref_type_array_wildcard:
is_divergent |= deref->parent.ssa->divergent;
break;
case nir_deref_type_cast:
      /* cast derefs have no variable: the mode is stored on the deref itself */
      is_divergent = !nir_variable_mode_is_uniform(deref->mode) ||
                     deref->parent.ssa->divergent;
break;
}
deref->dest.ssa.divergent = is_divergent;
return is_divergent;
}
static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
switch (jump->type) {
case nir_jump_continue:
if (state->divergent_loop_continue)
return false;
if (state->divergent_loop_cf)
state->divergent_loop_continue = true;
return state->divergent_loop_continue;
case nir_jump_break:
if (state->divergent_loop_break)
return false;
if (state->divergent_loop_cf)
state->divergent_loop_break = true;
return state->divergent_loop_break;
case nir_jump_return:
unreachable("NIR divergence analysis: Unsupported return instruction.");
break;
case nir_jump_goto:
case nir_jump_goto_if:
unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
break;
}
return false;
}
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
def->divergent = false;
return true;
}
static bool
visit_block(nir_block *block, struct divergence_state *state)
{
bool has_changed = false;
nir_foreach_instr(instr, block) {
/* phis are handled when processing the branches */
if (instr->type == nir_instr_type_phi)
continue;
if (state->first_visit)
nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
switch (instr->type) {
case nir_instr_type_alu:
has_changed |= visit_alu(nir_instr_as_alu(instr));
break;
case nir_instr_type_intrinsic:
has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr), state);
break;
case nir_instr_type_tex:
has_changed |= visit_tex(nir_instr_as_tex(instr));
break;
case nir_instr_type_load_const:
has_changed |= visit_load_const(nir_instr_as_load_const(instr));
break;
case nir_instr_type_ssa_undef:
has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
break;
case nir_instr_type_deref:
has_changed |= visit_deref(nir_instr_as_deref(instr), state);
break;
case nir_instr_type_jump:
has_changed |= visit_jump(nir_instr_as_jump(instr), state);
break;
case nir_instr_type_phi:
case nir_instr_type_call:
case nir_instr_type_parallel_copy:
unreachable("NIR divergence analysis: Unsupported instruction type.");
}
}
return has_changed;
}
/* There are 3 types of phi instructions:
 * (1) gamma: represents the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
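/* Illustrative example:
 *
 *    if (%cond) { %a = ...; } else { %b = ...; }
 *    %r = phi %a, %b
 *
 * %r is divergent if %cond is divergent (the invocations join from
 * different sides) or if %a or %b itself is divergent. */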
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
if (phi->dest.ssa.divergent)
return false;
unsigned defined_srcs = 0;
nir_foreach_phi_src(src, phi) {
/* if any source value is divergent, the resulting value is divergent */
if (src->src.ssa->divergent) {
phi->dest.ssa.divergent = true;
return true;
}
if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
defined_srcs++;
}
}
   /* if the condition is divergent and at least two sources are defined,
    * the definition is divergent */
if (defined_srcs > 1 && if_cond_divergent) {
phi->dest.ssa.divergent = true;
return true;
}
return false;
}
/* There are 3 types of phi instructions:
 * (2) mu: only exists at loop headers and
 *     merges the initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
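/* Illustrative example with two continue paths carrying %a and %b:
 *
 *    loop {
 *       %i = phi %init (from preheader), %a, %b (loop-carried)
 *       ...
 *    }
 *
 * Even if %a and %b are both uniform, %i is divergent when the continue
 * condition is divergent and %a and %b are different ssa-defs, because
 * invocations then re-enter the header carrying different values. */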
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
if (phi->dest.ssa.divergent)
return false;
nir_ssa_def* same = NULL;
nir_foreach_phi_src(src, phi) {
/* if any source value is divergent, the resulting value is divergent */
if (src->src.ssa->divergent) {
phi->dest.ssa.divergent = true;
return true;
}
/* if this loop is uniform, we're done here */
if (!divergent_continue)
continue;
/* skip the loop preheader */
if (src->pred == preheader)
continue;
/* skip undef values */
if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
continue;
/* check if all loop-carried values are from the same ssa-def */
if (!same)
same = src->src.ssa;
else if (same != src->src.ssa) {
phi->dest.ssa.divergent = true;
return true;
}
}
return false;
}
/* There are 3 types of phi instructions:
 * (3) eta: represents values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
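/* Illustrative example:
 *
 *    loop {
 *       ...
 *       if (%cond) break;
 *    }
 *    %r = phi %v   (the LCSSA phi after the loop)
 *
 * %r is divergent if %v is divergent or if the break is divergent:
 * the invocations then leave the loop in different iterations and can
 * observe different values of %v. */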
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
if (phi->dest.ssa.divergent)
return false;
if (divergent_break) {
phi->dest.ssa.divergent = true;
return true;
}
/* if any source value is divergent, the resulting value is divergent */
nir_foreach_phi_src(src, phi) {
if (src->src.ssa->divergent) {
phi->dest.ssa.divergent = true;
return true;
}
}
return false;
}
static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
bool progress = false;
struct divergence_state then_state = *state;
then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
progress |= visit_cf_list(&if_stmt->then_list, &then_state);
struct divergence_state else_state = *state;
else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
progress |= visit_cf_list(&if_stmt->else_list, &else_state);
/* handle phis after the IF */
nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
if (instr->type != nir_instr_type_phi)
break;
if (state->first_visit)
nir_instr_as_phi(instr)->dest.ssa.divergent = false;
progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
if_stmt->condition.ssa->divergent);
}
/* join loop divergence information from both branch legs */
state->divergent_loop_continue |= then_state.divergent_loop_continue ||
else_state.divergent_loop_continue;
state->divergent_loop_break |= then_state.divergent_loop_break ||
else_state.divergent_loop_break;
   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop-body,
    * which means that a following break might be taken only by some of the
    * invocations. */
state->divergent_loop_cf |= state->divergent_loop_continue;
return progress;
}
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
bool progress = false;
nir_block *loop_header = nir_loop_first_block(loop);
nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);
/* handle loop header phis first: we have no knowledge yet about
* the loop's control flow or any loop-carried sources. */
nir_foreach_instr(instr, loop_header) {
if (instr->type != nir_instr_type_phi)
break;
nir_phi_instr *phi = nir_instr_as_phi(instr);
if (!state->first_visit && phi->dest.ssa.divergent)
continue;
nir_foreach_phi_src(src, phi) {
if (src->pred == loop_preheader) {
phi->dest.ssa.divergent = src->src.ssa->divergent;
break;
}
}
progress |= phi->dest.ssa.divergent;
}
/* setup loop state */
struct divergence_state loop_state = *state;
loop_state.divergent_loop_cf = false;
loop_state.divergent_loop_continue = false;
loop_state.divergent_loop_break = false;
/* process loop body until no further changes are made */
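   /* This converges because divergence flags are monotonic: they only ever
    * flip from false to true, so the fixed-point iteration must terminate. */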
bool repeat;
do {
progress |= visit_cf_list(&loop->body, &loop_state);
repeat = false;
/* revisit loop header phis to see if something has changed */
nir_foreach_instr(instr, loop_header) {
if (instr->type != nir_instr_type_phi)
break;
repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
loop_preheader,
loop_state.divergent_loop_continue);
}
loop_state.divergent_loop_cf = false;
loop_state.first_visit = false;
} while (repeat);
/* handle phis after the loop */
nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
if (instr->type != nir_instr_type_phi)
break;
if (state->first_visit)
nir_instr_as_phi(instr)->dest.ssa.divergent = false;
progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
loop_state.divergent_loop_break);
}
return progress;
}
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
bool has_changed = false;
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block:
has_changed |= visit_block(nir_cf_node_as_block(node), state);
break;
case nir_cf_node_if:
has_changed |= visit_if(nir_cf_node_as_if(node), state);
break;
case nir_cf_node_loop:
has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
break;
case nir_cf_node_function:
unreachable("NIR divergence analysis: Unsupported cf_node type.");
}
}
return has_changed;
}
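/* Entry point: expects the shader to be in LCSSA-form (see the comment at
 * the top of this file) and sets nir_ssa_def::divergent for every definition
 * reachable from the entrypoint. */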
void
nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
{
struct divergence_state state = {
.options = options,
.stage = shader->info.stage,
.divergent_loop_cf = false,
.divergent_loop_continue = false,
.divergent_loop_break = false,
.first_visit = true,
};
visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}