| /* |
| * Copyright © 2019 Google, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "nir.h" |
| #include "nir_vla.h" |
| |
| /* Lowering for amul instructions, for drivers that support imul24. |
| * This pass will analyze indirect derefs, and convert corresponding |
| * amul instructions to either imul or imul24, depending on the |
| * required range. |
| * |
| * 1) Analyze the uniform variables and build a table of UBOs and SSBOs |
| * that are either too large, or might be too large (unknown size) |
| * for imul24 |
| * |
| * 2) Loop thru looking at all the intrinsics, finding dereferences of |
| * large variables, and recursively replacing all amul instructions |
| * used with imul |
| * |
| * 3) Finally loop again thru all instructions replacing any remaining |
| * amul with imul24. At this point any remaining amul instructions |
| * are not involved in calculating an offset into a large variable, |
| * thanks to the 2nd step, so they can be safely replaced with imul24. |
| * |
| * Using two passes over all the instructions lets us handle the case |
| * where, due to CSE, an amul is used to calculate an offset into both |
| * a large and small variable. |
| */ |
| |
| typedef struct { |
| nir_shader *shader; |
| |
| int (*type_size)(const struct glsl_type *, bool); |
| |
| /* Tables of UBOs and SSBOs mapping driver_location/base whether |
| * they are too large to use imul24: |
| */ |
| bool *large_ubos; |
| bool *large_ssbos; |
| |
| /* for cases that we cannot determine UBO/SSBO index, track if *any* |
| * UBO/SSBO is too large for imul24: |
| */ |
| bool has_large_ubo; |
| bool has_large_ssbo; |
| |
| unsigned max_slot; |
| } lower_state; |
| |
| /* Lower 'amul's in offset src of large variables to 'imul': */ |
| static bool |
| lower_large_src(nir_src *src, void *s) |
| { |
| lower_state *state = s; |
| |
| assert(src->is_ssa); |
| |
| nir_instr *parent = src->ssa->parent_instr; |
| |
| /* No need to visit instructions we've already visited.. this also |
| * avoids infinite recursion when phi's are involved: |
| */ |
| if (parent->pass_flags) |
| return false; |
| |
| bool progress = nir_foreach_src(parent, lower_large_src, state); |
| |
| if (parent->type == nir_instr_type_alu) { |
| nir_alu_instr *alu = nir_instr_as_alu(parent); |
| if (alu->op == nir_op_amul) { |
| alu->op = nir_op_imul; |
| progress = true; |
| } |
| } |
| |
| parent->pass_flags = 1; |
| |
| return progress; |
| } |
| |
| static bool |
| large_ubo(lower_state *state, nir_src src) |
| { |
| if (!nir_src_is_const(src)) |
| return state->has_large_ubo; |
| unsigned idx = nir_src_as_uint(src); |
| assert(idx < state->shader->info.num_ubos); |
| return state->large_ubos[idx]; |
| } |
| |
| static bool |
| large_ssbo(lower_state *state, nir_src src) |
| { |
| if (!nir_src_is_const(src)) |
| return state->has_large_ssbo; |
| unsigned idx = nir_src_as_uint(src); |
| assert(idx < state->shader->info.num_ssbos); |
| return state->large_ssbos[idx]; |
| } |
| |
| static bool |
| lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr) |
| { |
| switch (intr->intrinsic) { |
| case nir_intrinsic_load_ubo: |
| //# src[] = { buffer_index, offset }. |
| if (large_ubo(state, intr->src[0])) |
| return lower_large_src(&intr->src[1], state); |
| return false; |
| |
| case nir_intrinsic_load_ssbo: |
| //# src[] = { buffer_index, offset }. |
| if (large_ssbo(state, intr->src[0])) |
| return lower_large_src(&intr->src[1], state); |
| return false; |
| |
| case nir_intrinsic_store_ssbo: |
| //# src[] = { value, block_index, offset } |
| if (large_ssbo(state, intr->src[1])) |
| return lower_large_src(&intr->src[2], state); |
| return false; |
| |
| case nir_intrinsic_ssbo_atomic_add: |
| case nir_intrinsic_ssbo_atomic_imin: |
| case nir_intrinsic_ssbo_atomic_umin: |
| case nir_intrinsic_ssbo_atomic_imax: |
| case nir_intrinsic_ssbo_atomic_umax: |
| case nir_intrinsic_ssbo_atomic_and: |
| case nir_intrinsic_ssbo_atomic_or: |
| case nir_intrinsic_ssbo_atomic_xor: |
| case nir_intrinsic_ssbo_atomic_exchange: |
| case nir_intrinsic_ssbo_atomic_comp_swap: |
| case nir_intrinsic_ssbo_atomic_fadd: |
| case nir_intrinsic_ssbo_atomic_fmin: |
| case nir_intrinsic_ssbo_atomic_fmax: |
| case nir_intrinsic_ssbo_atomic_fcomp_swap: |
| /* 0: SSBO index |
| * 1: offset |
| */ |
| if (large_ssbo(state, intr->src[0])) |
| return lower_large_src(&intr->src[1], state); |
| return false; |
| |
| case nir_intrinsic_global_atomic_add: |
| case nir_intrinsic_global_atomic_imin: |
| case nir_intrinsic_global_atomic_umin: |
| case nir_intrinsic_global_atomic_imax: |
| case nir_intrinsic_global_atomic_umax: |
| case nir_intrinsic_global_atomic_and: |
| case nir_intrinsic_global_atomic_or: |
| case nir_intrinsic_global_atomic_xor: |
| case nir_intrinsic_global_atomic_exchange: |
| case nir_intrinsic_global_atomic_comp_swap: |
| case nir_intrinsic_global_atomic_fadd: |
| case nir_intrinsic_global_atomic_fmin: |
| case nir_intrinsic_global_atomic_fmax: |
| case nir_intrinsic_global_atomic_fcomp_swap: |
| /* just assume we that 24b is not sufficient: */ |
| return lower_large_src(&intr->src[0], state); |
| |
| /* These should all be small enough to unconditionally use imul24: */ |
| case nir_intrinsic_shared_atomic_add: |
| case nir_intrinsic_shared_atomic_imin: |
| case nir_intrinsic_shared_atomic_umin: |
| case nir_intrinsic_shared_atomic_imax: |
| case nir_intrinsic_shared_atomic_umax: |
| case nir_intrinsic_shared_atomic_and: |
| case nir_intrinsic_shared_atomic_or: |
| case nir_intrinsic_shared_atomic_xor: |
| case nir_intrinsic_shared_atomic_exchange: |
| case nir_intrinsic_shared_atomic_comp_swap: |
| case nir_intrinsic_shared_atomic_fadd: |
| case nir_intrinsic_shared_atomic_fmin: |
| case nir_intrinsic_shared_atomic_fmax: |
| case nir_intrinsic_shared_atomic_fcomp_swap: |
| case nir_intrinsic_load_uniform: |
| case nir_intrinsic_load_input: |
| case nir_intrinsic_load_output: |
| case nir_intrinsic_store_output: |
| default: |
| return false; |
| } |
| } |
| |
| static bool |
| lower_instr(lower_state *state, nir_instr *instr) |
| { |
| bool progress = false; |
| |
| if (instr->type == nir_instr_type_intrinsic) { |
| progress |= lower_intrinsic(state, nir_instr_as_intrinsic(instr)); |
| } |
| |
| return progress; |
| } |
| |
| static bool |
| is_large(lower_state *state, nir_variable *var) |
| { |
| const struct glsl_type *type = glsl_without_array(var->type); |
| unsigned size = state->type_size(type, false); |
| |
| /* if size is not known (ie. VLA) then assume the worst: */ |
| if (!size) |
| return true; |
| |
| return size >= (1 << 23); |
| } |
| |
| bool |
| nir_lower_amul(nir_shader *shader, |
| int (*type_size)(const struct glsl_type *, bool)) |
| { |
| assert(shader->options->has_imul24); |
| assert(type_size); |
| |
| NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0); |
| NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0); |
| |
| lower_state state = { |
| .shader = shader, |
| .type_size = type_size, |
| .large_ubos = large_ubos, |
| .large_ssbos = large_ssbos, |
| }; |
| |
| /* Figure out which UBOs or SSBOs are large enough to be |
| * disqualified from imul24: |
| */ |
| nir_foreach_variable_in_shader (var, shader) { |
| if (var->data.mode == nir_var_mem_ubo) { |
| if (is_large(&state, var)) { |
| state.has_large_ubo = true; |
| unsigned size = MAX2(1, glsl_array_size(var->type)); |
| for (unsigned i = 0; i < size; i++) |
| state.large_ubos[var->data.binding + i] = true; |
| } |
| } else if (var->data.mode == nir_var_mem_ssbo) { |
| if (is_large(&state, var)) { |
| state.has_large_ssbo = true; |
| unsigned size = MAX2(1, glsl_array_size(var->type)); |
| for (unsigned i = 0; i < size; i++) |
| state.large_ssbos[var->data.binding + i] = true; |
| } |
| } |
| } |
| |
| /* clear pass flags: */ |
| nir_foreach_function(function, shader) { |
| nir_function_impl *impl = function->impl; |
| if (!impl) |
| continue; |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr(instr, block) { |
| instr->pass_flags = 0; |
| } |
| } |
| } |
| |
| bool progress = false; |
| nir_foreach_function(function, shader) { |
| nir_function_impl *impl = function->impl; |
| |
| if (!impl) |
| continue; |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr(instr, block) { |
| progress |= lower_instr(&state, instr); |
| } |
| } |
| } |
| |
| /* At this point, all 'amul's used in calculating an offset into |
| * a large variable have been replaced with 'imul'. So remaining |
| * 'amul's can be replaced with 'imul24': |
| */ |
| nir_foreach_function(function, shader) { |
| nir_function_impl *impl = function->impl; |
| |
| if (!impl) |
| continue; |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr(instr, block) { |
| if (instr->type != nir_instr_type_alu) |
| continue; |
| |
| nir_alu_instr *alu = nir_instr_as_alu(instr); |
| if (alu->op != nir_op_amul) |
| continue; |
| |
| alu->op = nir_op_imul24; |
| progress |= true; |
| } |
| } |
| |
| nir_metadata_preserve(impl, nir_metadata_block_index | |
| nir_metadata_dominance); |
| |
| } |
| |
| return progress; |
| } |