| /* |
| * Copyright © 2022 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "anv_private.h" |
| |
| #include "compiler/intel_nir.h" |
| #include "compiler/brw_compiler.h" |
| #include "compiler/brw_nir.h" |
| #include "compiler/nir/nir.h" |
| #include "compiler/nir/nir_builder.h" |
| #include "dev/intel_debug.h" |
| #include "intel/compiler/intel_nir.h" |
| #include "util/macros.h" |
| |
| #include "vk_nir.h" |
| |
| #include "anv_internal_kernels.h" |
| |
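/* Replace loads of the base workgroup ID with a constant (0, 0, 0):
 * internal kernels are never dispatched with a base workgroup offset.
 * nir_instr_remove() only unlinks the instruction, so rewriting the uses
 * of its def immediately afterwards is safe.
 */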
| static bool |
| lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin, |
| UNUSED void *data) |
| { |
| if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id) |
| return false; |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32)); |
| return true; |
| } |
| |
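/* Link the libanv library shader into 'nir': called functions are
 * inlined, the now-unused non-entrypoint functions are removed, and the
 * remaining variable accesses are lowered to explicit I/O in the 62-bit
 * generic address format used by the rest of the compile.
 */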
| static void |
| link_libanv(nir_shader *nir, const nir_shader *libanv) |
| { |
| nir_link_shader_functions(nir, libanv); |
| NIR_PASS_V(nir, nir_inline_functions); |
| NIR_PASS_V(nir, nir_remove_non_entrypoints); |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, |
| glsl_get_cl_type_size_align); |
| NIR_PASS_V(nir, nir_opt_deref); |
| NIR_PASS_V(nir, nir_lower_vars_to_ssa); |
| NIR_PASS_V(nir, nir_lower_explicit_io, |
| nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | |
| nir_var_mem_global, |
| nir_address_format_62bit_generic); |
| } |
| |
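/* Build and compile a single internal kernel. The genX entrypoint emits
 * the shader body into the builder and returns the size of its inline
 * parameter data (consumed below as nir->num_uniforms). The libanv
 * library is then linked in, the NIR optimized and lowered, and the
 * result compiled with the brw FS/CS backend and uploaded to the
 * device's internal cache. Returns NULL on failure.
 */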
| static struct anv_shader_bin * |
| compile_shader(struct anv_device *device, |
| const nir_shader *libanv, |
| enum anv_internal_kernel_name shader_name, |
| gl_shader_stage stage, |
| const char *name, |
| const void *hash_key, |
| uint32_t hash_key_size, |
| uint32_t sends_count_expectation) |
| { |
| const nir_shader_compiler_options *nir_options = |
| device->physical->compiler->nir_options[stage]; |
| |
| nir_builder b = nir_builder_init_simple_shader(stage, nir_options, |
| "%s", name); |
| |
| uint32_t uniform_size = |
| anv_genX(device->info, call_internal_shader)(&b, shader_name); |
| |
| nir_shader *nir = b.shader; |
| |
| link_libanv(nir, libanv); |
| |
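   /* With shader printing enabled, lower printf intrinsics so the kernels
    * can emit debug output (64-bit pointers, base-identifier form).
    */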
| if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) { |
| nir_lower_printf_options printf_opts = { |
| .ptr_bit_size = 64, |
| .use_printf_base_identifier = true, |
| }; |
| NIR_PASS_V(nir, nir_lower_printf, &printf_opts); |
| } |
| |
| NIR_PASS_V(nir, nir_lower_vars_to_ssa); |
| NIR_PASS_V(nir, nir_opt_cse); |
| NIR_PASS_V(nir, nir_opt_gcm, true); |
| NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false); |
| |
| NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); |
| |
| NIR_PASS_V(nir, nir_split_var_copies); |
| NIR_PASS_V(nir, nir_split_per_member_structs); |
| |
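   /* Internal compute kernels run with a fixed 16x1x1 workgroup size. */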
| if (stage == MESA_SHADER_COMPUTE) { |
| nir->info.workgroup_size[0] = 16; |
| nir->info.workgroup_size[1] = 1; |
| nir->info.workgroup_size[2] = 1; |
| } |
| |
| struct brw_compiler *compiler = device->physical->compiler; |
| struct brw_nir_compiler_opts opts = {}; |
| brw_preprocess_nir(compiler, nir, &opts); |
| |
| NIR_PASS_V(nir, nir_propagate_invariant, false); |
| |
| if (stage == MESA_SHADER_FRAGMENT) { |
| NIR_PASS_V(nir, nir_lower_input_attachments, |
| &(nir_input_attachment_options) { |
| .use_fragcoord_sysval = true, |
| .use_layer_id_sysval = true, |
| }); |
| } else { |
| nir_lower_compute_system_values_options options = { |
| .has_base_workgroup_id = true, |
| .lower_cs_local_id_to_index = true, |
| .lower_workgroup_id_to_index = gl_shader_stage_is_mesh(stage), |
| }; |
| NIR_PASS_V(nir, nir_lower_compute_system_values, &options); |
| NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_base_workgroup_id, |
| nir_metadata_control_flow, NULL); |
| } |
| |
| /* Reset sizes before gathering information */ |
| nir->global_mem_size = 0; |
| nir->scratch_size = 0; |
| nir->info.shared_size = 0; |
| nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); |
| |
| NIR_PASS_V(nir, nir_copy_prop); |
| NIR_PASS_V(nir, nir_opt_constant_folding); |
| NIR_PASS_V(nir, nir_opt_dce); |
| |
| union brw_any_prog_key key; |
| memset(&key, 0, sizeof(key)); |
| |
| union brw_any_prog_data prog_data; |
| memset(&prog_data, 0, sizeof(prog_data)); |
| |
| if (stage == MESA_SHADER_COMPUTE) { |
| NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, |
| device->info, &prog_data.cs); |
| } |
| |
   /* Do the vectorizing here. For some reason it just doesn't work when
    * done later in the backend.
    */
| nir_load_store_vectorize_options options = { |
| .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global, |
| .callback = brw_nir_should_vectorize_mem, |
| .robust_modes = (nir_variable_mode)0, |
| }; |
| NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options); |
| |
| nir->num_uniforms = uniform_size; |
| |
| prog_data.base.nr_params = nir->num_uniforms / 4; |
| |
| brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges); |
| |
| void *temp_ctx = ralloc_context(NULL); |
| |
| const unsigned *program; |
| if (stage == MESA_SHADER_FRAGMENT) { |
| struct brw_compile_stats stats[3]; |
| struct brw_compile_fs_params params = { |
| .base = { |
| .nir = nir, |
| .log_data = device, |
| .debug_flag = DEBUG_WM, |
| .stats = stats, |
| .mem_ctx = temp_ctx, |
| }, |
| .key = &key.wm, |
| .prog_data = &prog_data.wm, |
| }; |
| program = brw_compile_fs(compiler, ¶ms); |
| |
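      /* The checks below are regression checks: each internal kernel has a
       * hand-counted number of send messages (see the table in
       * anv_device_get_internal_shader) and must not spill. The SIMD32
       * expectation is doubled before Gfx20, where most messages are split
       * into two SIMD16 halves. Skipped with shader-print enabled, since
       * printing adds its own messages.
       */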
| if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) { |
| unsigned stat_idx = 0; |
| if (prog_data.wm.dispatch_8) { |
| assert(stats[stat_idx].spills == 0); |
| assert(stats[stat_idx].fills == 0); |
| assert(stats[stat_idx].sends == sends_count_expectation); |
| stat_idx++; |
| } |
| if (prog_data.wm.dispatch_16) { |
| assert(stats[stat_idx].spills == 0); |
| assert(stats[stat_idx].fills == 0); |
| assert(stats[stat_idx].sends == sends_count_expectation); |
| stat_idx++; |
| } |
| if (prog_data.wm.dispatch_32) { |
| assert(stats[stat_idx].spills == 0); |
| assert(stats[stat_idx].fills == 0); |
| assert(stats[stat_idx].sends == |
| sends_count_expectation * |
| (device->info->ver < 20 ? 2 : 1)); |
| stat_idx++; |
| } |
| } |
| } else { |
| struct brw_compile_stats stats; |
| struct brw_compile_cs_params params = { |
| .base = { |
| .nir = nir, |
| .stats = &stats, |
| .log_data = device, |
| .debug_flag = DEBUG_CS, |
| .mem_ctx = temp_ctx, |
| }, |
| .key = &key.cs, |
| .prog_data = &prog_data.cs, |
| }; |
| program = brw_compile_cs(compiler, ¶ms); |
| |
| if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) { |
| assert(stats.spills == 0); |
| assert(stats.fills == 0); |
| assert(stats.sends == sends_count_expectation); |
| } |
| } |
| |
| assert(prog_data.base.total_scratch == 0); |
| assert(program != NULL); |
| struct anv_shader_bin *kernel = NULL; |
| if (program == NULL) |
| goto exit; |
| |
| struct anv_pipeline_bind_map empty_bind_map = {}; |
| struct anv_push_descriptor_info empty_push_desc_info = {}; |
| struct anv_shader_upload_params upload_params = { |
| .stage = nir->info.stage, |
| .key_data = hash_key, |
| .key_size = hash_key_size, |
| .kernel_data = program, |
| .kernel_size = prog_data.base.program_size, |
| .prog_data = &prog_data.base, |
| .prog_data_size = sizeof(prog_data), |
| .bind_map = &empty_bind_map, |
| .push_desc_info = &empty_push_desc_info, |
| }; |
| |
| kernel = anv_device_upload_kernel(device, device->internal_cache, &upload_params); |
| |
| exit: |
| ralloc_free(temp_ctx); |
| ralloc_free(nir); |
| |
| return kernel; |
| } |
| |
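/* Return the shader binary for one of the driver's internal kernels
 * through 'out_bin', compiling it on first use. Lookup order: the
 * device's internal_kernels array, then the internal pipeline cache,
 * then a fresh compile. The send_count values are hand-counted send
 * message totals, asserted against the compiled code in
 * compile_shader().
 */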
| VkResult |
| anv_device_get_internal_shader(struct anv_device *device, |
| enum anv_internal_kernel_name name, |
| struct anv_shader_bin **out_bin) |
| { |
| const struct { |
| struct { |
| char name[40]; |
| } key; |
| |
| gl_shader_stage stage; |
| |
| uint32_t send_count; |
| } internal_kernels[] = { |
| [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = { |
| .key = { |
| .name = "anv-generated-indirect-draws", |
| }, |
| .stage = MESA_SHADER_FRAGMENT, |
| .send_count = (device->info->ver == 9 ? |
| /* 1 load + |
| * 4 stores + |
| * 2 * (2 loads + 2 stores) + |
| * 3 stores |
| */ |
| 16 : |
| /* 1 load + |
| * 2 * (2 loads + 3 stores) + |
| * 3 stores |
| */ |
| 14) + |
| /* 3 loads + 3 stores */ |
| (intel_needs_workaround(device->info, 16011107343) ? 6 : 0) + |
| /* 3 loads + 3 stores */ |
| (intel_needs_workaround(device->info, 22018402687) ? 6 : 0), |
| }, |
| [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = { |
| .key = { |
| .name = "anv-copy-query-compute", |
| }, |
| .stage = MESA_SHADER_COMPUTE, |
| .send_count = device->info->verx10 >= 125 ? |
| 9 /* 4 loads + 4 stores + 1 EOT */ : |
| 8 /* 3 loads + 4 stores + 1 EOT */, |
| }, |
| [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = { |
| .key = { |
| .name = "anv-copy-query-fragment", |
| }, |
| .stage = MESA_SHADER_FRAGMENT, |
| .send_count = 8 /* 3 loads + 4 stores + 1 EOT */, |
| }, |
| [ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = { |
| .key = { |
| .name = "anv-memcpy-compute", |
| }, |
| .stage = MESA_SHADER_COMPUTE, |
| .send_count = device->info->verx10 >= 125 ? |
| 10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ : |
| 9 /* 4 loads + 4 stores + 1 EOT */, |
| }, |
| }; |
| |
| struct anv_shader_bin *bin = |
| p_atomic_read(&device->internal_kernels[name]); |
| if (bin != NULL) { |
| *out_bin = bin; |
| return VK_SUCCESS; |
| } |
| |
| bin = |
| anv_device_search_for_kernel(device, |
| device->internal_cache, |
| &internal_kernels[name].key, |
| sizeof(internal_kernels[name].key), |
| NULL); |
| if (bin != NULL) { |
| p_atomic_set(&device->internal_kernels[name], bin); |
| *out_bin = bin; |
| return VK_SUCCESS; |
| } |
| |
| void *mem_ctx = ralloc_context(NULL); |
| |
| nir_shader *libanv_shaders = |
| anv_genX(device->info, load_libanv_shader)(device, mem_ctx); |
| |
| bin = compile_shader(device, |
| libanv_shaders, |
| name, |
| internal_kernels[name].stage, |
| internal_kernels[name].key.name, |
| &internal_kernels[name].key, |
| sizeof(internal_kernels[name].key), |
| internal_kernels[name].send_count); |

   ralloc_free(mem_ctx);

   if (bin == NULL)
      return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "Unable to compile internal kernel");
| |
| /* The cache already has a reference and it's not going anywhere so |
| * there is no need to hold a second reference. |
| */ |
| anv_shader_bin_unref(device, bin); |
| |
| p_atomic_set(&device->internal_kernels[name], bin); |
| |
| *out_bin = bin; |
| return VK_SUCCESS; |
| } |
| |
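/* Pick the L3 configuration used when running internal kernels: weights
 * favoring the data cache, with no shared local memory required.
 */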
| VkResult |
| anv_device_init_internal_kernels(struct anv_device *device) |
| { |
| const struct intel_l3_weights w = |
| intel_get_default_l3_weights(device->info, |
| true /* wants_dc_cache */, |
| false /* needs_slm */); |
| device->internal_kernels_l3_config = intel_get_l3_config(device->info, w); |
| |
| return VK_SUCCESS; |
| } |
| |
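/* Nothing to release here: the internal cache owns the only reference to
 * each compiled kernel (see anv_device_get_internal_shader).
 */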
| void |
| anv_device_finish_internal_kernels(struct anv_device *device) |
| { |
| } |