/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "compiler/intel_nir.h"
#include "compiler/brw_compiler.h"
#include "compiler/brw_nir.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "dev/intel_debug.h"
#include "intel/compiler/intel_nir.h"
#include "util/macros.h"
#include "vk_nir.h"
#include "anv_internal_kernels.h"
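/* Internal kernels are never dispatched with a base workgroup id, so loads
* of that system value can simply be replaced with a constant (0, 0, 0).
*/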
static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
UNUSED void *data)
{
if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32));
return true;
}
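/* Link the precompiled libanv NIR library into the shader, inline the
* called functions, then lower the remaining variables and explicit I/O so
* that no function calls or opaque derefs are left behind.
*/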
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
nir_link_shader_functions(nir, libanv);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_remove_non_entrypoints);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
glsl_get_cl_type_size_align);
NIR_PASS_V(nir, nir_opt_deref);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
nir_var_mem_global,
nir_address_format_62bit_generic);
}
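/* Build, optimize and compile one internal kernel, then upload it to the
* device's internal cache. The shader body comes from the genX
* call_internal_shader() entrypoint, with helpers pulled in from the libanv
* NIR library.
*/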
static struct anv_shader_bin *
compile_shader(struct anv_device *device,
const nir_shader *libanv,
enum anv_internal_kernel_name shader_name,
gl_shader_stage stage,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
uint32_t sends_count_expectation)
{
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[stage];
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
"%s", name);
uint32_t uniform_size =
anv_genX(device->info, call_internal_shader)(&b, shader_name);
nir_shader *nir = b.shader;
link_libanv(nir, libanv);
if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
nir_lower_printf_options printf_opts = {
.ptr_bit_size = 64,
.use_printf_base_identifier = true,
};
NIR_PASS_V(nir, nir_lower_printf, &printf_opts);
}
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);
NIR_PASS_V(nir, nir_opt_gcm, true);
NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
if (stage == MESA_SHADER_COMPUTE) {
nir->info.workgroup_size[0] = 16;
nir->info.workgroup_size[1] = 1;
nir->info.workgroup_size[2] = 1;
}
struct brw_compiler *compiler = device->physical->compiler;
struct brw_nir_compiler_opts opts = {};
brw_preprocess_nir(compiler, nir, &opts);
NIR_PASS_V(nir, nir_propagate_invariant, false);
if (stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(nir, nir_lower_input_attachments,
&(nir_input_attachment_options) {
.use_fragcoord_sysval = true,
.use_layer_id_sysval = true,
});
} else {
nir_lower_compute_system_values_options options = {
.has_base_workgroup_id = true,
.lower_cs_local_id_to_index = true,
.lower_workgroup_id_to_index = gl_shader_stage_is_mesh(stage),
};
NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
nir_metadata_control_flow, NULL);
}
/* Reset sizes before gathering information */
nir->global_mem_size = 0;
nir->scratch_size = 0;
nir->info.shared_size = 0;
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_opt_dce);
union brw_any_prog_key key;
memset(&key, 0, sizeof(key));
union brw_any_prog_data prog_data;
memset(&prog_data, 0, sizeof(prog_data));
if (stage == MESA_SHADER_COMPUTE) {
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics,
device->info, &prog_data.cs);
}
/* Do the vectorizing here. For some reason this doesn't work when done
* later in the backend.
*/
nir_load_store_vectorize_options options = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
.callback = brw_nir_should_vectorize_mem,
.robust_modes = (nir_variable_mode)0,
};
NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
nir->num_uniforms = uniform_size;
prog_data.base.nr_params = nir->num_uniforms / 4;
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);
void *temp_ctx = ralloc_context(NULL);
const unsigned *program;
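/* Compile with the backend. Fragment kernels may be compiled for several
* SIMD widths, hence the array of stats below.
*/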
if (stage == MESA_SHADER_FRAGMENT) {
struct brw_compile_stats stats[3];
struct brw_compile_fs_params params = {
.base = {
.nir = nir,
.log_data = device,
.debug_flag = DEBUG_WM,
.stats = stats,
.mem_ctx = temp_ctx,
},
.key = &key.wm,
.prog_data = &prog_data.wm,
};
program = brw_compile_fs(compiler, &params);
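/* Sanity check the generated code: internal kernels are expected to never
* spill and to issue a known number of send messages per SIMD variant.
* Skipped when shader printing is enabled, since the printf lowering adds
* extra messages.
*/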
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
unsigned stat_idx = 0;
if (prog_data.wm.dispatch_8) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
stat_idx++;
}
if (prog_data.wm.dispatch_16) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends == sends_count_expectation);
stat_idx++;
}
if (prog_data.wm.dispatch_32) {
assert(stats[stat_idx].spills == 0);
assert(stats[stat_idx].fills == 0);
assert(stats[stat_idx].sends ==
sends_count_expectation *
(device->info->ver < 20 ? 2 : 1));
stat_idx++;
}
}
} else {
struct brw_compile_stats stats;
struct brw_compile_cs_params params = {
.base = {
.nir = nir,
.stats = &stats,
.log_data = device,
.debug_flag = DEBUG_CS,
.mem_ctx = temp_ctx,
},
.key = &key.cs,
.prog_data = &prog_data.cs,
};
program = brw_compile_cs(compiler, &params);
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
assert(stats.spills == 0);
assert(stats.fills == 0);
assert(stats.sends == sends_count_expectation);
}
}
assert(prog_data.base.total_scratch == 0);
assert(program != NULL);
struct anv_shader_bin *kernel = NULL;
if (program == NULL)
goto exit;
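/* Internal kernels use no descriptor sets, so the bind map and push
* descriptor info can stay empty.
*/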
struct anv_pipeline_bind_map empty_bind_map = {};
struct anv_push_descriptor_info empty_push_desc_info = {};
struct anv_shader_upload_params upload_params = {
.stage = nir->info.stage,
.key_data = hash_key,
.key_size = hash_key_size,
.kernel_data = program,
.kernel_size = prog_data.base.program_size,
.prog_data = &prog_data.base,
.prog_data_size = sizeof(prog_data),
.bind_map = &empty_bind_map,
.push_desc_info = &empty_push_desc_info,
};
kernel = anv_device_upload_kernel(device, device->internal_cache, &upload_params);
exit:
ralloc_free(temp_ctx);
ralloc_free(nir);
return kernel;
}
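/* Return the compiled shader for one of the internal kernels, compiling it
* on first use. Lookup order: the per-device pointer cache, then the
* internal pipeline cache, then a fresh compile whose result is stored in
* both.
*
* Illustrative call site (not from this file):
*
*    struct anv_shader_bin *bin;
*    VkResult result =
*       anv_device_get_internal_shader(device,
*                                      ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
*                                      &bin);
*    if (result != VK_SUCCESS)
*       return result;
*/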
VkResult
anv_device_get_internal_shader(struct anv_device *device,
enum anv_internal_kernel_name name,
struct anv_shader_bin **out_bin)
{
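/* Per-kernel metadata: the fixed-size name doubles as the cache key, and
* send_count is the number of send messages the compiled kernel is expected
* to contain on this platform.
*/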
const struct {
struct {
char name[40];
} key;
gl_shader_stage stage;
uint32_t send_count;
} internal_kernels[] = {
[ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
.key = {
.name = "anv-generated-indirect-draws",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = (device->info->ver == 9 ?
/* 1 load +
* 4 stores +
* 2 * (2 loads + 2 stores) +
* 3 stores
*/
16 :
/* 1 load +
* 2 * (2 loads + 3 stores) +
* 3 stores
*/
14) +
/* 3 loads + 3 stores */
(intel_needs_workaround(device->info, 16011107343) ? 6 : 0) +
/* 3 loads + 3 stores */
(intel_needs_workaround(device->info, 22018402687) ? 6 : 0),
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
.key = {
.name = "anv-copy-query-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = device->info->verx10 >= 125 ?
9 /* 4 loads + 4 stores + 1 EOT */ :
8 /* 3 loads + 4 stores + 1 EOT */,
},
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
.key = {
.name = "anv-copy-query-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
},
[ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
.key = {
.name = "anv-memcpy-compute",
},
.stage = MESA_SHADER_COMPUTE,
.send_count = device->info->verx10 >= 125 ?
10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
9 /* 4 loads + 4 stores + 1 EOT */,
},
};
struct anv_shader_bin *bin =
p_atomic_read(&device->internal_kernels[name]);
if (bin != NULL) {
*out_bin = bin;
return VK_SUCCESS;
}
bin =
anv_device_search_for_kernel(device,
device->internal_cache,
&internal_kernels[name].key,
sizeof(internal_kernels[name].key),
NULL);
if (bin != NULL) {
p_atomic_set(&device->internal_kernels[name], bin);
*out_bin = bin;
return VK_SUCCESS;
}
void *mem_ctx = ralloc_context(NULL);
nir_shader *libanv_shaders =
anv_genX(device->info, load_libanv_shader)(device, mem_ctx);
bin = compile_shader(device,
libanv_shaders,
name,
internal_kernels[name].stage,
internal_kernels[name].key.name,
&internal_kernels[name].key,
sizeof(internal_kernels[name].key),
internal_kernels[name].send_count);
ralloc_free(mem_ctx);
if (bin == NULL)
return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
"Unable to compile internal kernel");
/* The cache already has a reference and it's not going anywhere so
* there is no need to hold a second reference.
*/
anv_shader_bin_unref(device, bin);
p_atomic_set(&device->internal_kernels[name], bin);
*out_bin = bin;
return VK_SUCCESS;
}
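/* Pick the L3 configuration used when dispatching internal kernels. */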
VkResult
anv_device_init_internal_kernels(struct anv_device *device)
{
const struct intel_l3_weights w =
intel_get_default_l3_weights(device->info,
true /* wants_dc_cache */,
false /* needs_slm */);
device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
return VK_SUCCESS;
}
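/* Nothing to tear down: the compiled kernels live in the device's internal
* cache and are released with it.
*/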
void
anv_device_finish_internal_kernels(struct anv_device *device)
{
}