| /* |
| * Copyright © 2019 Raspberry Pi |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "vk_util.h" |
| |
| #include "v3dv_debug.h" |
| #include "v3dv_private.h" |
| |
| #include "vk_format_info.h" |
| |
| #include "common/v3d_debug.h" |
| |
| #include "compiler/nir/nir_builder.h" |
| |
| #include "util/u_atomic.h" |
| |
| #include "vulkan/util/vk_format.h" |
| |
| #include "broadcom/cle/v3dx_pack.h" |
| |
| VkResult |
| v3dv_CreateShaderModule(VkDevice _device, |
| const VkShaderModuleCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkShaderModule *pShaderModule) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| struct v3dv_shader_module *module; |
| |
| assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); |
| assert(pCreateInfo->flags == 0); |
| |
| module = vk_alloc2(&device->alloc, pAllocator, |
| sizeof(*module) + pCreateInfo->codeSize, 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (module == NULL) |
| return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| module->nir = NULL; |
| |
| module->size = pCreateInfo->codeSize; |
| memcpy(module->data, pCreateInfo->pCode, module->size); |
| |
| _mesa_sha1_compute(module->data, module->size, module->sha1); |
| |
| *pShaderModule = v3dv_shader_module_to_handle(module); |
| |
| return VK_SUCCESS; |
| } |
| |
| void |
| v3dv_DestroyShaderModule(VkDevice _device, |
| VkShaderModule _module, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_shader_module, module, _module); |
| |
| if (!module) |
| return; |
| |
| /* NIR modules (which are only created internally by the driver) are not |
| * dynamically allocated so we should never call this for them. |
| * Instead the driver is responsible for freeing the NIR code when it is |
| * no longer needed. |
| */ |
| assert(module->nir == NULL); |
| |
| vk_free2(&device->alloc, pAllocator, module); |
| } |
| |
| static void |
| destroy_pipeline_stage(struct v3dv_device *device, |
| struct v3dv_pipeline_stage *p_stage, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| if (!p_stage) |
| return; |
| |
|    hash_table_foreach(p_stage->cache, entry) { |
|       struct v3dv_shader_variant *variant = entry->data; |
| |
|       /* Variants for which compilation failed may not have an assembly bo, |
|        * but we still need to free their prog_data and the variant itself. |
|        */ |
|       if (variant->assembly_bo) |
|          v3dv_bo_free(device, variant->assembly_bo); |
|       ralloc_free(variant->prog_data.base); |
|       vk_free2(&device->alloc, pAllocator, variant); |
|    } |
| |
| ralloc_free(p_stage->nir); |
| |
| _mesa_hash_table_destroy(p_stage->cache, NULL); |
| |
| vk_free2(&device->alloc, pAllocator, p_stage); |
| } |
| |
| static void |
| v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, |
| struct v3dv_device *device, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| if (!pipeline) |
| return; |
| |
|    /* FIXME: we can't just loop over the mesa stages because of the bin |
|     * (coord) stage; it would be good to find an alternative. |
|     */ |
| destroy_pipeline_stage(device, pipeline->vs, pAllocator); |
| destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); |
| destroy_pipeline_stage(device, pipeline->fs, pAllocator); |
| destroy_pipeline_stage(device, pipeline->cs, pAllocator); |
| |
| if (pipeline->spill.bo) { |
| assert(pipeline->spill.size_per_thread > 0); |
| v3dv_bo_free(device, pipeline->spill.bo); |
| } |
| |
| if (pipeline->default_attribute_values) { |
| v3dv_bo_free(device, pipeline->default_attribute_values); |
| pipeline->default_attribute_values = NULL; |
| } |
| |
| if (pipeline->combined_index_map) |
| _mesa_hash_table_destroy(pipeline->combined_index_map, NULL); |
| |
| vk_free2(&device->alloc, pAllocator, pipeline); |
| } |
| |
| void |
| v3dv_DestroyPipeline(VkDevice _device, |
| VkPipeline _pipeline, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); |
| |
| if (!pipeline) |
| return; |
| |
| v3dv_destroy_pipeline(pipeline, device, pAllocator); |
| } |
| |
| static const struct spirv_to_nir_options default_spirv_options = { |
| .caps = { false }, |
| .ubo_addr_format = nir_address_format_32bit_index_offset, |
| .ssbo_addr_format = nir_address_format_32bit_index_offset, |
| .phys_ssbo_addr_format = nir_address_format_64bit_global, |
| .push_const_addr_format = nir_address_format_logical, |
| .shared_addr_format = nir_address_format_32bit_offset, |
| .frag_coord_is_sysval = false, |
| }; |
| |
| const nir_shader_compiler_options v3dv_nir_options = { |
| .lower_all_io_to_temps = true, |
| .lower_extract_byte = true, |
| .lower_extract_word = true, |
| .lower_bitfield_insert_to_shifts = true, |
| .lower_bitfield_extract_to_shifts = true, |
| .lower_bitfield_reverse = true, |
| .lower_bit_count = true, |
| .lower_cs_local_id_from_index = true, |
| .lower_ffract = true, |
| .lower_fmod = true, |
| .lower_pack_unorm_2x16 = true, |
| .lower_pack_snorm_2x16 = true, |
| .lower_unpack_unorm_2x16 = true, |
| .lower_unpack_snorm_2x16 = true, |
| .lower_pack_unorm_4x8 = true, |
| .lower_pack_snorm_4x8 = true, |
| .lower_unpack_unorm_4x8 = true, |
| .lower_unpack_snorm_4x8 = true, |
| .lower_pack_half_2x16 = true, |
| .lower_unpack_half_2x16 = true, |
| /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and |
| * get the tests to pass since it might produce slightly better code. |
| */ |
| .lower_uadd_carry = true, |
| .lower_usub_borrow = true, |
| /* FIXME: check if we can use multop + umul24 to implement mul2x32_64 |
| * without lowering. |
| */ |
| .lower_mul_2x32_64 = true, |
| .lower_fdiv = true, |
| .lower_find_lsb = true, |
| .lower_ffma16 = true, |
| .lower_ffma32 = true, |
| .lower_ffma64 = true, |
| .lower_flrp32 = true, |
| .lower_fpow = true, |
| .lower_fsat = true, |
| .lower_fsqrt = true, |
| .lower_ifind_msb = true, |
| .lower_isign = true, |
| .lower_ldexp = true, |
| .lower_mul_high = true, |
| .lower_wpos_pntc = true, |
| .lower_rotate = true, |
| .lower_to_scalar = true, |
| .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic |
| * needs to be supported */ |
| }; |
| |
| const nir_shader_compiler_options * |
| v3dv_pipeline_get_nir_options(void) |
| { |
| return &v3dv_nir_options; |
| } |
| |
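| /* Helper to run a NIR pass: it accumulates into the local 'progress' |
|  * variable and, being a statement expression, also evaluates to whether |
|  * this particular pass made progress, so it can be used in a condition, |
|  * e.g.: |
|  * |
|  *    if (OPT(nir_opt_algebraic)) |
|  *       ... |
|  */ |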
| #define OPT(pass, ...) ({ \ |
| bool this_progress = false; \ |
| NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ |
| if (this_progress) \ |
| progress = true; \ |
| this_progress; \ |
| }) |
| |
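| /* Generic NIR optimization loop: keeps running the passes below until none |
|  * of them reports progress. 'allow_copies' should only be set on the first |
|  * call (see the nir_opt_find_array_copies comment below). |
|  */ |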
| static void |
| nir_optimize(nir_shader *nir, |
| struct v3dv_pipeline_stage *stage, |
| bool allow_copies) |
| { |
| bool progress; |
| |
| do { |
| progress = false; |
| OPT(nir_split_array_vars, nir_var_function_temp); |
| OPT(nir_shrink_vec_array_vars, nir_var_function_temp); |
| OPT(nir_opt_deref); |
| OPT(nir_lower_vars_to_ssa); |
| if (allow_copies) { |
| /* Only run this pass in the first call to nir_optimize. Later calls |
| * assume that we've lowered away any copy_deref instructions and we |
| * don't want to introduce any more. |
| */ |
| OPT(nir_opt_find_array_copies); |
| } |
| OPT(nir_opt_copy_prop_vars); |
| OPT(nir_opt_dead_write_vars); |
| OPT(nir_opt_combine_stores, nir_var_all); |
| |
| OPT(nir_lower_alu_to_scalar, NULL, NULL); |
| |
| OPT(nir_copy_prop); |
| OPT(nir_lower_phis_to_scalar); |
| |
| OPT(nir_copy_prop); |
| OPT(nir_opt_dce); |
| OPT(nir_opt_cse); |
| OPT(nir_opt_combine_stores, nir_var_all); |
| |
| /* Passing 0 to the peephole select pass causes it to convert |
| * if-statements that contain only move instructions in the branches |
| * regardless of the count. |
| * |
| * Passing 1 to the peephole select pass causes it to convert |
| * if-statements that contain at most a single ALU instruction (total) |
| * in both branches. |
| */ |
| OPT(nir_opt_peephole_select, 0, false, false); |
| OPT(nir_opt_peephole_select, 8, false, true); |
| |
| OPT(nir_opt_intrinsics); |
| OPT(nir_opt_idiv_const, 32); |
| OPT(nir_opt_algebraic); |
| OPT(nir_opt_constant_folding); |
| |
| OPT(nir_opt_dead_cf); |
| |
| OPT(nir_opt_if, false); |
| OPT(nir_opt_conditional_discard); |
| |
| OPT(nir_opt_remove_phis); |
| OPT(nir_opt_undef); |
| OPT(nir_lower_pack); |
| } while (progress); |
| |
| OPT(nir_remove_dead_variables, nir_var_function_temp, NULL); |
| } |
| |
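| /* Stage-independent NIR preprocessing: lowers variable initializers, splits |
|  * struct variables and copies, removes dead variables, lowers indirect |
|  * derefs and runs the generic optimization loop. Called once per shader |
|  * right after it is created (from SPIR-V or from a driver-internal NIR |
|  * module). |
|  */ |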
| static void |
| preprocess_nir(nir_shader *nir, |
| struct v3dv_pipeline_stage *stage) |
| { |
| /* Make sure we lower variable initializers on output variables so that |
| * nir_remove_dead_variables below sees the corresponding stores |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out); |
| |
| /* Now that we've deleted all but the main function, we can go ahead and |
| * lower the rest of the variable initializers. |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); |
| |
| /* Split member structs. We do this before lower_io_to_temporaries so that |
| * it doesn't lower system values to temporaries by accident. |
| */ |
| NIR_PASS_V(nir, nir_split_var_copies); |
| NIR_PASS_V(nir, nir_split_per_member_structs); |
| |
| if (nir->info.stage == MESA_SHADER_FRAGMENT) |
| NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out); |
| if (nir->info.stage == MESA_SHADER_FRAGMENT) { |
| NIR_PASS_V(nir, nir_lower_input_attachments, |
| &(nir_input_attachment_options) { |
| .use_fragcoord_sysval = false, |
| }); |
| } |
| |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | |
| nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, |
| NULL); |
| |
| NIR_PASS_V(nir, nir_propagate_invariant); |
| NIR_PASS_V(nir, nir_lower_io_to_temporaries, |
| nir_shader_get_entrypoint(nir), true, false); |
| |
| NIR_PASS_V(nir, nir_lower_system_values); |
| NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); |
| |
| NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); |
| |
| NIR_PASS_V(nir, nir_normalize_cubemap_coords); |
| |
| NIR_PASS_V(nir, nir_lower_global_vars_to_local); |
| |
| NIR_PASS_V(nir, nir_split_var_copies); |
| NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp); |
| |
| nir_optimize(nir, stage, true); |
| |
| NIR_PASS_V(nir, nir_lower_load_const_to_scalar); |
| |
| /* Lower a bunch of stuff */ |
| NIR_PASS_V(nir, nir_lower_var_copies); |
| |
| NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | |
| nir_var_shader_out | |
| nir_var_function_temp, UINT32_MAX); |
| |
| NIR_PASS_V(nir, nir_lower_array_deref_of_vec, |
| nir_var_mem_ubo | nir_var_mem_ssbo, |
| nir_lower_direct_array_deref_of_vec_load); |
| |
| NIR_PASS_V(nir, nir_lower_frexp); |
| |
| /* Get rid of split copies */ |
| nir_optimize(nir, stage, false); |
| } |
| |
| /* FIXME: This is basically the same code as in anv, tu and radv. Move to a |
|  * common place? |
|  */ |
| static struct nir_spirv_specialization* |
| vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info, |
| uint32_t *out_num_spec_entries) |
| { |
| if (spec_info == NULL || spec_info->mapEntryCount == 0) |
| return NULL; |
| |
| uint32_t num_spec_entries = spec_info->mapEntryCount; |
| struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries)); |
| |
| for (uint32_t i = 0; i < num_spec_entries; i++) { |
| VkSpecializationMapEntry entry = spec_info->pMapEntries[i]; |
| const void *data = spec_info->pData + entry.offset; |
| assert(data + entry.size <= spec_info->pData + spec_info->dataSize); |
| |
| spec_entries[i].id = spec_info->pMapEntries[i].constantID; |
| switch (entry.size) { |
| case 8: |
| spec_entries[i].value.u64 = *(const uint64_t *)data; |
| break; |
| case 4: |
| spec_entries[i].value.u32 = *(const uint32_t *)data; |
| break; |
| case 2: |
| spec_entries[i].value.u16 = *(const uint16_t *)data; |
| break; |
| case 1: |
| spec_entries[i].value.u8 = *(const uint8_t *)data; |
| break; |
| default: |
| assert(!"Invalid spec constant size"); |
| break; |
| } |
| } |
| |
| *out_num_spec_entries = num_spec_entries; |
| return spec_entries; |
| } |
| |
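| /* Produces the preprocessed NIR for a pipeline stage: SPIR-V modules go |
|  * through spirv_to_nir with the stage's specialization constants, while |
|  * driver-internal NIR modules are cloned (the module keeps ownership of the |
|  * original code). In both cases everything is then inlined into a single |
|  * "main" entrypoint and preprocess_nir() is run on the result. |
|  */ |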
| static nir_shader * |
| shader_module_compile_to_nir(struct v3dv_device *device, |
| struct v3dv_pipeline_stage *stage) |
| { |
| nir_shader *nir; |
| const nir_shader_compiler_options *nir_options = &v3dv_nir_options; |
| |
| if (!stage->module->nir) { |
| uint32_t *spirv = (uint32_t *) stage->module->data; |
| assert(stage->module->size % 4 == 0); |
| |
| if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) |
| v3dv_print_spirv(stage->module->data, stage->module->size, stderr); |
| |
| uint32_t num_spec_entries = 0; |
| struct nir_spirv_specialization *spec_entries = |
| vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries); |
| const struct spirv_to_nir_options spirv_options = default_spirv_options; |
| nir = spirv_to_nir(spirv, stage->module->size / 4, |
| spec_entries, num_spec_entries, |
| stage->stage, stage->entrypoint, |
| &spirv_options, nir_options); |
| nir_validate_shader(nir, "after spirv_to_nir"); |
| free(spec_entries); |
| } else { |
|       /* For NIR modules created by the driver we can't consume the NIR |
|        * directly, we need to clone it first, since ownership of the NIR code |
|        * (as with SPIR-V code for SPIR-V shaders) belongs to the creator |
|        * of the module, and modules can be destroyed immediately after being |
|        * used to create pipelines. |
|        */ |
| nir = nir_shader_clone(NULL, stage->module->nir); |
| nir_validate_shader(nir, "nir module"); |
| } |
| assert(nir->info.stage == stage->stage); |
| |
| if (V3D_DEBUG & (V3D_DEBUG_NIR | |
| v3d_debug_flag_for_shader_stage(stage->stage))) { |
| fprintf(stderr, "Initial form: %s prog %d NIR:\n", |
| gl_shader_stage_name(stage->stage), |
| stage->program_id); |
| nir_print_shader(nir, stderr); |
| fprintf(stderr, "\n"); |
| } |
| |
| /* We have to lower away local variable initializers right before we |
| * inline functions. That way they get properly initialized at the top |
| * of the function and not at the top of its caller. |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); |
| NIR_PASS_V(nir, nir_lower_returns); |
| NIR_PASS_V(nir, nir_inline_functions); |
| NIR_PASS_V(nir, nir_opt_deref); |
| |
| /* Pick off the single entrypoint that we want */ |
| foreach_list_typed_safe(nir_function, func, node, &nir->functions) { |
| if (func->is_entrypoint) |
| func->name = ralloc_strdup(func, "main"); |
| else |
| exec_node_remove(&func->node); |
| } |
| assert(exec_list_length(&nir->functions) == 1); |
| |
| /* Vulkan uses the separate-shader linking model */ |
| nir->info.separate_shader = true; |
| |
| preprocess_nir(nir, stage); |
| |
| return nir; |
| } |
| |
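| /* I/O size callback for nir_lower_io: returns the number of vec4 slots |
|  * taken by the type. The 'bindless' parameter is ignored. |
|  */ |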
| static int |
| type_size_vec4(const struct glsl_type *type, bool bindless) |
| { |
| return glsl_count_attribute_slots(type, false); |
| } |
| |
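| /* Returns the index in the descriptor map for the given (set, binding, |
|  * array_index) tuple, adding a new entry at the end if it is not already |
|  * there. This is the index that the lowering code below rewrites into the |
|  * NIR (e.g. as texture/sampler index or UBO/SSBO index). |
|  */ |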
| static unsigned |
| descriptor_map_add(struct v3dv_descriptor_map *map, |
| int set, |
| int binding, |
| int array_index, |
| int array_size) |
| { |
| assert(array_index < array_size); |
| |
| unsigned index = 0; |
| for (unsigned i = 0; i < map->num_desc; i++) { |
| if (set == map->set[i] && |
| binding == map->binding[i] && |
| array_index == map->array_index[i]) { |
| assert(array_size == map->array_size[i]); |
| return index; |
| } |
| index++; |
| } |
| |
| assert(index == map->num_desc); |
| |
| map->set[map->num_desc] = set; |
| map->binding[map->num_desc] = binding; |
| map->array_index[map->num_desc] = array_index; |
| map->array_size[map->num_desc] = array_size; |
| map->num_desc++; |
| |
| return index; |
| } |
| |
| |
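| /* Push constant data is uploaded through the uniform stream, so loads from |
|  * push constants become plain uniform loads (note also that UBO index 0 is |
|  * reserved for push constants in lower_vulkan_resource_index below); only |
|  * the intrinsic type needs to change, the offset source is kept as-is. |
|  */ |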
| static void |
| lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, |
| struct v3dv_pipeline *pipeline) |
| { |
| assert(instr->intrinsic == nir_intrinsic_load_push_constant); |
| instr->intrinsic = nir_intrinsic_load_uniform; |
| } |
| |
| /* Gathers info from the intrinsic (set and binding) and then lowers it so it |
|  * can be consumed by the v3d_compiler */ |
| static void |
| lower_vulkan_resource_index(nir_builder *b, |
| nir_intrinsic_instr *instr, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index); |
| |
| nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); |
| |
| unsigned set = nir_intrinsic_desc_set(instr); |
| unsigned binding = nir_intrinsic_binding(instr); |
| struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; |
| struct v3dv_descriptor_set_binding_layout *binding_layout = |
| &set_layout->binding[binding]; |
| unsigned index = 0; |
| |
| switch (nir_intrinsic_desc_type(instr)) { |
| case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: |
| case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { |
| struct v3dv_descriptor_map *descriptor_map = |
| nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? |
| &pipeline->ubo_map : &pipeline->ssbo_map; |
| |
| if (!const_val) |
| unreachable("non-constant vulkan_resource_index array index"); |
| |
| index = descriptor_map_add(descriptor_map, set, binding, |
| const_val->u32, |
| binding_layout->array_size); |
| |
| if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { |
| /* skip index 0 which is used for push constants */ |
| index++; |
| } |
| break; |
| } |
| |
| default: |
| unreachable("unsupported desc_type for vulkan_resource_index"); |
| break; |
| } |
| |
| nir_ssa_def_rewrite_uses(&instr->dest.ssa, |
| nir_src_for_ssa(nir_imm_int(b, index))); |
| nir_instr_remove(&instr->instr); |
| } |
| |
| static struct hash_table * |
| pipeline_ensure_combined_index_map(struct v3dv_pipeline *pipeline) |
| { |
| if (pipeline->combined_index_map == NULL) { |
| pipeline->combined_index_map = |
| _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal); |
| pipeline->next_combined_index = 0; |
| } |
| |
| assert(pipeline->combined_index_map); |
| |
| return pipeline->combined_index_map; |
| } |
| |
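| /* Maps a (texture, sampler) index pair to a single combined index: the pair |
|  * is packed into a hash table key and new combined indices are assigned |
|  * sequentially, with combined_index_to_key_map recording the reverse |
|  * mapping. Callers then use the combined index for both the texture and the |
|  * sampler (see lower_sampler below). |
|  */ |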
| static uint32_t |
| get_combined_index(struct v3dv_pipeline *pipeline, |
| uint32_t texture_index, |
| uint32_t sampler_index) |
| { |
| struct hash_table *ht = pipeline_ensure_combined_index_map(pipeline); |
| uint32_t key = v3dv_pipeline_combined_index_key_create(texture_index, sampler_index); |
| struct hash_entry *entry = _mesa_hash_table_search(ht, &key); |
| |
| if (entry) |
| return (uint32_t)(uintptr_t) (entry->data); |
| |
| uint32_t new_index = pipeline->next_combined_index; |
| |
| _mesa_hash_table_insert(ht, &key, (void *)(uintptr_t) (new_index)); |
| pipeline->combined_index_to_key_map[new_index] = key; |
| pipeline->next_combined_index++; |
| |
| return new_index; |
| } |
| |
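| /* Walks the array derefs feeding a texture/sampler source, folding constant |
|  * indices into a base index and turning any dynamic part into a texture or |
|  * sampler offset source. The final (set, binding, array_index) tuple is |
|  * then registered in the pipeline's texture or sampler descriptor map and |
|  * the resulting index is stored in the instruction. |
|  */ |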
| static void |
| lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| nir_ssa_def *index = NULL; |
| unsigned base_index = 0; |
| unsigned array_elements = 1; |
| nir_tex_src *src = &instr->src[src_idx]; |
| bool is_sampler = src->src_type == nir_tex_src_sampler_deref; |
| |
|    /* First we compute the offsets */ |
| nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); |
| while (deref->deref_type != nir_deref_type_var) { |
| assert(deref->parent.is_ssa); |
| nir_deref_instr *parent = |
| nir_instr_as_deref(deref->parent.ssa->parent_instr); |
| |
| assert(deref->deref_type == nir_deref_type_array); |
| |
| if (nir_src_is_const(deref->arr.index) && index == NULL) { |
| /* We're still building a direct index */ |
| base_index += nir_src_as_uint(deref->arr.index) * array_elements; |
| } else { |
| if (index == NULL) { |
| /* We used to be direct but not anymore */ |
| index = nir_imm_int(b, base_index); |
| base_index = 0; |
| } |
| |
| index = nir_iadd(b, index, |
| nir_imul(b, nir_imm_int(b, array_elements), |
| nir_ssa_for_src(b, deref->arr.index, 1))); |
| } |
| |
| array_elements *= glsl_get_length(parent->type); |
| |
| deref = parent; |
| } |
| |
| if (index) |
| index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); |
| |
|    /* Now that we have the offsets, we apply them, rewriting the source or |
|     * removing the instruction if needed. |
|     */ |
| if (index) { |
| nir_instr_rewrite_src(&instr->instr, &src->src, |
| nir_src_for_ssa(index)); |
| |
| src->src_type = is_sampler ? |
| nir_tex_src_sampler_offset : |
| nir_tex_src_texture_offset; |
| } else { |
| nir_tex_instr_remove_src(instr, src_idx); |
| } |
| |
| uint32_t set = deref->var->data.descriptor_set; |
| uint32_t binding = deref->var->data.binding; |
| struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; |
| struct v3dv_descriptor_set_binding_layout *binding_layout = |
| &set_layout->binding[binding]; |
| |
|    /* For input attachments, the shader includes the attachment_idx. As we |
|     * are treating them as textures, we only want the base_index. |
|     */ |
| uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? |
| deref->var->data.index + base_index : |
| base_index; |
| |
| int desc_index = |
| descriptor_map_add(is_sampler ? |
| &pipeline->sampler_map : &pipeline->texture_map, |
| deref->var->data.descriptor_set, |
| deref->var->data.binding, |
| array_index, |
| binding_layout->array_size); |
| if (is_sampler) |
| instr->sampler_index = desc_index; |
| else |
| instr->texture_index = desc_index; |
| } |
| |
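| /* Lowers the texture and sampler deref sources of a tex instruction to |
|  * descriptor map indices and replaces both with a single combined |
|  * texture/sampler index. Returns whether the instruction was changed. |
|  */ |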
| static bool |
| lower_sampler(nir_builder *b, nir_tex_instr *instr, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| int texture_idx = |
| nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); |
| |
| if (texture_idx >= 0) |
| lower_tex_src_to_offset(b, instr, texture_idx, pipeline, layout); |
| |
| int sampler_idx = |
| nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); |
| |
| if (sampler_idx >= 0) |
| lower_tex_src_to_offset(b, instr, sampler_idx, pipeline, layout); |
| |
| if (texture_idx < 0 && sampler_idx < 0) |
| return false; |
| |
| int combined_index = |
| get_combined_index(pipeline, |
| instr->texture_index, |
| sampler_idx < 0 ? V3DV_NO_SAMPLER_IDX : instr->sampler_index); |
| |
| instr->texture_index = combined_index; |
| instr->sampler_index = combined_index; |
| |
| return true; |
| } |
| |
| /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */ |
| static void |
| lower_image_deref(nir_builder *b, |
| nir_intrinsic_instr *instr, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); |
| nir_ssa_def *index = NULL; |
| unsigned array_elements = 1; |
| unsigned base_index = 0; |
| |
| while (deref->deref_type != nir_deref_type_var) { |
| assert(deref->parent.is_ssa); |
| nir_deref_instr *parent = |
| nir_instr_as_deref(deref->parent.ssa->parent_instr); |
| |
| assert(deref->deref_type == nir_deref_type_array); |
| |
| if (nir_src_is_const(deref->arr.index) && index == NULL) { |
| /* We're still building a direct index */ |
| base_index += nir_src_as_uint(deref->arr.index) * array_elements; |
| } else { |
| if (index == NULL) { |
| /* We used to be direct but not anymore */ |
| index = nir_imm_int(b, base_index); |
| base_index = 0; |
| } |
| |
| index = nir_iadd(b, index, |
| nir_imul(b, nir_imm_int(b, array_elements), |
| nir_ssa_for_src(b, deref->arr.index, 1))); |
| } |
| |
| array_elements *= glsl_get_length(parent->type); |
| |
| deref = parent; |
| } |
| |
| if (index) |
| index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); |
| |
| uint32_t set = deref->var->data.descriptor_set; |
| uint32_t binding = deref->var->data.binding; |
| struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout; |
| struct v3dv_descriptor_set_binding_layout *binding_layout = |
| &set_layout->binding[binding]; |
| |
| uint32_t array_index = deref->var->data.index + base_index; |
| |
| assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); |
| |
| int desc_index = |
| descriptor_map_add(&pipeline->texture_map, |
| deref->var->data.descriptor_set, |
| deref->var->data.binding, |
| array_index, |
| binding_layout->array_size); |
| |
| /* We still need to get a combined_index, as we are integrating images with |
| * the rest of the texture/sampler support |
| */ |
| int combined_index = |
| get_combined_index(pipeline, desc_index, V3DV_NO_SAMPLER_IDX); |
| |
| index = nir_imm_int(b, combined_index); |
| |
| nir_rewrite_image_intrinsic(instr, index, false); |
| } |
| |
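| /* Lowers the intrinsics that need pipeline or pipeline layout info: push |
|  * constants, vulkan_resource_index, image derefs, and load_layer_id (which |
|  * is hardwired to 0 until layered rendering gets supported). Returns |
|  * whether the intrinsic was lowered. |
|  */ |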
| static bool |
| lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| switch (instr->intrinsic) { |
| case nir_intrinsic_load_layer_id: |
| /* FIXME: if layered rendering gets supported, this would need a real |
| * lowering |
| */ |
| nir_ssa_def_rewrite_uses(&instr->dest.ssa, |
| nir_src_for_ssa(nir_imm_int(b, 0))); |
| nir_instr_remove(&instr->instr); |
| return true; |
| |
| case nir_intrinsic_load_push_constant: |
| lower_load_push_constant(b, instr, pipeline); |
| pipeline->use_push_constants = true; |
| return true; |
| |
| case nir_intrinsic_vulkan_resource_index: |
| lower_vulkan_resource_index(b, instr, pipeline, layout); |
| return true; |
| |
| case nir_intrinsic_image_deref_load: |
| case nir_intrinsic_image_deref_store: |
| case nir_intrinsic_image_deref_atomic_add: |
| case nir_intrinsic_image_deref_atomic_imin: |
| case nir_intrinsic_image_deref_atomic_umin: |
| case nir_intrinsic_image_deref_atomic_imax: |
| case nir_intrinsic_image_deref_atomic_umax: |
| case nir_intrinsic_image_deref_atomic_and: |
| case nir_intrinsic_image_deref_atomic_or: |
| case nir_intrinsic_image_deref_atomic_xor: |
| case nir_intrinsic_image_deref_atomic_exchange: |
| case nir_intrinsic_image_deref_atomic_comp_swap: |
| case nir_intrinsic_image_deref_size: |
| case nir_intrinsic_image_deref_samples: |
| lower_image_deref(b, instr, pipeline, layout); |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| static bool |
| lower_impl(nir_function_impl *impl, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| nir_builder b; |
| nir_builder_init(&b, impl); |
| bool progress = false; |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr_safe(instr, block) { |
| b.cursor = nir_before_instr(instr); |
| switch (instr->type) { |
| case nir_instr_type_tex: |
| progress |= |
| lower_sampler(&b, nir_instr_as_tex(instr), pipeline, layout); |
| break; |
| case nir_instr_type_intrinsic: |
| progress |= |
| lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout); |
| break; |
| default: |
| break; |
| } |
| } |
| } |
| |
| return progress; |
| } |
| |
| static bool |
| lower_pipeline_layout_info(nir_shader *shader, |
| struct v3dv_pipeline *pipeline, |
| const struct v3dv_pipeline_layout *layout) |
| { |
| bool progress = false; |
| |
| nir_foreach_function(function, shader) { |
| if (function->impl) |
| progress |= lower_impl(function->impl, pipeline, layout); |
| } |
| |
| return progress; |
| } |
| |
| |
| static void |
| lower_fs_io(nir_shader *nir) |
| { |
| /* Our backend doesn't handle array fragment shader outputs */ |
| NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL); |
| |
| nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, |
| MESA_SHADER_FRAGMENT); |
| |
| nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, |
| MESA_SHADER_FRAGMENT); |
| |
| NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, |
| type_size_vec4, 0); |
| } |
| |
| static void |
| lower_vs_io(struct nir_shader *nir) |
| { |
| NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); |
| |
| nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, |
| MESA_SHADER_VERTEX); |
| |
| nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, |
| MESA_SHADER_VERTEX); |
| |
| /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it |
| * overlaps with v3d_nir_lower_io. Need further research though. |
| */ |
| } |
| |
| static void |
| shader_debug_output(const char *message, void *data) |
| { |
|    /* FIXME: We probably don't want to debug anything extra here, and in fact |
|     * the compiler doesn't use this callback much; it is only an alternative |
|     * way to dump the shaderdb stats, which you can already get using |
|     * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d |
|     * compiler and remove that callback. |
|     */ |
| } |
| |
| static void |
| pipeline_populate_v3d_key(struct v3d_key *key, |
| const struct v3dv_pipeline_stage *p_stage, |
| uint32_t ucp_enables) |
| { |
|    /* The following are default values used at pipeline creation time, which |
|     * lack the info about the real sampler/texture formats used, needed to |
|     * decide about lowerings and other things affecting the final assembly. |
|     * Once all that info is in place, we need to check whether a new shader |
|     * variant is required (if we are lucky the default values will match and |
|     * no new compilation will be done). |
|     */ |
| |
|    /* We don't use the nir shader info.num_textures because that doesn't take |
|     * into account input attachments, even after calling |
|     * nir_lower_input_attachments. As a general rule that makes sense, but in |
|     * our case we are handling them mostly as textures. We iterate through |
|     * the combined_index_map that was filled with the textures used in the |
|     * shader. |
|     */ |
| uint32_t tex_idx = 0; |
| if (p_stage->pipeline->combined_index_map) { |
| hash_table_foreach(p_stage->pipeline->combined_index_map, entry) { |
| key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X; |
| key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y; |
| key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z; |
| key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W; |
| |
| key->tex[tex_idx].return_size = 16; |
| key->tex[tex_idx].return_channels = 2; |
| |
| tex_idx++; |
| } |
| } |
| key->num_tex_used = tex_idx; |
| assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); |
| |
|    /* Default value. It will be overridden in the vs/gs populate methods when |
|     * GS gets supported. |
|     */ |
| key->is_last_geometry_stage = true; |
| |
| /* Vulkan doesn't have fixed function state for user clip planes. Instead, |
| * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler |
| * takes care of adding a single compact array variable at |
| * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering. |
| * |
| * The only lowering we are interested is specific to the fragment shader, |
| * where we want to emit discards to honor writes to gl_ClipDistance[] in |
| * previous stages. This is done via nir_lower_clip_fs() so we only set up |
| * the ucp enable mask for that stage. |
| */ |
| key->ucp_enables = ucp_enables; |
| |
| key->environment = V3D_ENVIRONMENT_VULKAN; |
| } |
| |
| /* FIXME: anv maps to the hw primitive type. Perhaps eventually we should do |
|  * the same. For now we use pipe_prim_type, which is what v3d already uses. |
|  */ |
| static const enum pipe_prim_type vk_to_pipe_prim_type[] = { |
| [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY, |
| [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY, |
| [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, |
| }; |
| |
| static const enum pipe_logicop vk_to_pipe_logicop[] = { |
| [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, |
| [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, |
| [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, |
| [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, |
| [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, |
| [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, |
| [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, |
| [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, |
| [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, |
| [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, |
| [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, |
| [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, |
| [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, |
| [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED, |
| [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, |
| [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, |
| }; |
| |
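| /* Fills the v3d_fs_key from the pipeline create info: topology, logic ops, |
|  * multisampling, depth state and the per-render-target format information |
|  * that decides swizzles and int/float output handling. |
|  */ |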
| static void |
| pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, |
| const VkGraphicsPipelineCreateInfo *pCreateInfo, |
| const struct v3dv_pipeline_stage *p_stage, |
| uint32_t ucp_enables) |
| { |
| memset(key, 0, sizeof(*key)); |
| |
| pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables); |
| |
| const VkPipelineInputAssemblyStateCreateInfo *ia_info = |
| pCreateInfo->pInputAssemblyState; |
| uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; |
| |
| key->is_points = (topology == PIPE_PRIM_POINTS); |
| key->is_lines = (topology >= PIPE_PRIM_LINES && |
| topology <= PIPE_PRIM_LINE_STRIP); |
| |
|    /* Vulkan doesn't appear to specify this (anv does the same) */ |
| key->clamp_color = false; |
| |
| const VkPipelineColorBlendStateCreateInfo *cb_info = |
| pCreateInfo->pColorBlendState; |
| |
| key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? |
| vk_to_pipe_logicop[cb_info->logicOp] : |
| PIPE_LOGICOP_COPY; |
| |
| const VkPipelineMultisampleStateCreateInfo *ms_info = |
| pCreateInfo->pMultisampleState; |
| |
| /* FIXME: msaa not supported yet (although we add some of the code to |
| * translate vk sample info in advance) |
| */ |
| key->msaa = false; |
|    if (key->msaa && ms_info != NULL) { |
| uint32_t sample_mask = 0xffff; |
| |
| if (ms_info->pSampleMask) |
| sample_mask = ms_info->pSampleMask[0] & 0xffff; |
| |
| key->sample_coverage = (sample_mask != (1 << V3D_MAX_SAMPLES) - 1); |
| key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; |
| key->sample_alpha_to_one = ms_info->alphaToOneEnable; |
| } |
| |
| const VkPipelineDepthStencilStateCreateInfo *ds_info = |
| pCreateInfo->pDepthStencilState; |
| |
| key->depth_enabled = (ds_info == NULL ? false : ds_info->depthTestEnable); |
| |
| /* Vulkan doesn't support alpha test */ |
| key->alpha_test = false; |
| key->alpha_test_func = COMPARE_FUNC_NEVER; |
| |
| /* FIXME: placeholder. Final value for swap_color_rb depends on the format |
| * of the surface to be used. |
| */ |
| key->swap_color_rb = false; |
| |
| const struct v3dv_render_pass *pass = |
| v3dv_render_pass_from_handle(pCreateInfo->renderPass); |
| const struct v3dv_subpass *subpass = p_stage->pipeline->subpass; |
| for (uint32_t i = 0; i < subpass->color_count; i++) { |
| const uint32_t att_idx = subpass->color_attachments[i].attachment; |
| if (att_idx == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| key->cbufs |= 1 << i; |
| |
| VkFormat fb_format = pass->attachments[att_idx].desc.format; |
| enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); |
| |
| /* If logic operations are enabled then we might emit color reads and we |
| * need to know the color buffer format and swizzle for that |
| */ |
| if (key->logicop_func != PIPE_LOGICOP_COPY) { |
| key->color_fmt[i].format = fb_pipe_format; |
| key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format); |
| } |
| |
| const struct util_format_description *desc = |
| vk_format_description(fb_format); |
| |
| if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && |
| desc->channel[0].size == 32) { |
| key->f32_color_rb |= 1 << i; |
| } |
| |
| if (p_stage->nir->info.fs.untyped_color_outputs) { |
| if (util_format_is_pure_uint(fb_pipe_format)) |
| key->uint_color_rb |= 1 << i; |
| else if (util_format_is_pure_sint(fb_pipe_format)) |
| key->int_color_rb |= 1 << i; |
| } |
| |
| if (key->is_points) { |
|          /* FIXME: The mask would need to be computed based on the shader |
|           * inputs. On gallium it is done at st_atom_rasterizer |
|           * (sprite_coord_enable). anv seems (need to confirm) to do that in |
|           * genX_pipeline (PointSpriteTextureCoordinateEnable). It would also |
|           * be better to have tests to guide filling the mask. |
|           */ |
| key->point_sprite_mask = 0; |
| |
| /* Vulkan mandates upper left. */ |
| key->point_coord_upper_left = true; |
| } |
| } |
| |
|    /* FIXME: we understand that this is used in GL to configure fixed-function |
|     * two-sided lighting support, which doesn't make sense for Vulkan. Need to |
|     * confirm, though. |
|     */ |
| key->light_twoside = false; |
| |
|    /* FIXME: ditto, although for flat lighting. Again, need to confirm. */ |
| key->shade_model_flat = false; |
| } |
| |
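| /* Fills the v3d_vs_key from the pipeline create info. For the render (non |
|  * coord) shader, the outputs consumed by the current fs variant are copied |
|  * into the key so the compiler can drop the rest. |
|  */ |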
| static void |
| pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, |
| const VkGraphicsPipelineCreateInfo *pCreateInfo, |
| const struct v3dv_pipeline_stage *p_stage) |
| { |
| memset(key, 0, sizeof(*key)); |
| |
| pipeline_populate_v3d_key(&key->base, p_stage, 0); |
| |
|    /* Vulkan doesn't appear to specify this (anv does the same) */ |
| key->clamp_color = false; |
| |
|    /* Vulkan specifies the point size per vertex, so this is true if the |
|     * primitives are points, as in ES2. |
|     */ |
| const VkPipelineInputAssemblyStateCreateInfo *ia_info = |
| pCreateInfo->pInputAssemblyState; |
| uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; |
| |
|    /* FIXME: being PIPE_PRIM_POINTS is not enough; on gallium the full check |
|     * is PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ |
| key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); |
| |
| key->is_coord = p_stage->is_coord; |
| if (p_stage->is_coord) { |
| /* The only output varying on coord shaders are for transform |
| * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. |
| */ |
| key->num_used_outputs = 0; |
| } else { |
| struct v3dv_pipeline *pipeline = p_stage->pipeline; |
| struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant; |
| |
| key->num_used_outputs = fs_variant->prog_data.fs->num_inputs; |
| |
| STATIC_ASSERT(sizeof(key->used_outputs) == |
| sizeof(fs_variant->prog_data.fs->input_slots)); |
| memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots, |
| sizeof(key->used_outputs)); |
| } |
| |
| const VkPipelineVertexInputStateCreateInfo *vi_info = |
| pCreateInfo->pVertexInputState; |
| for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { |
| const VkVertexInputAttributeDescription *desc = |
| &vi_info->pVertexAttributeDescriptions[i]; |
| assert(desc->location < MAX_VERTEX_ATTRIBS); |
| if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) |
| key->va_swap_rb_mask |= 1 << desc->location; |
| } |
| } |
| |
| /* FIXME: the following hash/compare methods are copied from v3d. Move to a |
|  * common place? |
|  */ |
| static uint32_t |
| fs_cache_hash(const void *key) |
| { |
| return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); |
| } |
| |
| static uint32_t |
| vs_cache_hash(const void *key) |
| { |
| return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); |
| } |
| |
| static uint32_t |
| cs_cache_hash(const void *key) |
| { |
| return _mesa_hash_data(key, sizeof(struct v3d_key)); |
| } |
| |
| static bool |
| fs_cache_compare(const void *key1, const void *key2) |
| { |
| return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; |
| } |
| |
| static bool |
| vs_cache_compare(const void *key1, const void *key2) |
| { |
| return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; |
| } |
| |
| static bool |
| cs_cache_compare(const void *key1, const void *key2) |
| { |
| return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; |
| } |
| |
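| /* Creates the per-stage variant cache, keyed on the stage-specific key type |
|  * (the hash/compare callbacks above only differ in the key size they use). |
|  */ |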
| static struct hash_table* |
| create_variant_cache(gl_shader_stage stage) |
| { |
| switch (stage) { |
| case MESA_SHADER_VERTEX: |
| return _mesa_hash_table_create(NULL, vs_cache_hash, vs_cache_compare); |
| case MESA_SHADER_FRAGMENT: |
| return _mesa_hash_table_create(NULL, fs_cache_hash, fs_cache_compare); |
| case MESA_SHADER_COMPUTE: |
| return _mesa_hash_table_create(NULL, cs_cache_hash, cs_cache_compare); |
| default: |
| unreachable("not supported shader stage"); |
| } |
| } |
| |
| /* |
| * Creates the pipeline_stage for the coordinate shader. Initially a clone of |
|  * the vs pipeline_stage, with is_coord set to true. |
| */ |
| static struct v3dv_pipeline_stage* |
| pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| struct v3dv_device *device = src->pipeline->device; |
| |
| struct v3dv_pipeline_stage *p_stage = |
| vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| |
| p_stage->pipeline = src->pipeline; |
| assert(src->stage == MESA_SHADER_VERTEX); |
| p_stage->stage = src->stage; |
| p_stage->entrypoint = src->entrypoint; |
| p_stage->module = src->module; |
| p_stage->nir = nir_shader_clone(NULL, src->nir); |
| p_stage->spec_info = src->spec_info; |
| |
|    /* Technically we could share the hash_table, but giving each stage its |
|     * own makes destroying a p_stage more straightforward. |
|     */ |
| p_stage->cache = create_variant_cache(MESA_SHADER_VERTEX); |
| |
| p_stage->is_coord = true; |
| |
| return p_stage; |
| } |
| |
| /* FIXME: right now this just asks for a bo with the exact size of the qpu |
|  * assembly. It would be good to be slightly smarter and have one "all |
|  * shaders" bo per pipeline, where each p_stage/variant would store its |
|  * offset. That is particularly relevant because bos are always aligned to |
|  * 4096, so it would allow us to use less memory. |
|  * |
|  * For now one bo per assembly will do. |
|  * |
|  * Returns false if it was not able to allocate or map the assembly bo memory. |
|  */ |
| static bool |
| upload_assembly(struct v3dv_pipeline_stage *p_stage, |
| struct v3dv_shader_variant *variant, |
| const void *data, |
| uint32_t size) |
| { |
| const char *name = NULL; |
| /* We are uploading the assembly just once, so at this point we shouldn't |
| * have any bo |
| */ |
| assert(variant->assembly_bo == NULL); |
| struct v3dv_device *device = p_stage->pipeline->device; |
| |
| switch (p_stage->stage) { |
| case MESA_SHADER_VERTEX: |
| name = (p_stage->is_coord == true) ? "coord_shader_assembly" : |
| "vertex_shader_assembly"; |
| break; |
| case MESA_SHADER_FRAGMENT: |
| name = "fragment_shader_assembly"; |
| break; |
| case MESA_SHADER_COMPUTE: |
| name = "compute_shader_assembly"; |
| break; |
| default: |
| unreachable("Stage not supported\n"); |
| break; |
|    } |
| |
| struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true); |
| if (!bo) { |
| fprintf(stderr, "failed to allocate memory for shader\n"); |
| return false; |
| } |
| |
|    bool ok = v3dv_bo_map(device, bo, size); |
|    if (!ok) { |
|       fprintf(stderr, "failed to map source shader buffer\n"); |
|       /* Don't leak the bo if mapping failed */ |
|       v3dv_bo_free(device, bo); |
|       return false; |
|    } |
| |
| memcpy(bo->map, data, size); |
| |
| v3dv_bo_unmap(device, bo); |
| |
| variant->assembly_bo = bo; |
| |
| return true; |
| } |
| |
| /* For a given key, returns the compiled version of the shader. If it was |
|  * already compiled, it is fetched from the p_stage cache; if not, it is |
|  * compiled through the v3d compiler. |
| * |
| * If the method returns NULL it means that it was not able to allocate the |
| * resources for the variant. out_vk_result would return which OOM applies. |
| */ |
| struct v3dv_shader_variant* |
| v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage, |
| struct v3d_key *key, |
| size_t key_size, |
| const VkAllocationCallbacks *pAllocator, |
| VkResult *out_vk_result) |
| { |
| struct hash_table *ht = p_stage->cache; |
| struct hash_entry *entry = _mesa_hash_table_search(ht, key); |
| |
| if (entry) { |
| *out_vk_result = VK_SUCCESS; |
| return entry->data; |
| } |
| |
| struct v3dv_pipeline *pipeline = p_stage->pipeline; |
| struct v3dv_device *device = pipeline->device; |
| struct v3dv_shader_variant *variant = |
| vk_zalloc(&device->alloc, sizeof(*variant), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| |
| if (variant == NULL) { |
| *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| return NULL; |
| } |
| |
| struct v3dv_physical_device *physical_device = |
| &pipeline->device->instance->physicalDevice; |
| const struct v3d_compiler *compiler = physical_device->compiler; |
| |
| uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count); |
| |
| if (V3D_DEBUG & (V3D_DEBUG_NIR | |
| v3d_debug_flag_for_shader_stage(p_stage->stage))) { |
| fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n", |
| gl_shader_stage_name(p_stage->stage), |
| p_stage->program_id, |
| variant_id); |
| nir_print_shader(p_stage->nir, stderr); |
| fprintf(stderr, "\n"); |
| } |
| |
| uint64_t *qpu_insts; |
| uint32_t qpu_insts_size; |
| |
| qpu_insts = v3d_compile(compiler, |
| key, &variant->prog_data.base, |
| p_stage->nir, |
| shader_debug_output, NULL, |
| p_stage->program_id, |
| variant_id, |
| &qpu_insts_size); |
| |
| if (!qpu_insts) { |
| fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", |
| gl_shader_stage_name(p_stage->stage), |
| p_stage->program_id); |
| } else { |
| if (!upload_assembly(p_stage, variant, qpu_insts, qpu_insts_size)) { |
| free(qpu_insts); |
| vk_free2(&device->alloc, pAllocator, variant); |
| |
| *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; |
| return NULL; |
| } |
| } |
| |
| free(qpu_insts); |
| |
| if (ht) { |
| struct v3d_key *dup_key; |
| dup_key = ralloc_size(ht, key_size); |
| memcpy(dup_key, key, key_size); |
| _mesa_hash_table_insert(ht, dup_key, variant); |
| } |
| |
| if (variant->prog_data.base->spill_size > pipeline->spill.size_per_thread) { |
| /* The TIDX register we use for choosing the area to access |
| * for scratch space is: (core << 6) | (qpu << 2) | thread. |
| * Even at minimum threadcount in a particular shader, that |
|        * means we still multiply the number of qpus by 4. |
| */ |
| const uint32_t total_spill_size = |
| 4 * device->devinfo.qpu_count * variant->prog_data.base->spill_size; |
| if (pipeline->spill.bo) { |
| assert(pipeline->spill.size_per_thread > 0); |
| v3dv_bo_free(device, pipeline->spill.bo); |
| } |
| pipeline->spill.bo = |
| v3dv_bo_alloc(device, total_spill_size, "spill", true); |
| pipeline->spill.size_per_thread = variant->prog_data.base->spill_size; |
| } |
| |
| *out_vk_result = VK_SUCCESS; |
| return variant; |
| } |
| |
| /* FIXME: copied from st; move to a common place? */ |
| static void |
| st_nir_opts(nir_shader *nir) |
| { |
| bool progress; |
| |
| do { |
| progress = false; |
| |
| NIR_PASS_V(nir, nir_lower_vars_to_ssa); |
| |
| /* Linking deals with unused inputs/outputs, but here we can remove |
| * things local to the shader in the hopes that we can cleanup other |
| * things. This pass will also remove variables with only stores, so we |
| * might be able to make progress after it. |
| */ |
| NIR_PASS(progress, nir, nir_remove_dead_variables, |
| (nir_variable_mode)(nir_var_function_temp | |
| nir_var_shader_temp | |
| nir_var_mem_shared), |
| NULL); |
| |
| NIR_PASS(progress, nir, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, nir, nir_opt_dead_write_vars); |
| |
| if (nir->options->lower_to_scalar) { |
| NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); |
| NIR_PASS_V(nir, nir_lower_phis_to_scalar); |
| } |
| |
| NIR_PASS_V(nir, nir_lower_alu); |
| NIR_PASS_V(nir, nir_lower_pack); |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_remove_phis); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| if (nir_opt_trivial_continues(nir)) { |
| progress = true; |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| } |
| NIR_PASS(progress, nir, nir_opt_if, false); |
| NIR_PASS(progress, nir, nir_opt_dead_cf); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); |
| |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| |
| NIR_PASS(progress, nir, nir_opt_undef); |
| NIR_PASS(progress, nir, nir_opt_conditional_discard); |
| } while (progress); |
| } |
| |
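| /* Cross-stage linking of a producer/consumer pair: scalarizes the interface |
|  * if needed, removes unused varyings on both sides, and re-runs the |
|  * optimization loop whenever linking made progress. |
|  */ |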
| static void |
| link_shaders(nir_shader *producer, nir_shader *consumer) |
| { |
| assert(producer); |
| assert(consumer); |
| |
| if (producer->options->lower_to_scalar) { |
| NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out); |
| NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); |
| } |
| |
| nir_lower_io_arrays_to_elements(producer, consumer); |
| |
| st_nir_opts(producer); |
| st_nir_opts(consumer); |
| |
| if (nir_link_opt_varyings(producer, consumer)) |
| st_nir_opts(consumer); |
| |
| NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); |
| NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); |
| |
| if (nir_remove_unused_varyings(producer, consumer)) { |
| NIR_PASS_V(producer, nir_lower_global_vars_to_local); |
| NIR_PASS_V(consumer, nir_lower_global_vars_to_local); |
| |
| st_nir_opts(producer); |
| st_nir_opts(consumer); |
| |
| /* Optimizations can cause varyings to become unused. |
| * nir_compact_varyings() depends on all dead varyings being removed so |
| * we need to call nir_remove_dead_variables() again here. |
| */ |
| NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); |
| NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); |
| } |
| } |
| |
| static void |
| pipeline_lower_nir(struct v3dv_pipeline *pipeline, |
| struct v3dv_pipeline_stage *p_stage, |
| struct v3dv_pipeline_layout *layout) |
| { |
| nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); |
| |
| /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ |
| NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout); |
| } |
| |
| /** |
| * The SPIR-V compiler will insert a sized compact array for |
| * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[], |
| * where the size of the array determines the number of active clip planes. |
| */ |
| static uint32_t |
| get_ucp_enable_mask(struct v3dv_pipeline_stage **stages) |
| { |
| const nir_shader *shader = stages[MESA_SHADER_VERTEX]->nir; |
| assert(shader); |
| |
| nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { |
| if (var->data.location == VARYING_SLOT_CLIP_DIST0) { |
| assert(var->data.compact); |
| return (1 << glsl_get_length(var->type)) - 1; |
| } |
| } |
| return 0; |
| } |
| |
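| /* Builds all the shader stages of a graphics pipeline in three phases: |
|  * gather the per-stage info and NIR (adding a no-op fs if none was |
|  * provided), link the stages from last to first, and finally lower each |
|  * stage against the pipeline layout and compile a default variant to VIR. |
|  */ |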
| static VkResult |
| pipeline_compile_graphics(struct v3dv_pipeline *pipeline, |
| const VkGraphicsPipelineCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| struct v3dv_pipeline_stage *stages[MESA_SHADER_STAGES] = { }; |
| struct v3dv_device *device = pipeline->device; |
| struct v3dv_physical_device *physical_device = |
| &device->instance->physicalDevice; |
| |
|    /* First pass to get the common info from the shader and build its nir |
|     * shader. We don't care about the coord shader for now. |
|     */ |
| for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { |
| const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; |
| gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); |
| |
| struct v3dv_pipeline_stage *p_stage = |
| vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| |
|       /* Note that we are assigning program_id slightly differently than |
|        * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin |
|        * will have different program_ids, while v3d would have the same for |
|        * both. For the case of v3dv, it is more natural to have an id this |
|        * way, as right now we are using it for debugging, not for shader-db. |
|        */ |
| p_stage->program_id = |
| p_atomic_inc_return(&physical_device->next_program_id); |
| p_stage->compiled_variant_count = 0; |
| p_stage->cache = create_variant_cache(stage); |
| |
| p_stage->pipeline = pipeline; |
| p_stage->stage = stage; |
| if (stage == MESA_SHADER_VERTEX) |
| p_stage->is_coord = false; |
| p_stage->entrypoint = sinfo->pName; |
| p_stage->module = v3dv_shader_module_from_handle(sinfo->module); |
| p_stage->spec_info = sinfo->pSpecializationInfo; |
| |
| pipeline->active_stages |= sinfo->stage; |
| |
|       /* FIXME: when cache support is in place, first check whether, for the |
|        * given spirv module and options, we already have a nir shader. |
|        */ |
| p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage); |
| |
| stages[stage] = p_stage; |
| } |
| |
| /* Add a no-op fragment shader if needed */ |
| if (!stages[MESA_SHADER_FRAGMENT]) { |
| nir_builder b; |
| nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, |
| &v3dv_nir_options); |
| b.shader->info.name = ralloc_strdup(b.shader, "noop_fs"); |
| |
| struct v3dv_pipeline_stage *p_stage = |
| vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| |
| p_stage->pipeline = pipeline; |
| p_stage->stage = MESA_SHADER_FRAGMENT; |
| p_stage->entrypoint = "main"; |
| p_stage->module = 0; |
| p_stage->nir = b.shader; |
| |
| p_stage->program_id = |
| p_atomic_inc_return(&physical_device->next_program_id); |
| p_stage->compiled_variant_count = 0; |
| p_stage->cache = create_variant_cache(MESA_SHADER_FRAGMENT); |
| |
| stages[MESA_SHADER_FRAGMENT] = p_stage; |
|       /* active_stages is a VkShaderStageFlags mask */ |
|       pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; |
| } |
| |
| /* Linking */ |
| struct v3dv_pipeline_stage *next_stage = NULL; |
| for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) { |
| if (stages[stage] == NULL || stages[stage]->entrypoint == NULL) |
| continue; |
| |
| struct v3dv_pipeline_stage *p_stage = stages[stage]; |
| |
| switch(stage) { |
| case MESA_SHADER_VERTEX: |
| link_shaders(p_stage->nir, next_stage->nir); |
| break; |
| case MESA_SHADER_FRAGMENT: |
| /* FIXME: not doing any specific linking stuff here yet */ |
| break; |
| default: |
| unreachable("not supported shader stage"); |
| } |
| |
| next_stage = stages[stage]; |
| } |
| |
| /* Compiling to vir. Note that at this point we are compiling a default |
| * variant. Binding to textures, and other stuff (that would need a |
| * cmd_buffer) would need a recompile |
| */ |
| for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) { |
| if (stages[stage] == NULL || stages[stage]->entrypoint == NULL) |
| continue; |
| |
| struct v3dv_pipeline_stage *p_stage = stages[stage]; |
| |
| pipeline_lower_nir(pipeline, p_stage, pipeline->layout); |
| |
| switch(stage) { |
| case MESA_SHADER_VERTEX: { |
| /* Right now we only support pipelines with both vertex and fragment |
| * shader. |
| */ |
| assert(pipeline->fs); |
| |
| /* Make sure we do all our common lowering *before* we create the vs |
| * and vs_bin pipeline stages, since from that point forward we need to |
| * run lowerings for both of them separately, since each stage will |
| * own its NIR code. |
| */ |
| lower_vs_io(p_stage->nir); |
| |
| pipeline->vs = p_stage; |
| pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, pAllocator); |
| |
|       /* FIXME: this should likely be moved to a gather-info method that |
|        * fills a full struct inside pipeline_stage. |
|        */ |
| const VkPipelineInputAssemblyStateCreateInfo *ia_info = |
| pCreateInfo->pInputAssemblyState; |
| pipeline->vs->topology = vk_to_pipe_prim_type[ia_info->topology]; |
| |
|       /* Note that at this point we compile twice, once for vs and |
|        * once for vs_bin. For now we are maintaining two pipeline_stages. |
|        * |
|        * FIXME: this leads to two caches, when it shouldn't; revisit. |
|        */ |
| struct v3d_vs_key *key = &pipeline->vs->key.vs; |
| pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs); |
| VkResult vk_result; |
| pipeline->vs->current_variant = |
| v3dv_get_shader_variant(pipeline->vs, &key->base, sizeof(*key), |
| pAllocator, &vk_result); |
| if (vk_result != VK_SUCCESS) |
| return vk_result; |
| |
| key = &pipeline->vs_bin->key.vs; |
| pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin); |
| pipeline->vs_bin->current_variant = |
| v3dv_get_shader_variant(pipeline->vs_bin, &key->base, sizeof(*key), |
| pAllocator, &vk_result); |
| if (vk_result != VK_SUCCESS) |
| return vk_result; |
| |
| break; |
| } |
| case MESA_SHADER_FRAGMENT: { |
| struct v3d_fs_key *key = &p_stage->key.fs; |
| |
| pipeline->fs = p_stage; |
| |
| pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage, |
| get_ucp_enable_mask(stages)); |
| |
| lower_fs_io(p_stage->nir); |
| |
| VkResult vk_result; |
| p_stage->current_variant = |
| v3dv_get_shader_variant(p_stage, &key->base, sizeof(*key), |
| pAllocator, &vk_result); |
| if (vk_result != VK_SUCCESS) |
| return vk_result; |
| |
| break; |
| } |
| default: |
| unreachable("not supported shader stage"); |
| } |
| } |
| |
/* FIXME: the values below are the defaults when no GS is present. We
* would need to provide real values if GS support is added.
*/
| pipeline->vpm_cfg_bin.As = 1; |
| pipeline->vpm_cfg_bin.Ve = 0; |
| pipeline->vpm_cfg_bin.Vc = |
| pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size; |
| |
| pipeline->vpm_cfg.As = 1; |
| pipeline->vpm_cfg.Ve = 0; |
| pipeline->vpm_cfg.Vc = |
| pipeline->vs->current_variant->prog_data.vs->vcm_cache_size; |
| |
| return VK_SUCCESS; |
| } |
| |
| static unsigned |
| v3dv_dynamic_state_mask(VkDynamicState state) |
| { |
switch (state) {
| case VK_DYNAMIC_STATE_VIEWPORT: |
| return V3DV_DYNAMIC_VIEWPORT; |
| case VK_DYNAMIC_STATE_SCISSOR: |
| return V3DV_DYNAMIC_SCISSOR; |
| case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: |
| return V3DV_DYNAMIC_STENCIL_COMPARE_MASK; |
| case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: |
| return V3DV_DYNAMIC_STENCIL_WRITE_MASK; |
| case VK_DYNAMIC_STATE_STENCIL_REFERENCE: |
| return V3DV_DYNAMIC_STENCIL_REFERENCE; |
| case VK_DYNAMIC_STATE_BLEND_CONSTANTS: |
| return V3DV_DYNAMIC_BLEND_CONSTANTS; |
| case VK_DYNAMIC_STATE_DEPTH_BIAS: |
| return V3DV_DYNAMIC_DEPTH_BIAS; |
| case VK_DYNAMIC_STATE_LINE_WIDTH: |
| return V3DV_DYNAMIC_LINE_WIDTH; |
| |
/* Depth bounds testing is not available in V3D 4.2 so here we are just
* ignoring this dynamic state. We are already asserting at pipeline creation
* time that depth bounds testing is not enabled.
*/
| case VK_DYNAMIC_STATE_DEPTH_BOUNDS: |
| return 0; |
| |
| default: |
| unreachable("Unhandled dynamic state"); |
| } |
| } |
| |
| static void |
| pipeline_init_dynamic_state( |
| struct v3dv_pipeline *pipeline, |
| const VkPipelineDynamicStateCreateInfo *pDynamicState, |
| const VkPipelineViewportStateCreateInfo *pViewportState, |
| const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState, |
| const VkPipelineColorBlendStateCreateInfo *pColorBlendState, |
| const VkPipelineRasterizationStateCreateInfo *pRasterizationState) |
| { |
| pipeline->dynamic_state = default_dynamic_state; |
| struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; |
| |
| /* Create a mask of enabled dynamic states */ |
| uint32_t dynamic_states = 0; |
| if (pDynamicState) { |
| uint32_t count = pDynamicState->dynamicStateCount; |
| for (uint32_t s = 0; s < count; s++) { |
| dynamic_states |= |
| v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]); |
| } |
| } |
| |
| /* For any pipeline states that are not dynamic, set the dynamic state |
| * from the static pipeline state. |
| */ |
| if (pViewportState) { |
| if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) { |
| dynamic->viewport.count = pViewportState->viewportCount; |
| typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports, |
| pViewportState->viewportCount); |
| |
| for (uint32_t i = 0; i < dynamic->viewport.count; i++) { |
| v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i], |
| dynamic->viewport.scale[i], |
| dynamic->viewport.translate[i]); |
| } |
| } |
| |
| if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) { |
| dynamic->scissor.count = pViewportState->scissorCount; |
| typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors, |
| pViewportState->scissorCount); |
| } |
| } |
| |
| if (pDepthStencilState) { |
| if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { |
| dynamic->stencil_compare_mask.front = |
| pDepthStencilState->front.compareMask; |
| dynamic->stencil_compare_mask.back = |
| pDepthStencilState->back.compareMask; |
| } |
| |
| if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { |
| dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask; |
| dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask; |
| } |
| |
| if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) { |
| dynamic->stencil_reference.front = pDepthStencilState->front.reference; |
| dynamic->stencil_reference.back = pDepthStencilState->back.reference; |
| } |
| } |
| |
| if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) { |
| memcpy(dynamic->blend_constants, pColorBlendState->blendConstants, |
| sizeof(dynamic->blend_constants)); |
| } |
| |
| if (pRasterizationState) { |
| if (pRasterizationState->depthBiasEnable && |
| !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) { |
| dynamic->depth_bias.constant_factor = |
| pRasterizationState->depthBiasConstantFactor; |
| dynamic->depth_bias.slope_factor = |
| pRasterizationState->depthBiasSlopeFactor; |
| } |
| if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH)) |
| dynamic->line_width = pRasterizationState->lineWidth; |
| } |
| |
| pipeline->dynamic_state.mask = dynamic_states; |
| } |
| |
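/* Translates a VkBlendFactor to the hardware's blend factor enum. When the
* destination format has no alpha channel, reads of destination alpha
* return 1.0, so with dst_alpha_one set the DST_ALPHA factors can be folded
* into ONE/ZERO.
*/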
| static uint8_t |
| blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants) |
| { |
| switch (factor) { |
| case VK_BLEND_FACTOR_ZERO: |
| case VK_BLEND_FACTOR_ONE: |
| case VK_BLEND_FACTOR_SRC_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: |
| case VK_BLEND_FACTOR_DST_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: |
| case VK_BLEND_FACTOR_SRC_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: |
| case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: |
| return factor; |
| case VK_BLEND_FACTOR_CONSTANT_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: |
| case VK_BLEND_FACTOR_CONSTANT_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: |
| *needs_constants = true; |
| return factor; |
| case VK_BLEND_FACTOR_DST_ALPHA: |
| return dst_alpha_one ? V3D_BLEND_FACTOR_ONE : |
| V3D_BLEND_FACTOR_DST_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: |
| return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO : |
| V3D_BLEND_FACTOR_INV_DST_ALPHA; |
| case VK_BLEND_FACTOR_SRC1_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: |
| case VK_BLEND_FACTOR_SRC1_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: |
| assert(!"Invalid blend factor: dual source blending not supported."); |
| default: |
| assert(!"Unknown blend factor."); |
| } |
| |
/* Should be handled by the switch above; this is here to avoid an
* "end of non-void function" warning in builds where the asserts
* compile out.
*/
unreachable("Unknown blend factor.");
| } |
| |
| static void |
| pack_blend(struct v3dv_pipeline *pipeline, |
| const VkPipelineColorBlendStateCreateInfo *cb_info) |
| { |
| /* By default, we are not enabling blending and all color channel writes are |
| * enabled. Color write enables are independent of whether blending is |
| * enabled or not. |
| * |
| * Vulkan specifies color write masks so that bits set correspond to |
| * enabled channels. Our hardware does it the other way around. |
| */ |
| pipeline->blend.enables = 0; |
| pipeline->blend.color_write_masks = 0; /* All channels enabled */ |
| |
| if (!cb_info) |
| return; |
| |
| assert(pipeline->subpass); |
| if (pipeline->subpass->color_count == 0) |
| return; |
| |
| assert(pipeline->subpass->color_count == cb_info->attachmentCount); |
| |
| pipeline->blend.needs_color_constants = false; |
| uint32_t color_write_masks = 0; |
| for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) { |
| const VkPipelineColorBlendAttachmentState *b_state = |
| &cb_info->pAttachments[i]; |
| |
| uint32_t attachment_idx = |
| pipeline->subpass->color_attachments[i].attachment; |
| if (attachment_idx == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
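/* For example, a colorWriteMask with only RGB enabled (0x7) becomes a
* per-RT hardware disable mask of 0x8.
*/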
| color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i); |
| |
| if (!b_state->blendEnable) |
| continue; |
| |
| VkAttachmentDescription *desc = |
| &pipeline->pass->attachments[attachment_idx].desc; |
| const struct v3dv_format *format = v3dv_get_format(desc->format); |
| bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1); |
| |
| uint8_t rt_mask = 1 << i; |
| pipeline->blend.enables |= rt_mask; |
| |
| v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) { |
| config.render_target_mask = rt_mask; |
| |
| config.color_blend_mode = b_state->colorBlendOp; |
| config.color_blend_dst_factor = |
| blend_factor(b_state->dstColorBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants); |
| config.color_blend_src_factor = |
| blend_factor(b_state->srcColorBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants); |
| |
| config.alpha_blend_mode = b_state->alphaBlendOp; |
| config.alpha_blend_dst_factor = |
| blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants); |
| config.alpha_blend_src_factor = |
| blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants); |
| } |
| } |
| |
| pipeline->blend.color_write_masks = color_write_masks; |
| } |
| |
/* This requires that pack_blend() has been called before, so we can set
* the overall blend enable bit in the CFG_BITS packet.
*/
| static void |
| pack_cfg_bits(struct v3dv_pipeline *pipeline, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info, |
| const VkPipelineRasterizationStateCreateInfo *rs_info) |
| { |
| assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS)); |
| |
| v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) { |
| config.enable_forward_facing_primitive = |
| rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false; |
| |
| config.enable_reverse_facing_primitive = |
| rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false; |
| |
| /* Seems like the hardware is backwards regarding this setting... */ |
| config.clockwise_primitives = |
| rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false; |
| |
config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
| |
/* This is required to pass line rasterization tests in CTS while
* exposing, at least, a minimum of 4 bits of subpixel precision (the
* minimum requirement).
*/
| config.line_rasterization = 1; /* perp end caps */ |
| |
| if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) { |
| config.direct3d_wireframe_triangles_mode = true; |
| config.direct3d_point_fill_mode = |
| rs_info->polygonMode == VK_POLYGON_MODE_POINT; |
| } |
| |
/* FIXME: oversample_mode postponed until MSAA gets supported */
| config.rasterizer_oversample_mode = false; |
| |
| /* From the Vulkan spec: |
| * |
| * "Provoking Vertex: |
| * |
| * The vertex in a primitive from which flat shaded attribute |
| * values are taken. This is generally the “first” vertex in the |
| * primitive, and depends on the primitive topology." |
| * |
| * First vertex is the Direct3D style for provoking vertex. OpenGL uses |
| * the last vertex by default. |
| */ |
| config.direct3d_provoking_vertex = true; |
| |
| config.blend_enable = pipeline->blend.enables != 0; |
| |
| /* Disable depth/stencil if we don't have a D/S attachment */ |
| bool has_ds_attachment = |
| pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED; |
| |
| if (ds_info && ds_info->depthTestEnable && has_ds_attachment) { |
| config.z_updates_enable = true; |
| config.depth_test_function = ds_info->depthCompareOp; |
| } else { |
| config.depth_test_function = VK_COMPARE_OP_ALWAYS; |
| } |
| |
| /* EZ state will be updated at draw time based on bound pipeline state */ |
| config.early_z_updates_enable = false; |
| config.early_z_enable = false; |
| |
config.stencil_enable =
ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;
}
| } |
| |
| static uint32_t |
translate_stencil_op(VkStencilOp op)
| { |
| switch (op) { |
| case VK_STENCIL_OP_KEEP: |
| return V3D_STENCIL_OP_KEEP; |
| case VK_STENCIL_OP_ZERO: |
| return V3D_STENCIL_OP_ZERO; |
| case VK_STENCIL_OP_REPLACE: |
| return V3D_STENCIL_OP_REPLACE; |
| case VK_STENCIL_OP_INCREMENT_AND_CLAMP: |
| return V3D_STENCIL_OP_INCR; |
| case VK_STENCIL_OP_DECREMENT_AND_CLAMP: |
| return V3D_STENCIL_OP_DECR; |
| case VK_STENCIL_OP_INVERT: |
| return V3D_STENCIL_OP_INVERT; |
| case VK_STENCIL_OP_INCREMENT_AND_WRAP: |
| return V3D_STENCIL_OP_INCWRAP; |
| case VK_STENCIL_OP_DECREMENT_AND_WRAP: |
| return V3D_STENCIL_OP_DECWRAP; |
| default: |
| unreachable("bad stencil op"); |
| } |
| } |
| |
| static void |
| pack_single_stencil_cfg(struct v3dv_pipeline *pipeline, |
| uint8_t *stencil_cfg, |
| bool is_front, |
| bool is_back, |
| const VkStencilOpState *stencil_state) |
| { |
| /* From the Vulkan spec: |
| * |
| * "Reference is an integer reference value that is used in the unsigned |
| * stencil comparison. The reference value used by stencil comparison |
| * must be within the range [0,2^s-1] , where s is the number of bits in |
| * the stencil framebuffer attachment, otherwise the reference value is |
| * considered undefined." |
| * |
* In our case, 's' is always 8, so we clamp to that range to prevent our
* packing functions from asserting in debug mode if they see larger values.
| * |
| * If we have dynamic state we need to make sure we set the corresponding |
| * state bits to 0, since cl_emit_with_prepacked ORs the new value with |
| * the old. |
| */ |
| const uint8_t write_mask = |
| pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ? |
| 0 : stencil_state->writeMask & 0xff; |
| |
| const uint8_t compare_mask = |
| pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ? |
| 0 : stencil_state->compareMask & 0xff; |
| |
const uint8_t reference =
pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
0 : stencil_state->reference & 0xff;
| |
| v3dv_pack(stencil_cfg, STENCIL_CFG, config) { |
| config.front_config = is_front; |
| config.back_config = is_back; |
| config.stencil_write_mask = write_mask; |
| config.stencil_test_mask = compare_mask; |
| config.stencil_test_function = stencil_state->compareOp; |
| config.stencil_pass_op = translate_stencil_op(stencil_state->passOp); |
| config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp); |
| config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp); |
| config.stencil_ref_value = reference; |
| } |
| } |
| |
| static void |
| pack_stencil_cfg(struct v3dv_pipeline *pipeline, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info) |
| { |
| assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG)); |
| |
| if (!ds_info || !ds_info->stencilTestEnable) |
| return; |
| |
| if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) |
| return; |
| |
| const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK | |
| V3DV_DYNAMIC_STENCIL_WRITE_MASK | |
| V3DV_DYNAMIC_STENCIL_REFERENCE; |
| |
| /* If front != back or we have dynamic stencil state we can't emit a single |
| * packet for both faces. |
| */ |
| bool needs_front_and_back = false; |
| if ((pipeline->dynamic_state.mask & dynamic_stencil_states) || |
| memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front))) |
| needs_front_and_back = true; |
| |
| /* If the front and back configurations are the same we can emit both with |
| * a single packet. |
| */ |
| pipeline->emit_stencil_cfg[0] = true; |
| if (!needs_front_and_back) { |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0], |
| true, true, &ds_info->front); |
| } else { |
| pipeline->emit_stencil_cfg[1] = true; |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0], |
| true, false, &ds_info->front); |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1], |
| false, true, &ds_info->back); |
| } |
| } |
| |
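/* A stencil configuration is a no-op for early-Z purposes if the stencil
* test always passes (so discarding a fragment early can't skip a stencil
* fail update) and a depth test failure keeps the stencil contents
* unchanged.
*/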
| static bool |
| stencil_op_is_no_op(const VkStencilOpState *stencil) |
| { |
| return stencil->depthFailOp == VK_STENCIL_OP_KEEP && |
| stencil->compareOp == VK_COMPARE_OP_ALWAYS; |
| } |
| |
| static void |
| enable_depth_bias(struct v3dv_pipeline *pipeline, |
| const VkPipelineRasterizationStateCreateInfo *rs_info) |
| { |
| pipeline->depth_bias.enabled = false; |
| pipeline->depth_bias.is_z16 = false; |
| |
| if (!rs_info || !rs_info->depthBiasEnable) |
| return; |
| |
| /* Check the depth/stencil attachment description for the subpass used with |
| * this pipeline. |
| */ |
| assert(pipeline->pass && pipeline->subpass); |
| struct v3dv_render_pass *pass = pipeline->pass; |
| struct v3dv_subpass *subpass = pipeline->subpass; |
| |
| if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) |
| return; |
| |
| assert(subpass->ds_attachment.attachment < pass->attachment_count); |
| struct v3dv_render_pass_attachment *att = |
| &pass->attachments[subpass->ds_attachment.attachment]; |
| |
| if (att->desc.format == VK_FORMAT_D16_UNORM) |
| pipeline->depth_bias.is_z16 = true; |
| |
| pipeline->depth_bias.enabled = true; |
| } |
| |
| static void |
| pipeline_set_ez_state(struct v3dv_pipeline *pipeline, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info) |
| { |
| if (!ds_info || !ds_info->depthTestEnable) { |
| pipeline->ez_state = VC5_EZ_DISABLED; |
| return; |
| } |
| |
| switch (ds_info->depthCompareOp) { |
| case VK_COMPARE_OP_LESS: |
| case VK_COMPARE_OP_LESS_OR_EQUAL: |
| pipeline->ez_state = VC5_EZ_LT_LE; |
| break; |
| case VK_COMPARE_OP_GREATER: |
| case VK_COMPARE_OP_GREATER_OR_EQUAL: |
| pipeline->ez_state = VC5_EZ_GT_GE; |
| break; |
| case VK_COMPARE_OP_NEVER: |
| case VK_COMPARE_OP_EQUAL: |
| pipeline->ez_state = VC5_EZ_UNDECIDED; |
| break; |
| default: |
| pipeline->ez_state = VC5_EZ_DISABLED; |
| break; |
| } |
| |
| /* If stencil is enabled and is not a no-op, we need to disable EZ */ |
| if (ds_info->stencilTestEnable && |
| (!stencil_op_is_no_op(&ds_info->front) || |
| !stencil_op_is_no_op(&ds_info->back))) { |
| pipeline->ez_state = VC5_EZ_DISABLED; |
| } |
| } |
| |
| static void |
| pack_shader_state_record(struct v3dv_pipeline *pipeline) |
| { |
| assert(sizeof(pipeline->shader_state_record) == |
| cl_packet_length(GL_SHADER_STATE_RECORD)); |
| |
| struct v3d_fs_prog_data *prog_data_fs = |
| pipeline->fs->current_variant->prog_data.fs; |
| |
| struct v3d_vs_prog_data *prog_data_vs = |
| pipeline->vs->current_variant->prog_data.vs; |
| |
| struct v3d_vs_prog_data *prog_data_vs_bin = |
| pipeline->vs_bin->current_variant->prog_data.vs; |
| |
/* Note: we are not packing addresses, as we need the job (see
* cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
* point, as they depend on dynamic info that can be set after the
* pipeline is created (like the viewport). They will be filled in
* later, so we are doing a partial prepacking here.
*/
| v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) { |
| shader.enable_clipping = true; |
| |
| shader.point_size_in_shaded_vertex_data = |
| pipeline->vs->topology == PIPE_PRIM_POINTS; |
| |
/* Must be set if the shader modifies Z, discards, or modifies
* the sample mask. For any of these cases, the fragment
* shader needs to write the Z value (even if it only discards).
*/
| shader.fragment_shader_does_z_writes = prog_data_fs->writes_z; |
| /* Set if the EZ test must be disabled (due to shader side |
| * effects and the early_z flag not being present in the |
| * shader). |
| */ |
| shader.turn_off_early_z_test = prog_data_fs->disable_ez; |
| |
| shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = |
| prog_data_fs->uses_center_w; |
| |
| shader.any_shader_reads_hardware_written_primitive_id = false; |
| |
| shader.do_scoreboard_wait_on_first_thread_switch = |
| prog_data_fs->lock_scoreboard_on_first_thrsw; |
| shader.disable_implicit_point_line_varyings = |
| !prog_data_fs->uses_implicit_point_line_varyings; |
| |
| shader.number_of_varyings_in_fragment_shader = |
| prog_data_fs->num_inputs; |
| |
| shader.coordinate_shader_propagate_nans = true; |
| shader.vertex_shader_propagate_nans = true; |
| shader.fragment_shader_propagate_nans = true; |
| |
/* Note: see previous note about addresses */
| /* shader.coordinate_shader_code_address */ |
| /* shader.vertex_shader_code_address */ |
| /* shader.fragment_shader_code_address */ |
| |
| /* FIXME: Use combined input/output size flag in the common case (also |
| * on v3d, see v3dx_draw). |
| */ |
| shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = |
| prog_data_vs_bin->separate_segments; |
| shader.vertex_shader_has_separate_input_and_output_vpm_blocks = |
| prog_data_vs->separate_segments; |
| |
| shader.coordinate_shader_input_vpm_segment_size = |
| prog_data_vs_bin->separate_segments ? |
| prog_data_vs_bin->vpm_input_size : 1; |
| shader.vertex_shader_input_vpm_segment_size = |
| prog_data_vs->separate_segments ? |
| prog_data_vs->vpm_input_size : 1; |
| |
| shader.coordinate_shader_output_vpm_segment_size = |
| prog_data_vs_bin->vpm_output_size; |
| shader.vertex_shader_output_vpm_segment_size = |
| prog_data_vs->vpm_output_size; |
| |
/* Note: see previous note about addresses */
| /* shader.coordinate_shader_uniforms_address */ |
| /* shader.vertex_shader_uniforms_address */ |
| /* shader.fragment_shader_uniforms_address */ |
| |
| shader.min_coord_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg_bin.As; |
| shader.min_vertex_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg.As; |
| |
| shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg_bin.Ve; |
| shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg.Ve; |
| |
| shader.coordinate_shader_4_way_threadable = |
| prog_data_vs_bin->base.threads == 4; |
| shader.vertex_shader_4_way_threadable = |
| prog_data_vs->base.threads == 4; |
| shader.fragment_shader_4_way_threadable = |
| prog_data_fs->base.threads == 4; |
| |
| shader.coordinate_shader_start_in_final_thread_section = |
| prog_data_vs_bin->base.single_seg; |
| shader.vertex_shader_start_in_final_thread_section = |
| prog_data_vs->base.single_seg; |
| shader.fragment_shader_start_in_final_thread_section = |
| prog_data_fs->base.single_seg; |
| |
| shader.vertex_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_vid; |
| shader.base_instance_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_biid; |
| shader.instance_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_iid; |
| shader.vertex_id_read_by_vertex_shader = |
| prog_data_vs->uses_vid; |
| shader.base_instance_id_read_by_vertex_shader = |
| prog_data_vs->uses_biid; |
| shader.instance_id_read_by_vertex_shader = |
| prog_data_vs->uses_iid; |
| |
/* Note: see previous note about addresses */
| /* shader.address_of_default_attribute_values */ |
| } |
| } |
| |
| static void |
| pack_vcm_cache_size(struct v3dv_pipeline *pipeline) |
| { |
| assert(sizeof(pipeline->vcm_cache_size) == |
| cl_packet_length(VCM_CACHE_SIZE)); |
| |
| v3dv_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) { |
| vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc; |
| vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc; |
| } |
| } |
| |
/* As defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD packet */
| static uint8_t |
| get_attr_type(const struct util_format_description *desc) |
| { |
| uint32_t r_size = desc->channel[0].size; |
| uint8_t attr_type = ATTRIBUTE_FLOAT; |
| |
| switch (desc->channel[0].type) { |
| case UTIL_FORMAT_TYPE_FLOAT: |
| if (r_size == 32) { |
| attr_type = ATTRIBUTE_FLOAT; |
| } else { |
| assert(r_size == 16); |
| attr_type = ATTRIBUTE_HALF_FLOAT; |
| } |
| break; |
| |
| case UTIL_FORMAT_TYPE_SIGNED: |
| case UTIL_FORMAT_TYPE_UNSIGNED: |
| switch (r_size) { |
| case 32: |
| attr_type = ATTRIBUTE_INT; |
| break; |
| case 16: |
| attr_type = ATTRIBUTE_SHORT; |
| break; |
| case 10: |
| attr_type = ATTRIBUTE_INT2_10_10_10; |
| break; |
| case 8: |
| attr_type = ATTRIBUTE_BYTE; |
| break; |
| default: |
| fprintf(stderr, |
| "format %s unsupported\n", |
| desc->name); |
| attr_type = ATTRIBUTE_BYTE; |
| abort(); |
| } |
| break; |
| |
| default: |
| fprintf(stderr, |
| "format %s unsupported\n", |
| desc->name); |
| abort(); |
| } |
| |
| return attr_type; |
| } |
| |
| static bool |
| create_default_attribute_values(struct v3dv_pipeline *pipeline, |
| const VkPipelineVertexInputStateCreateInfo *vi_info) |
| { |
| uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; |
| |
| if (pipeline->default_attribute_values == NULL) { |
| pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size, |
| "default_vi_attributes", |
| true); |
| |
| if (!pipeline->default_attribute_values) { |
| fprintf(stderr, "failed to allocate memory for the default " |
| "attribute values\n"); |
| return false; |
| } |
| } |
| |
| bool ok = v3dv_bo_map(pipeline->device, |
| pipeline->default_attribute_values, size); |
| if (!ok) { |
| fprintf(stderr, "failed to map default attribute values buffer\n"); |
| return false; |
| } |
| |
| uint32_t *attrs = pipeline->default_attribute_values->map; |
| |
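/* Write a (0, 0, 0, 1) default value for each attribute: for integer
* formats the w component is the integer 1, otherwise it is the bit
* pattern of 1.0f.
*/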
| for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { |
| attrs[i * 4 + 0] = 0; |
| attrs[i * 4 + 1] = 0; |
| attrs[i * 4 + 2] = 0; |
| if (i < pipeline->va_count && vk_format_is_int(pipeline->va[i].vk_format)) { |
| attrs[i * 4 + 3] = 1; |
| } else { |
| attrs[i * 4 + 3] = fui(1.0); |
| } |
| } |
| |
| v3dv_bo_unmap(pipeline->device, pipeline->default_attribute_values); |
| |
| return true; |
| } |
| |
| static void |
| pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline, |
| uint32_t index, |
| const VkVertexInputAttributeDescription *vi_desc) |
| { |
| const uint32_t packet_length = |
| cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); |
| |
| const struct util_format_description *desc = |
| vk_format_description(vi_desc->format); |
| |
| uint32_t binding = vi_desc->binding; |
| |
| v3dv_pack(&pipeline->vertex_attrs[index * packet_length], |
| GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { |
| |
| /* vec_size == 0 means 4 */ |
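/* For example, a four-channel format has nr_channels == 4, and
* 4 & 3 == 0.
*/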
| attr.vec_size = desc->nr_channels & 3; |
| attr.signed_int_type = (desc->channel[0].type == |
| UTIL_FORMAT_TYPE_SIGNED); |
| attr.normalized_int_type = desc->channel[0].normalized; |
| attr.read_as_int_uint = desc->channel[0].pure_integer; |
| |
| attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor, |
| 0xffff); |
| attr.stride = pipeline->vb[binding].stride; |
| attr.type = get_attr_type(desc); |
| } |
| } |
| |
| static VkResult |
| pipeline_init(struct v3dv_pipeline *pipeline, |
| struct v3dv_device *device, |
| const VkGraphicsPipelineCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| VkResult result = VK_SUCCESS; |
| |
| pipeline->device = device; |
| |
| V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); |
| pipeline->layout = layout; |
| |
| V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); |
| assert(pCreateInfo->subpass < render_pass->subpass_count); |
| pipeline->pass = render_pass; |
| pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; |
| |
| /* If rasterization is not enabled, various CreateInfo structs must be |
| * ignored. |
| */ |
| const bool raster_enabled = |
| !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; |
| |
| const VkPipelineViewportStateCreateInfo *vp_info = |
| raster_enabled ? pCreateInfo->pViewportState : NULL; |
| |
| const VkPipelineDepthStencilStateCreateInfo *ds_info = |
| raster_enabled ? pCreateInfo->pDepthStencilState : NULL; |
| |
| const VkPipelineRasterizationStateCreateInfo *rs_info = |
| raster_enabled ? pCreateInfo->pRasterizationState : NULL; |
| |
| const VkPipelineColorBlendStateCreateInfo *cb_info = |
| raster_enabled ? pCreateInfo->pColorBlendState : NULL; |
| |
| pipeline_init_dynamic_state(pipeline, |
| pCreateInfo->pDynamicState, |
| vp_info, ds_info, cb_info, rs_info); |
| |
| /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that |
| * feature and it shouldn't be used by any pipeline. |
| */ |
| assert(!ds_info || !ds_info->depthBoundsTestEnable); |
| |
| pack_blend(pipeline, cb_info); |
| pack_cfg_bits(pipeline, ds_info, rs_info); |
| pack_stencil_cfg(pipeline, ds_info); |
| pipeline_set_ez_state(pipeline, ds_info); |
| enable_depth_bias(pipeline, rs_info); |
| |
| pipeline->primitive_restart = |
| pCreateInfo->pInputAssemblyState->primitiveRestartEnable; |
| |
| result = pipeline_compile_graphics(pipeline, pCreateInfo, pAllocator); |
| |
| if (result != VK_SUCCESS) { |
/* The caller will destroy the pipeline and we didn't allocate any
* extra info here, so there is nothing else to clean up.
*/
| return result; |
| } |
| |
| pack_shader_state_record(pipeline); |
| pack_vcm_cache_size(pipeline); |
| |
| const VkPipelineVertexInputStateCreateInfo *vi_info = |
| pCreateInfo->pVertexInputState; |
| |
| pipeline->vb_count = vi_info->vertexBindingDescriptionCount; |
| for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { |
| const VkVertexInputBindingDescription *desc = |
| &vi_info->pVertexBindingDescriptions[i]; |
| |
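/* VK_VERTEX_INPUT_RATE_VERTEX is 0 and VK_VERTEX_INPUT_RATE_INSTANCE is
* 1, which conveniently match the 0 (per-vertex) and 1 (per-instance)
* instance divisor values the hardware expects.
*/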
| pipeline->vb[desc->binding].stride = desc->stride; |
| pipeline->vb[desc->binding].instance_divisor = desc->inputRate; |
| } |
| |
| pipeline->va_count = 0; |
| nir_shader *shader = pipeline->vs->nir; |
| |
| for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { |
| const VkVertexInputAttributeDescription *desc = |
| &vi_info->pVertexAttributeDescriptions[i]; |
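/* NIR vertex shader inputs are numbered from VERT_ATTRIB_GENERIC0, so
* the Vulkan attribute location is offset by that base.
*/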
| uint32_t location = desc->location + VERT_ATTRIB_GENERIC0; |
| |
nir_variable *var =
nir_find_variable_with_location(shader, nir_var_shader_in, location);
| |
| if (var != NULL) { |
| unsigned driver_location = var->data.driver_location; |
| |
| assert(driver_location < MAX_VERTEX_ATTRIBS); |
| pipeline->va[driver_location].offset = desc->offset; |
| pipeline->va[driver_location].binding = desc->binding; |
| pipeline->va[driver_location].vk_format = desc->format; |
| |
| pack_shader_state_attribute_record(pipeline, driver_location, desc); |
| |
| pipeline->va_count++; |
| } |
| } |
| |
| if (!create_default_attribute_values(pipeline, vi_info)) |
| return VK_ERROR_OUT_OF_DEVICE_MEMORY; |
| |
| return result; |
| } |
| |
| static VkResult |
| graphics_pipeline_create(VkDevice _device, |
| VkPipelineCache _cache, |
| const VkGraphicsPipelineCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipeline) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| struct v3dv_pipeline *pipeline; |
| VkResult result; |
| |
| pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (pipeline == NULL) |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| result = pipeline_init(pipeline, device, |
| pCreateInfo, |
| pAllocator); |
| |
| if (result != VK_SUCCESS) { |
| v3dv_destroy_pipeline(pipeline, device, pAllocator); |
| return result; |
| } |
| |
| *pPipeline = v3dv_pipeline_to_handle(pipeline); |
| |
| return VK_SUCCESS; |
| } |
| |
| VkResult |
| v3dv_CreateGraphicsPipelines(VkDevice _device, |
| VkPipelineCache pipelineCache, |
| uint32_t count, |
| const VkGraphicsPipelineCreateInfo *pCreateInfos, |
| const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipelines) |
| { |
| VkResult result = VK_SUCCESS; |
| |
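/* If creating a pipeline fails we still try to create the remaining
* ones, setting the failed entries to VK_NULL_HANDLE and returning an
* error for the batch, as the spec expects.
*/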
| for (uint32_t i = 0; i < count; i++) { |
| VkResult local_result; |
| |
| local_result = graphics_pipeline_create(_device, |
| pipelineCache, |
| &pCreateInfos[i], |
| pAllocator, |
| &pPipelines[i]); |
| |
| if (local_result != VK_SUCCESS) { |
| result = local_result; |
| pPipelines[i] = VK_NULL_HANDLE; |
| } |
| } |
| |
| return result; |
| } |
| |
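/* Returns the size and base alignment of a shared variable type, with
* booleans stored as 32-bit values and vec3 aligned like vec4.
*/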
| static void |
| shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) |
| { |
| assert(glsl_type_is_vector_or_scalar(type)); |
| |
| uint32_t comp_size = glsl_type_is_boolean(type) |
| ? 4 : glsl_get_bit_size(type) / 8; |
| unsigned length = glsl_get_vector_elements(type); |
*size = comp_size * length;
*align = comp_size * (length == 3 ? 4 : length);
| } |
| |
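/* Lower shared memory accesses to explicit 32-bit offsets from the start
* of the workgroup's shared memory area.
*/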
| static void |
| lower_cs_shared(struct nir_shader *nir) |
| { |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, |
| nir_var_mem_shared, shared_type_info); |
| NIR_PASS_V(nir, nir_lower_explicit_io, |
| nir_var_mem_shared, nir_address_format_32bit_offset); |
| } |
| |
| static VkResult |
| pipeline_compile_compute(struct v3dv_pipeline *pipeline, |
| const VkComputePipelineCreateInfo *info, |
| const VkAllocationCallbacks *alloc) |
| { |
| struct v3dv_device *device = pipeline->device; |
| struct v3dv_physical_device *physical_device = |
| &device->instance->physicalDevice; |
| |
| const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; |
| gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); |
| |
| struct v3dv_pipeline_stage *p_stage = |
| vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!p_stage) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); |
| p_stage->compiled_variant_count = 0; |
| p_stage->cache = create_variant_cache(MESA_SHADER_COMPUTE); |
| p_stage->pipeline = pipeline; |
| p_stage->stage = stage; |
| p_stage->entrypoint = sinfo->pName; |
| p_stage->module = v3dv_shader_module_from_handle(sinfo->module); |
| p_stage->spec_info = sinfo->pSpecializationInfo; |
| p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage); |
| |
| pipeline->active_stages |= sinfo->stage; |
| st_nir_opts(p_stage->nir); |
| pipeline_lower_nir(pipeline, p_stage, pipeline->layout); |
| lower_cs_shared(p_stage->nir); |
| |
| pipeline->cs = p_stage; |
| |
| struct v3d_key *key = &p_stage->key.base; |
| memset(key, 0, sizeof(*key)); |
| pipeline_populate_v3d_key(key, p_stage, 0); |
| |
| VkResult result; |
| p_stage->current_variant = |
| v3dv_get_shader_variant(p_stage, key, sizeof(*key), alloc, &result); |
| return result; |
| } |
| |
| static VkResult |
| compute_pipeline_init(struct v3dv_pipeline *pipeline, |
| struct v3dv_device *device, |
| const VkComputePipelineCreateInfo *info, |
| const VkAllocationCallbacks *alloc) |
| { |
| V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout); |
| |
| pipeline->device = device; |
| pipeline->layout = layout; |
| |
| VkResult result = pipeline_compile_compute(pipeline, info, alloc); |
| |
| return result; |
| } |
| |
| static VkResult |
| compute_pipeline_create(VkDevice _device, |
| VkPipelineCache _cache, |
| const VkComputePipelineCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipeline) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| struct v3dv_pipeline *pipeline; |
| VkResult result; |
| |
| pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (pipeline == NULL) |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| result = compute_pipeline_init(pipeline, device, pCreateInfo, pAllocator); |
| if (result != VK_SUCCESS) { |
| v3dv_destroy_pipeline(pipeline, device, pAllocator); |
| return result; |
| } |
| |
| *pPipeline = v3dv_pipeline_to_handle(pipeline); |
| |
| return VK_SUCCESS; |
| } |
| |
| VkResult |
| v3dv_CreateComputePipelines(VkDevice _device, |
| VkPipelineCache pipelineCache, |
| uint32_t createInfoCount, |
| const VkComputePipelineCreateInfo *pCreateInfos, |
| const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipelines) |
| { |
| VkResult result = VK_SUCCESS; |
| |
| for (uint32_t i = 0; i < createInfoCount; i++) { |
| VkResult local_result; |
| local_result = compute_pipeline_create(_device, |
| pipelineCache, |
| &pCreateInfos[i], |
| pAllocator, |
| &pPipelines[i]); |
| |
| if (local_result != VK_SUCCESS) { |
| result = local_result; |
| pPipelines[i] = VK_NULL_HANDLE; |
| } |
| } |
| |
| return result; |
| } |