| /* Copyright © 2024 Intel Corporation |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "anv_private.h" |
| |
| #include <math.h> |
| |
| #include "util/u_debug.h" |
| #include "util/half_float.h" |
| #include "util/u_atomic.h" |
| |
| #include "genxml/gen_macros.h" |
| #include "genxml/genX_pack.h" |
| #include "genxml/genX_rt_pack.h" |
| |
| #include "ds/intel_tracepoints.h" |
| |
| #include "bvh/anv_build_interface.h" |
| #include "vk_acceleration_structure.h" |
| #include "radix_sort/radix_sort_u64.h" |
| #include "radix_sort/common/vk/barrier.h" |
| |
| #include "vk_common_entrypoints.h" |
| #include "genX_mi_builder.h" |
| |
| #if GFX_VERx10 >= 125 |
| |
/* IDs used to track BVH dumps */
| static uint32_t blas_id = 0; |
| static uint32_t tlas_id = 0; |
| |
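/* Begin/end hooks for the common BVH build framework's debug markers. The
 * varargs depend on the step: TOP passes (num_tlas, num_blas) and ENCODE
 * passes (num_leaves, num_ir_nodes); the other steps carry no extra data.
 */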
| static void |
| begin_debug_marker(VkCommandBuffer commandBuffer, |
| enum vk_acceleration_structure_build_step step, |
| const char *format, ...) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| assert(cmd_buffer->state.rt.debug_marker_count < |
| ARRAY_SIZE(cmd_buffer->state.rt.debug_markers)); |
| cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count++] = |
| step; |
| switch (step) { |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP: |
| { |
| va_list args; |
| va_start(args, format); |
| cmd_buffer->state.rt.num_tlas = va_arg(args, uint32_t); |
| cmd_buffer->state.rt.num_blas = va_arg(args, uint32_t); |
| va_end(args); |
| trace_intel_begin_as_build(&cmd_buffer->trace); |
| break; |
| } |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES: |
| trace_intel_begin_as_build_leaves(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE: |
| trace_intel_begin_as_morton_generate(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT: |
| trace_intel_begin_as_morton_sort(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL: |
| trace_intel_begin_as_lbvh_build_internal(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL: |
| trace_intel_begin_as_ploc_build_internal(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: |
| { |
| va_list args; |
| va_start(args, format); |
| cmd_buffer->state.rt.num_leaves = va_arg(args, uint32_t); |
| cmd_buffer->state.rt.num_ir_nodes = va_arg(args, uint32_t); |
| va_end(args); |
| trace_intel_begin_as_encode(&cmd_buffer->trace); |
| break; |
| } |
| default: |
| unreachable("Invalid build step"); |
| } |
| } |
| |
| static void |
| end_debug_marker(VkCommandBuffer commandBuffer) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| cmd_buffer->state.rt.debug_marker_count--; |
| switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) { |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP: |
| trace_intel_end_as_build(&cmd_buffer->trace, |
| cmd_buffer->state.rt.num_tlas, |
| cmd_buffer->state.rt.num_blas); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES: |
| trace_intel_end_as_build_leaves(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE: |
| trace_intel_end_as_morton_generate(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT: |
| trace_intel_end_as_morton_sort(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL: |
| trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL: |
| trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace); |
| break; |
| case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: |
      trace_intel_end_as_encode(&cmd_buffer->trace,
                                cmd_buffer->state.rt.num_leaves,
                                cmd_buffer->state.rt.num_ir_nodes);
| break; |
| default: |
| unreachable("Invalid build step"); |
| } |
| } |
| |
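/* Allocate a host-mapped BO, record a GPU copy of dump_size bytes from src
 * into it, and queue it on the device's bvh_dumps list so the data can be
 * dumped later (e.g. when the device processes its dump list).
 */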
| static void |
| add_bvh_dump(struct anv_cmd_buffer *cmd_buffer, |
| VkDeviceAddress src, |
| uint64_t dump_size, |
| VkGeometryTypeKHR geometry_type, |
| enum bvh_dump_type dump_type) |
| { |
| assert(dump_size % 4 == 0); |
| |
| struct anv_device *device = cmd_buffer->device; |
| struct anv_bo *bo = NULL; |
| |
| VkResult result = anv_device_alloc_bo(device, "bvh_dump", dump_size, |
| ANV_BO_ALLOC_MAPPED | |
| ANV_BO_ALLOC_HOST_CACHED_COHERENT, 0, |
| &bo); |
| if (result != VK_SUCCESS) { |
| printf("Failed to allocate bvh for dump\n"); |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return; |
| } |
| |
   struct anv_bvh_dump *bvh_dump = malloc(sizeof(struct anv_bvh_dump));
   if (bvh_dump == NULL) {
      anv_device_release_bo(device, bo);
      vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

| bvh_dump->bo = bo; |
| bvh_dump->bvh_id = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? |
| tlas_id : blas_id; |
| bvh_dump->dump_size = dump_size; |
| bvh_dump->geometry_type = geometry_type; |
| bvh_dump->dump_type = dump_type; |
| |
| struct anv_address dst_addr = { .bo = bvh_dump->bo, .offset = 0 }; |
| struct anv_address src_addr = anv_address_from_u64(src); |
| anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, bvh_dump->dump_size); |
| |
| pthread_mutex_lock(&device->mutex); |
| list_addtail(&bvh_dump->link, &device->bvh_dumps); |
| pthread_mutex_unlock(&device->mutex); |
| } |
| |
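/* Depending on the INTEL_DEBUG(BVH_*) flags, snapshot the final anv BVH,
 * the IR header and/or the IR BVH of the acceleration structure that was
 * just encoded, then bump the per-type dump id.
 */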
| static void |
| debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, |
| VkDeviceAddress header_addr, |
| uint64_t bvh_anv_size, |
| VkDeviceAddress intermediate_header_addr, |
| VkDeviceAddress intermediate_as_addr, |
| uint32_t leaf_count, |
| VkGeometryTypeKHR geometry_type) |
| { |
| if (INTEL_DEBUG(DEBUG_BVH_BLAS) && |
| geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, |
| BVH_ANV); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_BVH_TLAS) && |
| geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, |
| BVH_ANV); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) && |
| geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| add_bvh_dump(cmd_buffer, intermediate_header_addr, |
| sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) && |
| geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| add_bvh_dump(cmd_buffer, intermediate_header_addr, |
| sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR); |
| } |
| |
| uint32_t internal_node_count = MAX2(leaf_count, 2) - 1; |
| uint64_t internal_node_total_size = sizeof(struct vk_ir_box_node) * |
| internal_node_count; |
| |
| if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) && |
| geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| uint64_t leaf_total_size; |
| |
| switch (geometry_type) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
| leaf_total_size = sizeof(struct vk_ir_triangle_node) * leaf_count; |
| break; |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| leaf_total_size = sizeof(struct vk_ir_aabb_node) * leaf_count; |
| break; |
| default: |
| unreachable("invalid geometry type"); |
| } |
| |
| add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size + |
| leaf_total_size, geometry_type, BVH_IR_AS); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) && |
| geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| uint64_t leaf_total_size = sizeof(struct vk_ir_instance_node) * |
| leaf_count; |
| add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size + |
| leaf_total_size, geometry_type, BVH_IR_AS); |
| } |
| if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| tlas_id++; |
| } else { |
| blas_id++; |
| } |
| } |
| |
| #define STRINGIFY_HELPER(x) #x |
| #define STRINGIFY(x) STRINGIFY_HELPER(x) |
| |
| #define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h) |
| #define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h) |
| #define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h) |
| |
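/* Pre-compiled, per-generation SPIR-V for the encode/header/copy kernels,
 * embedded as uint32_t arrays at build time.
 */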
| static const uint32_t encode_spv[] = { |
| #include ENCODE_SPV_PATH |
| }; |
| |
| static const uint32_t header_spv[] = { |
| #include HEADER_SPV_PATH |
| }; |
| |
| static const uint32_t copy_spv[] = { |
| #include COPY_SPV_PATH |
| }; |
| |
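/* Look up the named compute pipeline in the vk_meta cache, creating it from
 * the embedded SPIR-V (with a single push-constant range) on a cache miss.
 */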
| static VkResult |
| get_pipeline_spv(struct anv_device *device, |
| const char *name, const uint32_t *spv, uint32_t spv_size, |
| unsigned push_constant_size, VkPipeline *pipeline, |
| VkPipelineLayout *layout) |
| { |
| size_t key_size = strlen(name); |
| |
| const VkPushConstantRange pc_range = { |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = push_constant_size, |
| }; |
| |
| VkResult result = vk_meta_get_pipeline_layout(&device->vk, |
| &device->meta_device, NULL, |
| &pc_range, name, key_size, |
| layout); |
| |
| if (result != VK_SUCCESS) |
| return result; |
| |
| VkPipeline pipeline_from_cache = |
| vk_meta_lookup_pipeline(&device->meta_device, name, key_size); |
| if (pipeline_from_cache != VK_NULL_HANDLE) { |
| *pipeline = pipeline_from_cache; |
| return VK_SUCCESS; |
| } |
| |
| VkShaderModuleCreateInfo module_info = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .pNext = NULL, |
| .flags = 0, |
| .codeSize = spv_size, |
| .pCode = spv, |
| }; |
| |
| VkPipelineShaderStageCreateInfo shader_stage = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .pNext = &module_info, |
| .flags = 0, |
| .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| .pName = "main", |
| .pSpecializationInfo = NULL, |
| }; |
| |
| VkComputePipelineCreateInfo pipeline_info = { |
| .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| .flags = 0, |
| .stage = shader_stage, |
| .layout = *layout, |
| }; |
| |
| return vk_meta_create_compute_pipeline(&device->vk, &device->meta_device, |
| &pipeline_info, name, key_size, pipeline); |
| } |
| |
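/* Compute the memory layout of the final anv acceleration structure:
 *
 *   [0, ANV_RT_BVH_HEADER_SIZE)        anv_accel_struct_header
 *   (TLAS only) leaf_count * uint64_t  instance leaf addresses
 *   <pad to 64-byte alignment>
 *   [bvh_offset, size)                 internal nodes followed by leaves
 */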
| static void |
| get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count, |
| struct bvh_layout *layout) |
| { |
| uint32_t internal_count = MAX2(leaf_count, 2) - 1; |
| |
| uint64_t offset = ANV_RT_BVH_HEADER_SIZE; |
| |
   /* For a TLAS, we store the addresses of the anv_instance_leaf structs
    * right after the header, for quick access in copy.comp.
    */
| if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { |
| offset += leaf_count * sizeof(uint64_t); |
| } |
   /* The BVH, and hence bvh_offset, needs 64-byte alignment for RT nodes. */
| offset = ALIGN(offset, 64); |
| |
| /* This is where internal_nodes/leaves start to be encoded */ |
| layout->bvh_offset = offset; |
| |
| offset += internal_count * ANV_RT_INTERNAL_NODE_SIZE; |
| |
| switch (geometry_type) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
      /* Currently we encode one triangle per quad leaf. */
| offset += leaf_count * ANV_RT_QUAD_LEAF_SIZE; |
| break; |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| offset += leaf_count * ANV_RT_PROCEDURAL_LEAF_SIZE; |
| break; |
| case VK_GEOMETRY_TYPE_INSTANCES_KHR: |
| offset += leaf_count * ANV_RT_INSTANCE_LEAF_SIZE; |
| break; |
| default: |
| unreachable("Unknown VkGeometryTypeKHR"); |
| } |
| |
| layout->size = offset; |
| } |
| |
| static VkDeviceSize |
| anv_get_as_size(VkDevice device, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, |
| uint32_t leaf_count) |
| { |
| struct bvh_layout layout; |
| get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout); |
| return layout.size; |
| } |
| |
| static uint32_t |
| anv_get_encode_key(struct vk_device *device, VkAccelerationStructureTypeKHR type, |
| VkBuildAccelerationStructureFlagBitsKHR flags) |
| { |
| return 0; |
| } |
| |
| static VkResult |
| anv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key) |
| { |
| VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct anv_device *device = cmd_buffer->device; |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| VkResult result = get_pipeline_spv(device, "encode", encode_spv, |
| sizeof(encode_spv), |
| sizeof(struct encode_args), &pipeline, |
| &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| pipeline); |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| anv_encode_as(VkCommandBuffer commandBuffer, |
| const VkAccelerationStructureBuildGeometryInfoKHR *build_info, |
| const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, |
| VkDeviceAddress intermediate_as_addr, |
| VkDeviceAddress intermediate_header_addr, uint32_t leaf_count, |
| uint32_t key, |
| struct vk_acceleration_structure *dst) |
| { |
| if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD)) |
| return; |
| |
| VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct anv_device *device = cmd_buffer->device; |
| |
| VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info); |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv), |
| sizeof(struct encode_args), &pipeline, &layout); |
| |
| STATIC_ASSERT(sizeof(struct anv_accel_struct_header) == ANV_RT_BVH_HEADER_SIZE); |
| STATIC_ASSERT(sizeof(struct anv_instance_leaf) == ANV_RT_INSTANCE_LEAF_SIZE); |
| STATIC_ASSERT(sizeof(struct anv_quad_leaf_node) == ANV_RT_QUAD_LEAF_SIZE); |
| STATIC_ASSERT(sizeof(struct anv_procedural_leaf_node) == ANV_RT_PROCEDURAL_LEAF_SIZE); |
| STATIC_ASSERT(sizeof(struct anv_internal_node) == ANV_RT_INTERNAL_NODE_SIZE); |
| |
| struct bvh_layout bvh_layout; |
| get_bvh_layout(geometry_type, leaf_count, &bvh_layout); |
| |
| const struct encode_args args = { |
| .intermediate_bvh = intermediate_as_addr, |
| .output_bvh = vk_acceleration_structure_get_va(dst) + |
| bvh_layout.bvh_offset, |
| .header = intermediate_header_addr, |
| .output_bvh_offset = bvh_layout.bvh_offset, |
| .leaf_node_count = leaf_count, |
| .geometry_type = geometry_type, |
| }; |
| |
| VkPushConstantsInfoKHR push_info = { |
| .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR, |
| .layout = layout, |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = sizeof(args), |
| .pValues = &args, |
| }; |
| |
| anv_CmdPushConstants2KHR(commandBuffer, &push_info); |
| |
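   /* The workgroup count is not known on the host; read it from
    * ir_header.ir_internal_node_count, written during the internal-node
    * build, and dispatch indirectly with an unaligned X size.
    */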
| struct anv_address indirect_addr = |
| anv_address_from_u64(intermediate_header_addr + |
| offsetof(struct vk_ir_header, ir_internal_node_count)); |
| anv_genX(cmd_buffer->device->info, cmd_buffer_dispatch_indirect) |
| (cmd_buffer, indirect_addr, true /* is_unaligned_size_x */); |
| } |
| |
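/* Header encode key: 1 when compaction is allowed (the compacted size must
 * be computed on the GPU by header.comp), 0 when the header can be written
 * with data computed on the CPU (anv_cmd_buffer_update_addr).
 */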
| static uint32_t |
| anv_get_header_key(struct vk_device *device, VkAccelerationStructureTypeKHR type, |
| VkBuildAccelerationStructureFlagBitsKHR flags) |
| { |
| return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ? |
| 1 : 0; |
| } |
| |
| static VkResult |
| anv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key) |
| { |
| VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| if (key == 1) { |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| VkResult result = get_pipeline_spv(cmd_buffer->device, "header", |
| header_spv, sizeof(header_spv), |
| sizeof(struct header_args), &pipeline, |
| &layout); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| pipeline); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| anv_init_header(VkCommandBuffer commandBuffer, |
| const VkAccelerationStructureBuildGeometryInfoKHR *build_info, |
| const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, |
| VkDeviceAddress intermediate_as_addr, |
| VkDeviceAddress intermediate_header_addr, uint32_t leaf_count, |
| uint32_t key, |
| struct vk_acceleration_structure *dst) |
| { |
| VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct anv_device *device = cmd_buffer->device; |
| |
| VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info); |
| |
| struct bvh_layout bvh_layout; |
| get_bvh_layout(geometry_type, leaf_count, &bvh_layout); |
| |
| VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst); |
| |
| UNUSED size_t base = offsetof(struct anv_accel_struct_header, |
| copy_dispatch_size); |
| |
| uint32_t instance_count = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? |
| leaf_count : 0; |
| |
| if (key == 1) { |
      /* Add a barrier to ensure the writes from encode.comp are ready to be
       * read by header.comp.
       */
| vk_barrier_compute_w_to_compute_r(commandBuffer); |
| |
      /* VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR is set, so
       * we want to populate header.compacted_size with the compacted size,
       * which is computed from ir_header.dst_node_offset; header.comp reads
       * that value on the GPU.
       */
| base = offsetof(struct anv_accel_struct_header, instance_count); |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| get_pipeline_spv(device, "header", header_spv, sizeof(header_spv), |
| sizeof(struct header_args), &pipeline, &layout); |
| |
| struct header_args args = { |
| .src = intermediate_header_addr, |
| .dst = vk_acceleration_structure_get_va(dst), |
| .bvh_offset = bvh_layout.bvh_offset, |
| .instance_count = instance_count, |
| }; |
| |
| VkPushConstantsInfoKHR push_info = { |
| .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR, |
| .layout = layout, |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = sizeof(args), |
| .pValues = &args, |
| }; |
| |
| anv_CmdPushConstants2KHR(commandBuffer, &push_info); |
| vk_common_CmdDispatch(commandBuffer, 1, 1, 1); |
| } else { |
| vk_barrier_compute_w_to_host_r(commandBuffer); |
| |
| struct anv_accel_struct_header header = {}; |
| |
| header.instance_count = instance_count; |
| header.self_ptr = header_addr; |
| header.compacted_size = bvh_layout.size; |
| |
      /* 128 is local_size_x in the copy.comp shader and 8 is the amount of
       * data copied by each iteration of that shader's loop.
       */
| header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, |
| 8 * 128); |
| header.copy_dispatch_size[1] = 1; |
| header.copy_dispatch_size[2] = 1; |
| |
| header.serialization_size = |
| header.compacted_size + |
| sizeof(struct vk_accel_struct_serialization_header) + |
| sizeof(uint64_t) * header.instance_count; |
| |
| header.size = header.compacted_size; |
| |
| #if GFX_VERx10 >= 300 |
| header.enable_64b_rt = 1; |
| #else |
| header.enable_64b_rt = 0; |
| #endif |
| |
| size_t header_size = sizeof(struct anv_accel_struct_header) - base; |
| assert(base % sizeof(uint32_t) == 0); |
| assert(header_size % sizeof(uint32_t) == 0); |
| uint32_t *header_ptr = (uint32_t *)((char *)&header + base); |
| |
| struct anv_address addr = anv_address_from_u64(header_addr + base); |
| anv_cmd_buffer_update_addr(cmd_buffer, addr, header_size, header_ptr); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_BVH_ANY)) { |
| genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, |
| cmd_buffer->state.current_pipeline, |
| ANV_PIPE_END_OF_PIPE_SYNC_BIT | |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT | |
| ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT); |
| debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, |
| intermediate_header_addr, intermediate_as_addr, |
| leaf_count, geometry_type); |
| } |
| } |
| |
| static const struct vk_acceleration_structure_build_ops anv_build_ops = { |
| .begin_debug_marker = begin_debug_marker, |
| .end_debug_marker = end_debug_marker, |
| .get_as_size = anv_get_as_size, |
| .get_encode_key = { anv_get_encode_key, anv_get_header_key }, |
| .encode_bind_pipeline = { anv_encode_bind_pipeline, |
| anv_init_header_bind_pipeline }, |
| .encode_as = { anv_encode_as, anv_init_header }, |
| }; |
| |
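/* Lazily initialized on first use: create the radix-sort instance and hook
 * this driver into the runtime's common acceleration-structure build
 * framework.
 */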
| static VkResult |
| anv_device_init_accel_struct_build_state(struct anv_device *device) |
| { |
| VkResult result = VK_SUCCESS; |
| simple_mtx_lock(&device->accel_struct_build.mutex); |
| |
| if (device->accel_struct_build.radix_sort) |
| goto exit; |
| |
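   /* Gfx20+ runs these kernels with 16-wide subgroups (log2 == 4); older
    * platforms use 8-wide (log2 == 3).
    */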
| const struct radix_sort_vk_target_config radix_sort_config = { |
| .keyval_dwords = 2, |
| .init = { .workgroup_size_log2 = 8, }, |
| .fill = { .workgroup_size_log2 = 8, .block_rows = 8 }, |
| .histogram = { |
| .workgroup_size_log2 = 8, |
| .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3, |
| .block_rows = 14, |
| }, |
| .prefix = { |
| .workgroup_size_log2 = 8, |
| .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3, |
| }, |
| .scatter = { |
| .workgroup_size_log2 = 8, |
| .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3, |
| .block_rows = 14, |
| }, |
| }; |
| |
| device->accel_struct_build.radix_sort = |
| vk_create_radix_sort_u64(anv_device_to_handle(device), |
| &device->vk.alloc, |
| VK_NULL_HANDLE, radix_sort_config); |
| |
| device->vk.as_build_ops = &anv_build_ops; |
| device->vk.write_buffer_cp = anv_cmd_write_buffer_cp; |
| device->vk.flush_buffer_write_cp = anv_cmd_flush_buffer_write_cp; |
| device->vk.cmd_dispatch_unaligned = anv_cmd_dispatch_unaligned; |
| device->vk.cmd_fill_buffer_addr = anv_cmd_fill_buffer_addr; |
| |
| device->accel_struct_build.build_args = |
| (struct vk_acceleration_structure_build_args) { |
| .emit_markers = u_trace_enabled(&device->ds.trace_context), |
| .subgroup_size = device->info->ver >= 20 ? 16 : 8, |
| .radix_sort = device->accel_struct_build.radix_sort, |
         /* See struct anv_accel_struct_header in anv_bvh.h:
          * the root pointer starts at offset 0 and the bounding box starts
          * at offset 8.
          */
| .bvh_bounds_offset = 8, |
| }; |
| |
| exit: |
| simple_mtx_unlock(&device->accel_struct_build.mutex); |
| return result; |
| } |
| |
| void |
| genX(GetAccelerationStructureBuildSizesKHR)( |
| VkDevice _device, |
| VkAccelerationStructureBuildTypeKHR buildType, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo, |
| const uint32_t* pMaxPrimitiveCounts, |
| VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| if (anv_device_init_accel_struct_build_state(device) != VK_SUCCESS) |
| return; |
| |
| vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts, |
| pSizeInfo, &device->accel_struct_build.build_args); |
| } |
| |
| void |
| genX(GetDeviceAccelerationStructureCompatibilityKHR)( |
| VkDevice _device, |
| const VkAccelerationStructureVersionInfoKHR* pVersionInfo, |
| VkAccelerationStructureCompatibilityKHR* pCompatibility) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| struct vk_accel_struct_serialization_header* ser_header = |
| (struct vk_accel_struct_serialization_header*)(pVersionInfo->pVersionData); |
| |
| if (memcmp(ser_header->accel_struct_compat, |
| device->physical->rt_uuid, |
| sizeof(device->physical->rt_uuid)) == 0) { |
| *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR; |
| } else { |
| *pCompatibility = |
| VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR; |
| } |
| } |
| |
| void |
| genX(CmdBuildAccelerationStructuresKHR)( |
| VkCommandBuffer commandBuffer, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| struct anv_device *device = cmd_buffer->device; |
| |
| VkResult result = anv_device_init_accel_struct_build_state(device); |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return; |
| } |
| |
| struct anv_cmd_saved_state saved; |
| anv_cmd_buffer_save_state(cmd_buffer, |
| ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL | |
| ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved); |
| |
| vk_cmd_build_acceleration_structures(commandBuffer, &device->vk, |
| &device->meta_device, infoCount, |
| pInfos, ppBuildRangeInfos, |
| &device->accel_struct_build.build_args); |
| |
| anv_cmd_buffer_restore_state(cmd_buffer, &saved); |
| } |
| |
| void |
| genX(CmdCopyAccelerationStructureKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); |
| VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst); |
| |
| trace_intel_begin_as_copy(&cmd_buffer->trace); |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv, |
| sizeof(copy_spv), sizeof(struct copy_args), |
| &pipeline, &layout); |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return; |
| } |
| |
| struct anv_cmd_saved_state saved; |
| anv_cmd_buffer_save_state(cmd_buffer, |
| ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL | |
| ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved); |
| |
| anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| pipeline); |
| |
| struct copy_args consts = { |
| .src_addr = vk_acceleration_structure_get_va(src), |
| .dst_addr = vk_acceleration_structure_get_va(dst), |
| .mode = ANV_COPY_MODE_COPY, |
| }; |
| |
| VkPushConstantsInfoKHR push_info = { |
| .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR, |
| .layout = layout, |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = sizeof(consts), |
| .pValues = &consts, |
| }; |
| |
| anv_CmdPushConstants2KHR(commandBuffer, &push_info); |
| |
   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS
    * (reading the dispatch parameters) is not L3 coherent.
    */
| if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) { |
| anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT, |
| "bvh size read for dispatch"); |
| } |
| |
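   /* The workgroup count was precomputed into copy_dispatch_size when the
    * source acceleration structure's header was written (see
    * anv_init_header).
    */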
| anv_genX(cmd_buffer->device->info, CmdDispatchIndirect)( |
| commandBuffer, vk_buffer_to_handle(src->buffer), |
| src->offset + offsetof(struct anv_accel_struct_header, |
| copy_dispatch_size)); |
| |
| anv_cmd_buffer_restore_state(cmd_buffer, &saved); |
| |
| trace_intel_end_as_copy(&cmd_buffer->trace); |
| } |
| |
| void |
| genX(CmdCopyAccelerationStructureToMemoryKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); |
| struct anv_device *device = cmd_buffer->device; |
| |
| trace_intel_begin_as_copy(&cmd_buffer->trace); |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| VkResult result = get_pipeline_spv(device, "copy", copy_spv, |
| sizeof(copy_spv), |
| sizeof(struct copy_args), &pipeline, |
| &layout); |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return; |
| } |
| |
| struct anv_cmd_saved_state saved; |
| anv_cmd_buffer_save_state(cmd_buffer, |
| ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL | |
| ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved); |
| |
| anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| pipeline); |
| |
| struct copy_args consts = { |
| .src_addr = vk_acceleration_structure_get_va(src), |
| .dst_addr = pInfo->dst.deviceAddress, |
| .mode = ANV_COPY_MODE_SERIALIZE, |
| }; |
| |
| memcpy(consts.driver_uuid, device->physical->driver_uuid, VK_UUID_SIZE); |
| memcpy(consts.accel_struct_compat, device->physical->rt_uuid, VK_UUID_SIZE); |
| |
| VkPushConstantsInfoKHR push_info = { |
| .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR, |
| .layout = layout, |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = sizeof(consts), |
| .pValues = &consts, |
| }; |
| |
| anv_CmdPushConstants2KHR(commandBuffer, &push_info); |
| |
   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS
    * (reading the dispatch parameters) is not L3 coherent.
    */
| if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) { |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT, |
| "bvh size read for dispatch"); |
| } |
| |
| anv_genX(device->info, CmdDispatchIndirect)( |
| commandBuffer, vk_buffer_to_handle(src->buffer), |
| src->offset + offsetof(struct anv_accel_struct_header, |
| copy_dispatch_size)); |
| |
| anv_cmd_buffer_restore_state(cmd_buffer, &saved); |
| |
| trace_intel_end_as_copy(&cmd_buffer->trace); |
| } |
| |
| void |
| genX(CmdCopyMemoryToAccelerationStructureKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst); |
| |
| trace_intel_begin_as_copy(&cmd_buffer->trace); |
| |
| VkPipeline pipeline; |
| VkPipelineLayout layout; |
| VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv, |
| sizeof(copy_spv), |
| sizeof(struct copy_args), &pipeline, |
| &layout); |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return; |
| } |
| |
| struct anv_cmd_saved_state saved; |
| anv_cmd_buffer_save_state(cmd_buffer, |
| ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL | |
| ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved); |
| |
| anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, |
| pipeline); |
| |
| const struct copy_args consts = { |
| .src_addr = pInfo->src.deviceAddress, |
| .dst_addr = vk_acceleration_structure_get_va(dst), |
| .mode = ANV_COPY_MODE_DESERIALIZE, |
| }; |
| |
| VkPushConstantsInfoKHR push_info = { |
| .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR, |
| .layout = layout, |
| .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| .offset = 0, |
| .size = sizeof(consts), |
| .pValues = &consts, |
| }; |
| |
| anv_CmdPushConstants2KHR(commandBuffer, &push_info); |
| |
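   /* The amount of data to copy is only known on the GPU (it comes from the
    * serialized stream), so dispatch a fixed 512-workgroup grid and let
    * copy.comp bound the copy.
    */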
| vk_common_CmdDispatch(commandBuffer, 512, 1, 1); |
| anv_cmd_buffer_restore_state(cmd_buffer, &saved); |
| |
| trace_intel_end_as_copy(&cmd_buffer->trace); |
| } |
| |
| void |
| genX(DestroyAccelerationStructureKHR)( |
| VkDevice _device, |
| VkAccelerationStructureKHR accelerationStructure, |
| const VkAllocationCallbacks* pAllocator) |
| { |
| vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure, |
| pAllocator); |
| } |
| #endif |