| /* |
| * Copyright © 2020 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "anv_private.h" |
| |
| #include <math.h> |
| |
| #include "util/u_debug.h" |
| #include "util/half_float.h" |
| #include "util/u_atomic.h" |
| |
| #include "genxml/gen_macros.h" |
| #include "genxml/genX_pack.h" |
| #include "genxml/genX_rt_pack.h" |
| |
| #include "ds/intel_tracepoints.h" |
| |
| #if GFX_VERx10 >= 125 |
| #include "grl/grl_structs.h" |
| |
| /* Wait for the previous dispatches to finish and flush their data port |
| * writes. |
| */ |
| #define ANV_GRL_FLUSH_FLAGS (ANV_PIPE_END_OF_PIPE_SYNC_BIT | \ |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) |
| |
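| /* Geometries can be supplied either as a flat array (pGeometries) or as an |
| * array of pointers (ppGeometries); use whichever the application provided. |
| */ |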
| static const VkAccelerationStructureGeometryKHR * |
| get_geometry(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo, |
| uint32_t index) |
| { |
| return pInfo->pGeometries ? &pInfo->pGeometries[index] : |
| pInfo->ppGeometries[index]; |
| } |
| |
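| /* Transient and private GRL scratch allocations are padded to a 64-byte |
| * boundary. |
| */ |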
| static size_t align_transient_size(size_t bytes) |
| { |
| return align_uintptr(bytes, 64); |
| } |
| |
| static size_t align_private_size(size_t bytes) |
| { |
| return align_uintptr(bytes, 64); |
| } |
| |
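| /* Size of the scheduler state shared by a batch of binned-SAH builds, with |
| * extra QNode global-root-buffer entries once the batch exceeds the minimum |
| * entry count. |
| */ |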
| static size_t get_scheduler_size(size_t num_builds) |
| { |
| size_t scheduler_size = sizeof(union SchedulerUnion); |
| /* add more memory for qnode creation stage if needed */ |
| if (num_builds > QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) { |
| scheduler_size += (num_builds - QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) * 2 * |
| sizeof(struct QNodeGlobalRootBufferEntry); |
| } |
| |
| return align_private_size(scheduler_size); |
| } |
| |
| static size_t |
| get_batched_binnedsah_transient_mem_size(size_t num_builds) |
| { |
| if (num_builds == 0) |
| return 0; |
| return num_builds * (sizeof(struct SAHBuildBuffersInfo) + sizeof(gpuva_t)); |
| } |
| |
| static size_t |
| get_batched_binnedsah_private_mem_size(size_t num_builds) |
| { |
| if (num_builds == 0) |
| return 0; |
| |
| size_t globals_size = align_private_size(num_builds * sizeof(struct SAHBuildGlobals)); |
| return globals_size + get_scheduler_size(num_builds); |
| } |
| |
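| /* Conservative estimate of the number of 6-wide QBVH6 internal nodes needed |
| * for N primitives, starting from a bottom level that packs 2 leaves per |
| * node. |
| */ |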
| static uint32_t |
| estimate_qbvh6_nodes(const uint32_t N) |
| { |
| const uint32_t W = 6; |
| const uint32_t N0 = N / 2 + N % 2; // lowest level with 2 leaves per QBVH6 node |
| const uint32_t N1 = N0 / W + (N0 % W ? 1 : 0); // filled level |
| const uint32_t N2 = N0 / W + (N1 % W ? 1 : 0); // filled level |
| const uint32_t N3 = N0 / W + (N2 % W ? 1 : 0); // filled level |
| const uint32_t N4 = N3; // overestimate remaining nodes |
| return N0 + N1 + N2 + N3 + N4; |
| } |
| |
| /* Estimates the worst-case number of QBVH6 nodes for a top-down BVH |
| * build that is guaranteed to produce subtrees of at least K primitives, |
| * each of which becomes a single QBVH6 node. |
| */ |
| static uint32_t |
| estimate_qbvh6_nodes_minK(const uint32_t N, uint32_t K) |
| { |
| const uint32_t N0 = N / K + (N % K ? 1 : 0); // lowest level of nodes with K leaves minimally |
| return N0 + estimate_qbvh6_nodes(N0); |
| } |
| |
| static size_t |
| estimate_qbvh6_fatleafs(const size_t P) |
| { |
| return P; |
| } |
| |
| static size_t |
| estimate_qbvh6_nodes_worstcase(const size_t P) |
| { |
| const size_t F = estimate_qbvh6_fatleafs(P); |
| |
| // Worst case: every inner node has 5 fat-leaf children, so there are |
| // F fat-leaves and ceil(F/5) inner nodes. |
| return F + ceil(F/5.0); |
| } |
| |
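| /* Byte sizes of the hardware BVH structures; the genxml _length values are |
| * in dwords, hence the multiplication by 4. |
| */ |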
| #define sizeof_PrimRef 32 |
| #define sizeof_HwInstanceLeaf (GENX(RT_BVH_INSTANCE_LEAF_length) * 4) |
| #define sizeof_InternalNode (GENX(RT_BVH_INTERNAL_NODE_length) * 4) |
| #define sizeof_Procedural (GENX(RT_BVH_PROCEDURAL_LEAF_length) * 4) |
| #define sizeof_Quad (GENX(RT_BVH_QUAD_LEAF_length) * 4) |
| |
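| /* Computes worst-case sizes and internal offsets (nodes, leaves, instance |
| * descriptors, geometry metadata, back pointers) for the acceleration |
| * structure described by pInfo. Per-geometry primitive counts are taken |
| * from pBuildRangeInfos when provided, otherwise from pMaxPrimitiveCounts. |
| */ |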
| static struct MKSizeEstimate |
| get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo, |
| const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos, |
| const uint32_t *pMaxPrimitiveCounts) |
| { |
| uint32_t num_triangles = 0, num_aabbs = 0, num_instances = 0; |
| for (unsigned g = 0; g < pInfo->geometryCount; g++) { |
| const VkAccelerationStructureGeometryKHR *pGeometry = |
| get_geometry(pInfo, g); |
| uint32_t prim_count = pBuildRangeInfos != NULL ? |
| pBuildRangeInfos[g].primitiveCount : pMaxPrimitiveCounts[g]; |
| |
| switch (pGeometry->geometryType) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
| num_triangles += prim_count; |
| break; |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| num_aabbs += prim_count; |
| break; |
| case VK_GEOMETRY_TYPE_INSTANCES_KHR: |
| num_instances += prim_count; |
| break; |
| default: |
| unreachable("Unsupported geometry type"); |
| } |
| } |
| const uint32_t num_primitives = num_triangles + num_aabbs + num_instances; |
| |
| struct MKSizeEstimate est = {}; |
| |
| uint64_t size = sizeof(BVHBase); |
| size = align64(size, 64); |
| |
| /* Must immediately follow BVHBase because we use fixed offset to nodes. */ |
| est.node_data_start = size; |
| |
| switch (pInfo->type) { |
| case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: { |
| assert(num_triangles == 0 && num_aabbs == 0); |
| |
| est.numPrimitives = num_instances; |
| est.numPrimitivesToSplit = 0; |
| est.numBuildPrimitives = est.numPrimitives + est.numPrimitivesToSplit; |
| |
| est.min_primitives = est.numPrimitives; |
| est.max_primitives = est.numPrimitives + est.numPrimitivesToSplit; |
| |
| unsigned int sizeInnerNodes = |
| (unsigned int) estimate_qbvh6_nodes_worstcase(est.numBuildPrimitives) * |
| sizeof_InternalNode; |
| if (sizeInnerNodes == 0) |
| sizeInnerNodes = sizeof_InternalNode; |
| |
| est.max_inner_nodes = sizeInnerNodes / sizeof_InternalNode; |
| |
| size += sizeInnerNodes; |
| STATIC_ASSERT(sizeof_InternalNode % 64 == 0); |
| |
| est.leaf_data_start = size; |
| size += est.numBuildPrimitives * sizeof_HwInstanceLeaf; |
| STATIC_ASSERT(sizeof_HwInstanceLeaf % 64 == 0); |
| |
| est.leaf_data_size = est.numBuildPrimitives * sizeof_HwInstanceLeaf; |
| |
| break; |
| } |
| |
| case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: { |
| assert(num_instances == 0); |
| |
| /* RT: TODO */ |
| const float split_factor = 0.0f; |
| uint32_t num_prims_to_split = 0; |
| if (false) |
| num_prims_to_split = num_triangles * (double)split_factor; |
| |
| const uint32_t num_build_triangles = num_triangles + num_prims_to_split; |
| const uint32_t num_build_primitives = num_build_triangles + num_aabbs; |
| |
| est.numPrimitives = num_primitives; |
| est.numTriangles = num_triangles; |
| est.numProcedurals = num_aabbs; |
| est.numMeshes = pInfo->geometryCount; |
| est.numBuildPrimitives = num_build_primitives; |
| est.numPrimitivesToSplit = num_prims_to_split; |
| est.max_instance_leafs = 0; |
| |
| est.min_primitives = (size_t)(num_build_triangles * 0.5f + num_aabbs); |
| est.max_primitives = num_build_triangles + num_aabbs; |
| |
| size_t nodeBytes = 0; |
| nodeBytes += estimate_qbvh6_nodes_worstcase(num_build_triangles) * sizeof_InternalNode; |
| nodeBytes += estimate_qbvh6_nodes_worstcase(num_aabbs) * sizeof_InternalNode; |
| if (nodeBytes == 0) // for case with 0 primitives |
| nodeBytes = sizeof_InternalNode; |
| nodeBytes = MAX2(nodeBytes, 8 * (size_t)num_build_primitives); // for primref_index0/1 buffers |
| |
| est.max_inner_nodes = nodeBytes / sizeof_InternalNode; |
| |
| size += nodeBytes; |
| STATIC_ASSERT(sizeof_InternalNode % 64 == 0); |
| |
| est.leaf_data_start = size; |
| size += num_build_triangles * sizeof_Quad; |
| STATIC_ASSERT(sizeof_Quad % 64 == 0); |
| |
| est.procedural_data_start = size; |
| size += num_aabbs * sizeof_Procedural; |
| STATIC_ASSERT(sizeof_Procedural % 64 == 0); |
| |
| est.leaf_data_size = num_build_triangles * sizeof_Quad + |
| num_aabbs * sizeof_Procedural; |
| |
| if (num_build_primitives == 0) |
| size += MAX2(sizeof_Quad, sizeof_Procedural); |
| break; |
| } |
| |
| default: |
| unreachable("Unsupported acceleration structure type"); |
| } |
| |
| size = align64(size, 64); |
| est.instance_descs_start = size; |
| size += sizeof(struct InstanceDesc) * num_instances; |
| |
| est.geo_meta_data_start = size; |
| size += sizeof(struct GeoMetaData) * pInfo->geometryCount; |
| size = align64(size, 64); |
| |
| assert(size == align64(size, 64)); |
| est.back_pointer_start = size; |
| |
| const bool alloc_backpointers = false; /* RT TODO */ |
| if (alloc_backpointers) { |
| size += est.max_inner_nodes * sizeof(uint32_t); |
| size = align64(size, 64); |
| } |
| |
| assert(size < UINT32_MAX); |
| est.sizeTotal = align64(size, 64); |
| |
| return est; |
| } |
| |
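| /* Layout of the application-provided scratch buffer during a build. */ |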
| struct scratch_layout { |
| gpuva_t base; |
| uint32_t total_size; |
| |
| gpuva_t primrefs; |
| gpuva_t globals; |
| gpuva_t leaf_index_buffers; |
| uint32_t leaf_index_buffer_stride; |
| |
| /* new_sah */ |
| gpuva_t qnode_buffer; |
| gpuva_t bvh2_buffer; |
| }; |
| |
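| /* Size of the intermediate BVH2 used by the new SAH builder: a binary tree |
| * over num_primitives leaves has at most 2 * num_primitives - 1 nodes. |
| */ |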
| static size_t |
| get_bvh2_size(uint32_t num_primitives) |
| { |
| if (num_primitives == 0) |
| return 0; |
| return sizeof(struct BVH2) + |
| (2 * num_primitives - 1) * sizeof(struct BVH2Node); |
| } |
| |
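| /* Splits the scratch buffer into the sub-allocations needed by the chosen |
| * build method. Passing ANV_NULL_ADDRESS as the base yields only the sizes, |
| * which is how vkGetAccelerationStructureBuildSizesKHR uses it. |
| */ |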
| static struct scratch_layout |
| get_gpu_scratch_layout(struct anv_address base, |
| struct MKSizeEstimate est, |
| enum anv_rt_bvh_build_method build_method) |
| { |
| struct scratch_layout scratch = { |
| .base = anv_address_physical(base), |
| }; |
| gpuva_t current = anv_address_physical(base); |
| |
| scratch.globals = current; |
| current += sizeof(struct Globals); |
| |
| scratch.primrefs = intel_canonical_address(current); |
| current += est.numBuildPrimitives * sizeof_PrimRef; |
| |
| scratch.leaf_index_buffers = intel_canonical_address(current); |
| current += est.numBuildPrimitives * sizeof(uint32_t) * 2; |
| scratch.leaf_index_buffer_stride = sizeof(uint32_t); |
| |
| switch (build_method) { |
| case ANV_BVH_BUILD_METHOD_TRIVIAL: |
| break; |
| |
| case ANV_BVH_BUILD_METHOD_NEW_SAH: { |
| size_t bvh2_size = get_bvh2_size(est.numBuildPrimitives); |
| if (est.leaf_data_size < bvh2_size) { |
| scratch.bvh2_buffer = intel_canonical_address(current); |
| current += bvh2_size; |
| } |
| |
| scratch.qnode_buffer = intel_canonical_address(current); |
| current += 2 * sizeof(dword) * est.max_inner_nodes; |
| break; |
| } |
| |
| default: |
| unreachable("invalid build"); |
| } |
| |
| assert((current - scratch.base) < UINT32_MAX); |
| scratch.total_size = current - scratch.base; |
| |
| return scratch; |
| } |
| |
| static void |
| anv_get_gpu_acceleration_structure_size( |
| struct anv_device *device, |
| UNUSED VkAccelerationStructureBuildTypeKHR buildType, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo, |
| const uint32_t* pMaxPrimitiveCounts, |
| VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo) |
| { |
| struct MKSizeEstimate est = get_gpu_size_estimate(pBuildInfo, NULL, |
| pMaxPrimitiveCounts); |
| struct scratch_layout scratch = get_gpu_scratch_layout(ANV_NULL_ADDRESS, est, |
| device->bvh_build_method); |
| |
| pSizeInfo->accelerationStructureSize = est.sizeTotal; |
| pSizeInfo->buildScratchSize = scratch.total_size; |
| pSizeInfo->updateScratchSize = scratch.total_size; /* TODO */ |
| } |
| |
| void |
| genX(GetAccelerationStructureBuildSizesKHR)( |
| VkDevice _device, |
| VkAccelerationStructureBuildTypeKHR buildType, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo, |
| const uint32_t* pMaxPrimitiveCounts, |
| VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| assert(pSizeInfo->sType == |
| VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR); |
| |
| VkAccelerationStructureBuildSizesInfoKHR gpu_size_info; |
| anv_get_gpu_acceleration_structure_size(device, buildType, pBuildInfo, |
| pMaxPrimitiveCounts, |
| &gpu_size_info); |
| |
| pSizeInfo->accelerationStructureSize = |
| gpu_size_info.accelerationStructureSize; |
| pSizeInfo->buildScratchSize = gpu_size_info.buildScratchSize; |
| pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize; |
| } |
| |
| void |
| genX(GetDeviceAccelerationStructureCompatibilityKHR)( |
| VkDevice _device, |
| const VkAccelerationStructureVersionInfoKHR* pVersionInfo, |
| VkAccelerationStructureCompatibilityKHR* pCompatibility) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| |
| if (memcmp(pVersionInfo->pVersionData, |
| device->physical->rt_uuid, |
| sizeof(device->physical->rt_uuid)) == 0) { |
| *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR; |
| } else { |
| *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR; |
| } |
| } |
| |
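| /* Translation helpers from Vulkan enums to their GRL counterparts. */ |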
| static inline uint8_t |
| vk_to_grl_GeometryFlags(VkGeometryFlagsKHR flags) |
| { |
| uint8_t grl_flags = GEOMETRY_FLAG_NONE; |
| unsigned mask = flags; |
| while (mask) { |
| int i = u_bit_scan(&mask); |
| switch ((VkGeometryFlagBitsKHR)(1u << i)) { |
| case VK_GEOMETRY_OPAQUE_BIT_KHR: |
| grl_flags |= GEOMETRY_FLAG_OPAQUE; |
| break; |
| case VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR: |
| grl_flags |= GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION; |
| break; |
| default: |
| unreachable("Unsupported acceleration structure build flag"); |
| } |
| } |
| return grl_flags; |
| } |
| |
| static inline IndexFormat |
| vk_to_grl_IndexFormat(VkIndexType type) |
| { |
| switch (type) { |
| case VK_INDEX_TYPE_NONE_KHR: return INDEX_FORMAT_NONE; |
| case VK_INDEX_TYPE_UINT8_KHR: unreachable("No UINT8 support yet"); |
| case VK_INDEX_TYPE_UINT16: return INDEX_FORMAT_R16_UINT; |
| case VK_INDEX_TYPE_UINT32: return INDEX_FORMAT_R32_UINT; |
| default: |
| unreachable("Unsupported index type"); |
| } |
| } |
| |
| static inline VertexFormat |
| vk_to_grl_VertexFormat(VkFormat format) |
| { |
| switch (format) { |
| case VK_FORMAT_R32G32_SFLOAT: return VERTEX_FORMAT_R32G32_FLOAT; |
| case VK_FORMAT_R32G32B32_SFLOAT: return VERTEX_FORMAT_R32G32B32_FLOAT; |
| case VK_FORMAT_R16G16_SFLOAT: return VERTEX_FORMAT_R16G16_FLOAT; |
| case VK_FORMAT_R16G16B16A16_SFLOAT: return VERTEX_FORMAT_R16G16B16A16_FLOAT; |
| case VK_FORMAT_R16G16_SNORM: return VERTEX_FORMAT_R16G16_SNORM; |
| case VK_FORMAT_R16G16B16A16_SNORM: return VERTEX_FORMAT_R16G16B16A16_SNORM; |
| case VK_FORMAT_R16G16B16A16_UNORM: return VERTEX_FORMAT_R16G16B16A16_UNORM; |
| case VK_FORMAT_R16G16_UNORM: return VERTEX_FORMAT_R16G16_UNORM; |
| /* case VK_FORMAT_R10G10B10A2_UNORM: return VERTEX_FORMAT_R10G10B10A2_UNORM; */ |
| case VK_FORMAT_R8G8B8A8_UNORM: return VERTEX_FORMAT_R8G8B8A8_UNORM; |
| case VK_FORMAT_R8G8_UNORM: return VERTEX_FORMAT_R8G8_UNORM; |
| case VK_FORMAT_R8G8B8A8_SNORM: return VERTEX_FORMAT_R8G8B8A8_SNORM; |
| case VK_FORMAT_R8G8_SNORM: return VERTEX_FORMAT_R8G8_SNORM; |
| default: |
| unreachable("Unsupported vertex format"); |
| } |
| } |
| |
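| /* Builds the GRL Geo descriptor for one geometry, applying the build-range |
| * offsets (transformOffset, primitiveOffset, firstVertex) to the |
| * application-provided buffer addresses. |
| */ |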
| static struct Geo |
| vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry, |
| uint32_t prim_count, |
| uint32_t transform_offset, |
| uint32_t primitive_offset, |
| uint32_t first_vertex) |
| { |
| struct Geo geo = { |
| .Flags = vk_to_grl_GeometryFlags(pGeometry->flags), |
| }; |
| |
| switch (pGeometry->geometryType) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: { |
| const VkAccelerationStructureGeometryTrianglesDataKHR *vk_tri = |
| &pGeometry->geometry.triangles; |
| |
| geo.Type = GEOMETRY_TYPE_TRIANGLES; |
| |
| geo.Desc.Triangles.pTransformBuffer = |
| vk_tri->transformData.deviceAddress; |
| geo.Desc.Triangles.pIndexBuffer = |
| vk_tri->indexData.deviceAddress; |
| geo.Desc.Triangles.pVertexBuffer = |
| vk_tri->vertexData.deviceAddress; |
| geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride; |
| |
| if (geo.Desc.Triangles.pTransformBuffer) |
| geo.Desc.Triangles.pTransformBuffer += transform_offset; |
| |
| if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) { |
| geo.Desc.Triangles.IndexCount = 0; |
| geo.Desc.Triangles.VertexCount = prim_count * 3; |
| geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE; |
| geo.Desc.Triangles.pVertexBuffer += primitive_offset; |
| } else { |
| geo.Desc.Triangles.IndexCount = prim_count * 3; |
| geo.Desc.Triangles.VertexCount = vk_tri->maxVertex; |
| geo.Desc.Triangles.IndexFormat = |
| vk_to_grl_IndexFormat(vk_tri->indexType); |
| geo.Desc.Triangles.pIndexBuffer += primitive_offset; |
| } |
| |
| geo.Desc.Triangles.VertexFormat = |
| vk_to_grl_VertexFormat(vk_tri->vertexFormat); |
| geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex; |
| break; |
| } |
| |
| case VK_GEOMETRY_TYPE_AABBS_KHR: { |
| const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs = |
| &pGeometry->geometry.aabbs; |
| geo.Type = GEOMETRY_TYPE_PROCEDURAL; |
| geo.Desc.Procedural.pAABBs_GPUVA = |
| vk_aabbs->data.deviceAddress + primitive_offset; |
| geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride; |
| geo.Desc.Procedural.AABBCount = prim_count; |
| break; |
| } |
| |
| default: |
| unreachable("Invalid geometry type"); |
| } |
| |
| return geo; |
| } |
| |
| #include "grl/grl_metakernel_copy.h" |
| #include "grl/grl_metakernel_misc.h" |
| #include "grl/grl_metakernel_build_primref.h" |
| #include "grl/grl_metakernel_new_sah_builder.h" |
| #include "grl/grl_metakernel_build_leaf.h" |
| |
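| /* Per-build state gathered up front in cmd_build_acceleration_structures() |
| * and consumed by the later dispatch rounds. |
| */ |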
| struct build_state { |
| enum anv_rt_bvh_build_method build_method; |
| |
| struct MKSizeEstimate estimate; |
| struct scratch_layout scratch; |
| struct MKBuilderState state; |
| |
| struct anv_address bvh_addr; |
| |
| size_t geom_size_prefix_sum_buffer; |
| size_t transient_size; |
| |
| uint32_t leaf_type; |
| uint32_t leaf_size; |
| |
| uint32_t num_geometries; |
| uint32_t num_instances; |
| |
| uint64_t instances_addr; |
| bool array_of_instances_ptr; |
| |
| const VkAccelerationStructureGeometryKHR *vk_geoms; |
| }; |
| |
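| /* Selects where the new-SAH builder keeps its scratch data. The BVH2 is |
| * placed in the destination BVH's leaf area when that area is large enough, |
| * otherwise in the dedicated scratch allocation; the qnode and primref-index |
| * buffers always come from the scratch layout. |
| */ |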
| static void |
| get_binnedsah_scratch_buffers(struct build_state *bs, |
| uint64_t *p_qnode_buffer, |
| uint64_t *p_primref_indices, |
| uint64_t *p_bvh2) |
| { |
| if (bs->estimate.numBuildPrimitives == 0) |
| { |
| *p_bvh2 = 0; |
| *p_qnode_buffer = 0; |
| *p_primref_indices = 0; |
| return; |
| } |
| |
| size_t bvh2_size = get_bvh2_size(bs->estimate.numBuildPrimitives); |
| if (bs->estimate.leaf_data_size < bvh2_size) { |
| assert(bs->scratch.bvh2_buffer != 0); |
| *p_bvh2 = bs->scratch.bvh2_buffer; |
| } else { |
| *p_bvh2 = intel_canonical_address(bs->state.bvh_buffer + |
| bs->estimate.leaf_data_start); |
| } |
| |
| assert(bs->scratch.qnode_buffer != 0); |
| *p_qnode_buffer = bs->scratch.qnode_buffer; |
| |
| assert(bs->scratch.leaf_index_buffers != 0); |
| *p_primref_indices = bs->scratch.leaf_index_buffers; |
| } |
| |
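| /* Copies CPU-prepared data into a mapped command-buffer allocation. */ |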
| static void |
| write_memory(struct anv_cmd_alloc alloc, size_t offset, const void *data, size_t data_len) |
| { |
| assert((offset + data_len) <= alloc.size); |
| memcpy(alloc.map + offset, data, data_len); |
| } |
| |
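| /* Common implementation behind genX(CmdBuildAccelerationStructuresKHR), |
| * built on top of the GRL metakernels. The builds are batched and |
| * processed in rounds: |
| * 1. init_globals initializes the per-build Globals structure, |
| * 2. instance/geometry metadata is copied into the destination BVHs and |
| * primrefs are generated from the application buffers, |
| * 3. the trivial and/or new-SAH builders construct the internal nodes, |
| * 4. the leaf builders write the instance/quad/procedural leaves. |
| */ |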
| static void |
| cmd_build_acceleration_structures( |
| struct anv_cmd_buffer *cmd_buffer, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, |
| const VkDeviceAddress *pIndirectDeviceAddresses, |
| const uint32_t *pIndirectStrides, |
| const uint32_t *const *ppMaxPrimitiveCounts) |
| { |
| struct anv_device *device = cmd_buffer->device; |
| VK_MULTIALLOC(ma); |
| |
| struct build_state *builds; |
| vk_multialloc_add(&ma, &builds, struct build_state, infoCount); |
| |
| if (!vk_multialloc_zalloc(&ma, |
| &cmd_buffer->device->vk.alloc, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) { |
| anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return; |
| } |
| |
| trace_intel_begin_as_build(&cmd_buffer->trace); |
| |
| /* TODO: Indirect */ |
| assert(ppBuildRangeInfos != NULL); |
| |
| size_t transient_mem_init_globals_size = 0; |
| size_t transient_mem_init_globals_offset = 0; |
| |
| size_t transient_total = 0; |
| |
| size_t private_mem_total = 0; |
| |
| size_t num_trivial_builds = 0; |
| size_t num_new_sah_builds = 0; |
| |
| /* Prepare a bunch of data for the kernels we have to run. */ |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i]; |
| struct anv_address scratch_addr = |
| anv_address_from_u64(pInfo->scratchData.deviceAddress); |
| |
| const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos = |
| ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL; |
| const uint32_t *pMaxPrimitiveCounts = |
| ppMaxPrimitiveCounts ? ppMaxPrimitiveCounts[i] : NULL; |
| |
| ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, |
| pInfo->dstAccelerationStructure); |
| |
| bs->build_method = device->bvh_build_method; |
| |
| bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel)); |
| |
| bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos, |
| pMaxPrimitiveCounts); |
| bs->scratch = get_gpu_scratch_layout(scratch_addr, bs->estimate, |
| bs->build_method); |
| |
| uint32_t leaf_size, leaf_type; |
| |
| switch (pInfo->type) { |
| case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: { |
| assert(pInfo->geometryCount == 1); |
| |
| const VkAccelerationStructureGeometryKHR *pGeometry = |
| get_geometry(pInfo, 0); |
| assert(pGeometry->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR); |
| |
| const VkAccelerationStructureGeometryInstancesDataKHR *instances = |
| &pGeometry->geometry.instances; |
| |
| bs->num_instances = pBuildRangeInfos[0].primitiveCount; |
| bs->instances_addr = instances->data.deviceAddress; |
| bs->array_of_instances_ptr = instances->arrayOfPointers; |
| leaf_type = NODE_TYPE_INSTANCE; |
| leaf_size = GENX(RT_BVH_INSTANCE_LEAF_length) * 4; |
| break; |
| } |
| |
| case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: { |
| bs->num_geometries = pInfo->geometryCount; |
| leaf_type = NODE_TYPE_QUAD; |
| leaf_size = GENX(RT_BVH_QUAD_LEAF_length) * 4; |
| break; |
| } |
| |
| default: |
| unreachable("Unsupported acceleration structure type"); |
| } |
| |
| size_t geom_struct_size = bs->num_geometries * sizeof(struct Geo); |
| size_t geom_prefix_sum_size = align_uintptr(sizeof(uint32_t) * (bs->num_geometries + 1), 64); |
| |
| bs->transient_size = geom_prefix_sum_size + geom_struct_size; |
| |
| bs->geom_size_prefix_sum_buffer = transient_total + 0; |
| |
| bs->state = (struct MKBuilderState) { |
| .geomDesc_buffer = bs->geom_size_prefix_sum_buffer + |
| geom_prefix_sum_size, |
| .build_primref_buffer = bs->scratch.primrefs, |
| .build_globals = bs->scratch.globals, |
| .bvh_buffer = anv_address_physical(bs->bvh_addr), |
| .leaf_type = leaf_type, |
| .leaf_size = leaf_size, |
| }; |
| |
| transient_total += bs->transient_size; |
| |
| switch (device->bvh_build_method) { |
| case ANV_BVH_BUILD_METHOD_TRIVIAL: |
| num_trivial_builds++; |
| break; |
| case ANV_BVH_BUILD_METHOD_NEW_SAH: |
| num_new_sah_builds++; |
| break; |
| default: |
| unreachable("invalid BVH build method"); |
| } |
| |
| transient_mem_init_globals_size += sizeof(struct BatchedInitGlobalsData); |
| } |
| |
| transient_total = align_transient_size(transient_total); |
| transient_mem_init_globals_offset = transient_total; |
| transient_total += align_transient_size(transient_mem_init_globals_size); |
| |
| size_t transient_mem_binnedsah_size = 0; |
| size_t transient_mem_binnedsah_offset = 0; |
| size_t private_mem_binnedsah_size = 0; |
| size_t private_mem_binnedsah_offset = 0; |
| |
| transient_mem_binnedsah_size = get_batched_binnedsah_transient_mem_size(num_new_sah_builds); |
| transient_mem_binnedsah_offset = transient_total; |
| transient_total += align_transient_size(transient_mem_binnedsah_size); |
| |
| private_mem_binnedsah_size = get_batched_binnedsah_private_mem_size(num_new_sah_builds); |
| private_mem_binnedsah_offset = private_mem_total; |
| private_mem_total += align_private_size(private_mem_binnedsah_size); |
| |
| /* Allocate the required memory, unless we already have a suitable buffer. */ |
| struct anv_cmd_alloc private_mem_alloc; |
| if (private_mem_total > cmd_buffer->state.rt.build_priv_mem_size) { |
| private_mem_alloc = |
| anv_cmd_buffer_alloc_space(cmd_buffer, private_mem_total, 64, |
| false /* mapped */); |
| if (anv_cmd_alloc_is_empty(private_mem_alloc)) { |
| anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| goto error; |
| } |
| |
| cmd_buffer->state.rt.build_priv_mem_addr = private_mem_alloc.address; |
| cmd_buffer->state.rt.build_priv_mem_size = private_mem_alloc.size; |
| } else { |
| private_mem_alloc = (struct anv_cmd_alloc) { |
| .address = cmd_buffer->state.rt.build_priv_mem_addr, |
| .map = anv_address_map(cmd_buffer->state.rt.build_priv_mem_addr), |
| .size = cmd_buffer->state.rt.build_priv_mem_size, |
| }; |
| } |
| |
| struct anv_cmd_alloc transient_mem_alloc = |
| anv_cmd_buffer_alloc_space(cmd_buffer, transient_total, 64, |
| true /* mapped */); |
| if (transient_total > 0 && anv_cmd_alloc_is_empty(transient_mem_alloc)) { |
| anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| goto error; |
| } |
| |
| uint64_t private_base = anv_address_physical(private_mem_alloc.address); |
| uint64_t transient_base = anv_address_physical(transient_mem_alloc.address); |
| |
| /* Prepare transient memory */ |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i]; |
| |
| const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos = |
| ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL; |
| |
| struct Geo *geos = transient_mem_alloc.map + bs->state.geomDesc_buffer; |
| uint32_t *prefixes = transient_mem_alloc.map + bs->geom_size_prefix_sum_buffer; |
| uint32_t prefix_sum = 0; |
| for (unsigned g = 0; g < bs->num_geometries; g++) { |
| const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g); |
| uint32_t prim_count = pBuildRangeInfos[g].primitiveCount; |
| geos[g] = vk_to_grl_Geo(pGeometry, prim_count, |
| pBuildRangeInfos[g].transformOffset, |
| pBuildRangeInfos[g].primitiveOffset, |
| pBuildRangeInfos[g].firstVertex); |
| |
| prefixes[g] = prefix_sum; |
| prefix_sum += prim_count; |
| } |
| |
| prefixes[bs->num_geometries] = prefix_sum; |
| |
| bs->geom_size_prefix_sum_buffer = |
| intel_canonical_address(bs->geom_size_prefix_sum_buffer + |
| transient_base); |
| bs->state.geomDesc_buffer = |
| intel_canonical_address(bs->state.geomDesc_buffer + |
| transient_base); |
| |
| struct BatchedInitGlobalsData data = { |
| .p_build_globals = bs->scratch.globals, |
| .p_bvh_buffer = anv_address_physical(bs->bvh_addr), |
| |
| .numPrimitives = 0, |
| .numGeometries = bs->num_geometries, |
| .numInstances = bs->num_instances, |
| |
| .instance_descs_start = bs->estimate.instance_descs_start, |
| .geo_meta_data_start = bs->estimate.geo_meta_data_start, |
| .node_data_start = bs->estimate.node_data_start, |
| .leaf_data_start = bs->estimate.leaf_data_start, |
| .procedural_data_start = bs->estimate.procedural_data_start, |
| .back_pointer_start = bs->estimate.back_pointer_start, |
| .sizeTotal = bs->estimate.sizeTotal, |
| |
| .leafType = bs->state.leaf_type, |
| .leafSize = bs->state.leaf_size, |
| }; |
| |
| write_memory(transient_mem_alloc, |
| transient_mem_init_globals_offset + i * sizeof(data), |
| &data, sizeof(data)); |
| } |
| |
| genX(flush_pipeline_select_gpgpu)(cmd_buffer); |
| |
| /* Due to the nature of GRL and its heavy use of jumps/predication, we |
| * cannot tell exactly in what order the CFE_STATE commands we insert are |
| * going to be executed. So always program the largest possible scratch |
| * size. |
| */ |
| genX(cmd_buffer_ensure_cfe_state)( |
| cmd_buffer, |
| cmd_buffer->device->physical->max_grl_scratch_size); |
| |
| /* Round 1 : init_globals kernel */ |
| genX(grl_misc_batched_init_globals)( |
| cmd_buffer, |
| intel_canonical_address(transient_base + |
| transient_mem_init_globals_offset), |
| infoCount); |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_GRL_FLUSH_FLAGS, |
| "building accel struct"); |
| |
| /* Round 2 : Copy instance/geometry data from the application-provided |
| * buffers into the acceleration structures. |
| */ |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| /* Metadata */ |
| if (bs->num_instances) { |
| assert(bs->num_geometries == 0); |
| |
| const uint64_t copy_size = bs->num_instances * sizeof(InstanceDesc); |
| /* This must be calculated in the same way as |
| * groupCountForGeoMetaDataCopySize |
| */ |
| const uint32_t num_threads = (copy_size >> 8) + 3; |
| |
| if (bs->array_of_instances_ptr) { |
| genX(grl_misc_copy_instance_ptrs)( |
| cmd_buffer, |
| anv_address_physical(anv_address_add(bs->bvh_addr, |
| bs->estimate.instance_descs_start)), |
| bs->instances_addr, |
| copy_size, num_threads); |
| } else { |
| genX(grl_misc_copy_instances)( |
| cmd_buffer, |
| anv_address_physical(anv_address_add(bs->bvh_addr, |
| bs->estimate.instance_descs_start)), |
| bs->instances_addr, |
| copy_size, num_threads); |
| } |
| } |
| |
| if (bs->num_geometries) { |
| assert(bs->num_instances == 0); |
| const uint64_t copy_size = bs->num_geometries * sizeof(struct GeoMetaData); |
| |
| /* This must be calculated in the same way as |
| * groupCountForGeoMetaDataCopySize |
| */ |
| const uint32_t num_threads = (copy_size >> 6) + 1; |
| |
| genX(grl_misc_copy_geo_meta_data)( |
| cmd_buffer, |
| anv_address_physical(anv_address_add(bs->bvh_addr, |
| bs->estimate.geo_meta_data_start)), |
| bs->state.geomDesc_buffer, |
| copy_size, |
| num_threads); |
| } |
| |
| /* Primrefs */ |
| if (bs->num_instances) { |
| if (bs->array_of_instances_ptr) { |
| genX(grl_build_primref_buildPrimirefsFromInstancesArrOfPtrs)( |
| cmd_buffer, |
| bs->instances_addr, |
| PREFIX_MK_SIZE(grl_build_primref, bs->estimate), |
| PREFIX_MK_STATE(grl_build_primref, bs->state), |
| false /* allowUpdate */); |
| } else { |
| genX(grl_build_primref_buildPrimirefsFromInstances)( |
| cmd_buffer, |
| bs->instances_addr, |
| PREFIX_MK_SIZE(grl_build_primref, bs->estimate), |
| PREFIX_MK_STATE(grl_build_primref, bs->state), |
| false /* allowUpdate */); |
| } |
| } |
| |
| if (bs->num_geometries) { |
| const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i]; |
| const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos = |
| ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL; |
| |
| assert(pInfo->geometryCount == bs->num_geometries); |
| for (unsigned g = 0; g < pInfo->geometryCount; g++) { |
| const VkAccelerationStructureGeometryKHR *pGeometry = |
| get_geometry(pInfo, g); |
| |
| switch (pGeometry->geometryType) { |
| case VK_GEOMETRY_TYPE_TRIANGLES_KHR: |
| genX(grl_build_primref_primrefs_from_tris)( |
| cmd_buffer, |
| PREFIX_MK_STATE(grl_build_primref, bs->state), |
| PREFIX_MK_SIZE(grl_build_primref, bs->estimate), |
| bs->state.geomDesc_buffer + g * sizeof(struct Geo), |
| g, |
| vk_to_grl_GeometryFlags(pGeometry->flags), |
| /* TODO: Indirect */ |
| pBuildRangeInfos[g].primitiveCount); |
| break; |
| |
| case VK_GEOMETRY_TYPE_AABBS_KHR: |
| genX(grl_build_primref_primrefs_from_proc)( |
| cmd_buffer, |
| PREFIX_MK_STATE(grl_build_primref, bs->state), |
| PREFIX_MK_SIZE(grl_build_primref, bs->estimate), |
| bs->state.geomDesc_buffer + g * sizeof(struct Geo), |
| g, |
| vk_to_grl_GeometryFlags(pGeometry->flags), |
| /* TODO: Indirect */ |
| pBuildRangeInfos[g].primitiveCount); |
| break; |
| |
| default: |
| unreachable("Invalid geometry type"); |
| } |
| } |
| } |
| } |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_GRL_FLUSH_FLAGS, |
| "building accel struct"); |
| |
| /* Dispatch trivial builds */ |
| if (num_trivial_builds) { |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| if (bs->build_method != ANV_BVH_BUILD_METHOD_TRIVIAL) |
| continue; |
| |
| genX(grl_new_sah_builder_single_pass_binsah)( |
| cmd_buffer, |
| bs->scratch.globals, |
| bs->state.bvh_buffer, |
| bs->state.build_primref_buffer, |
| bs->scratch.leaf_index_buffers, |
| false /* alloc_backpointers */); |
| } |
| } |
| |
| /* Dispatch new SAH builds */ |
| if (num_new_sah_builds) { |
| size_t global_ptrs_offset = transient_mem_binnedsah_offset; |
| size_t buffers_info_offset = transient_mem_binnedsah_offset + sizeof(gpuva_t) * num_new_sah_builds; |
| |
| size_t scheduler_offset = private_mem_binnedsah_offset; |
| size_t sah_globals_offset = private_mem_binnedsah_offset + get_scheduler_size(num_new_sah_builds); |
| |
| struct SAHBuildArgsBatchable args = { |
| .num_builds = infoCount, |
| .p_globals_ptrs = intel_canonical_address(transient_base + global_ptrs_offset), |
| .p_buffers_info = intel_canonical_address(transient_base + buffers_info_offset), |
| .p_scheduler = intel_canonical_address(private_base + scheduler_offset), |
| .p_sah_globals = intel_canonical_address(private_base + sah_globals_offset), |
| .num_max_qnode_global_root_buffer_entries = MAX2(num_new_sah_builds, QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM), |
| }; |
| |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| if (bs->build_method != ANV_BVH_BUILD_METHOD_NEW_SAH) |
| continue; |
| |
| uint64_t p_build_primref_index_buffers; |
| uint64_t p_bvh2; |
| uint64_t p_qnode_child_buffer; |
| |
| get_binnedsah_scratch_buffers(bs, |
| &p_qnode_child_buffer, |
| &p_build_primref_index_buffers, |
| &p_bvh2); |
| |
| struct SAHBuildBuffersInfo buffers = { |
| .p_primref_index_buffers = bs->scratch.leaf_index_buffers, |
| .p_bvh_base = bs->state.bvh_buffer, |
| .p_primrefs_buffer = bs->state.build_primref_buffer, |
| .p_bvh2 = p_bvh2, |
| .p_qnode_root_buffer = p_qnode_child_buffer, |
| .sah_globals_flags = 0, |
| }; |
| |
| write_memory(transient_mem_alloc, buffers_info_offset, &buffers, sizeof(buffers)); |
| buffers_info_offset += sizeof(buffers); |
| |
| write_memory(transient_mem_alloc, global_ptrs_offset, &bs->state.build_globals, |
| sizeof(bs->state.build_globals)); |
| global_ptrs_offset += sizeof(bs->state.build_globals); |
| } |
| |
| genX(grl_new_sah_builder_new_sah_build_batchable)( |
| cmd_buffer, PREFIX_MK_SAH_BUILD_ARGS_BATCHABLE(grl_new_sah_builder, args)); |
| } |
| |
| if (num_new_sah_builds == 0) |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_GRL_FLUSH_FLAGS, |
| "building accel struct"); |
| |
| /* Finally write the leaves. */ |
| for (uint32_t i = 0; i < infoCount; i++) { |
| struct build_state *bs = &builds[i]; |
| |
| if (bs->num_instances) { |
| assert(bs->num_geometries == 0); |
| if (bs->array_of_instances_ptr) { |
| genX(grl_leaf_builder_buildLeafDXR_instances_pointers)(cmd_buffer, |
| PREFIX_MK_STATE(grl_leaf_builder, bs->state), |
| bs->scratch.leaf_index_buffers, |
| bs->instances_addr, |
| bs->scratch.leaf_index_buffer_stride, |
| 0 /* offset */, |
| bs->estimate.numBuildPrimitives); |
| } else { |
| genX(grl_leaf_builder_buildLeafDXR_instances)(cmd_buffer, |
| PREFIX_MK_STATE(grl_leaf_builder, bs->state), |
| bs->scratch.leaf_index_buffers, |
| bs->instances_addr, |
| bs->scratch.leaf_index_buffer_stride, |
| 0 /* offset */, |
| bs->estimate.numBuildPrimitives); |
| } |
| } |
| |
| if (bs->num_geometries) { |
| assert(bs->num_instances == 0); |
| const uint64_t p_numPrimitives = |
| bs->state.build_globals + offsetof(struct Globals, numPrimitives); |
| |
| assert(bs->estimate.numProcedurals == 0 || |
| bs->estimate.numTriangles == 0); |
| if (bs->estimate.numProcedurals) { |
| genX(grl_leaf_builder_buildLeafDXR_procedurals)( |
| cmd_buffer, |
| PREFIX_MK_STATE(grl_leaf_builder, bs->state), |
| bs->scratch.leaf_index_buffers, |
| bs->scratch.leaf_index_buffer_stride, |
| 0 /* offset */, |
| p_numPrimitives); |
| } else { |
| genX(grl_leaf_builder_buildLeafDXR_quads)( |
| cmd_buffer, |
| PREFIX_MK_STATE(grl_leaf_builder, bs->state), |
| bs->scratch.leaf_index_buffers, |
| bs->scratch.leaf_index_buffer_stride, |
| 0 /* offset */, |
| p_numPrimitives, |
| false /* allow_updates */); |
| } |
| } |
| } |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_GRL_FLUSH_FLAGS, |
| "building accel struct"); |
| |
| trace_intel_end_as_build(&cmd_buffer->trace); |
| |
| error: |
| vk_free(&cmd_buffer->device->vk.alloc, builds); |
| } |
| |
| void |
| genX(CmdBuildAccelerationStructuresKHR)( |
| VkCommandBuffer commandBuffer, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| if (anv_batch_has_error(&cmd_buffer->batch)) |
| return; |
| |
| cmd_build_acceleration_structures(cmd_buffer, infoCount, pInfos, |
| ppBuildRangeInfos, NULL, NULL, NULL); |
| } |
| |
| void |
| genX(CmdBuildAccelerationStructuresIndirectKHR)( |
| VkCommandBuffer commandBuffer, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, |
| const VkDeviceAddress* pIndirectDeviceAddresses, |
| const uint32_t* pIndirectStrides, |
| const uint32_t* const* ppMaxPrimitiveCounts) |
| { |
| unreachable("Unimplemented"); |
| } |
| |
| void |
| genX(CmdCopyAccelerationStructureKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src); |
| ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst); |
| |
| assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR || |
| pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR); |
| |
| if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) { |
| uint64_t src_size_addr = |
| vk_acceleration_structure_get_va(src_accel) + |
| offsetof(struct BVHBase, Meta.allocationSize); |
| genX(grl_copy_clone_indirect)( |
| cmd_buffer, |
| vk_acceleration_structure_get_va(dst_accel), |
| vk_acceleration_structure_get_va(src_accel), |
| src_size_addr); |
| } else { |
| genX(grl_copy_compact)( |
| cmd_buffer, |
| vk_acceleration_structure_get_va(dst_accel), |
| vk_acceleration_structure_get_va(src_accel)); |
| } |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_PIPE_END_OF_PIPE_SYNC_BIT, |
| "after copy acceleration struct"); |
| } |
| |
| void |
| genX(CmdCopyAccelerationStructureToMemoryKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src); |
| struct anv_device *device = cmd_buffer->device; |
| uint64_t src_size_addr = |
| vk_acceleration_structure_get_va(src_accel) + |
| offsetof(struct BVHBase, Meta.allocationSize); |
| |
| assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR); |
| |
| genX(grl_copy_serialize_indirect)( |
| cmd_buffer, |
| pInfo->dst.deviceAddress, |
| vk_acceleration_structure_get_va(src_accel), |
| anv_address_physical(device->rt_uuid_addr), |
| src_size_addr); |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_PIPE_END_OF_PIPE_SYNC_BIT, |
| "after copy acceleration struct"); |
| } |
| |
| void |
| genX(CmdCopyMemoryToAccelerationStructureKHR)( |
| VkCommandBuffer commandBuffer, |
| const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); |
| ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst); |
| |
| assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR); |
| |
| uint64_t src_size_addr = pInfo->src.deviceAddress + |
| offsetof(struct SerializationHeader, DeserializedSizeInBytes); |
| genX(grl_copy_deserialize_indirect)( |
| cmd_buffer, |
| vk_acceleration_structure_get_va(dst_accel), |
| pInfo->src.deviceAddress, |
| src_size_addr); |
| |
| anv_add_pending_pipe_bits(cmd_buffer, |
| ANV_PIPE_END_OF_PIPE_SYNC_BIT, |
| "after copy acceleration struct"); |
| } |
| |
| /* TODO: Host commands */ |
| |
| VkResult |
| genX(BuildAccelerationStructuresKHR)( |
| VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| uint32_t infoCount, |
| const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, |
| const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VkResult |
| genX(CopyAccelerationStructureKHR)( |
| VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VkResult |
| genX(CopyAccelerationStructureToMemoryKHR)( |
| VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VkResult |
| genX(CopyMemoryToAccelerationStructureKHR)( |
| VkDevice _device, |
| VkDeferredOperationKHR deferredOperation, |
| const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| VkResult |
| genX(WriteAccelerationStructuresPropertiesKHR)( |
| VkDevice _device, |
| uint32_t accelerationStructureCount, |
| const VkAccelerationStructureKHR* pAccelerationStructures, |
| VkQueryType queryType, |
| size_t dataSize, |
| void* pData, |
| size_t stride) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| unreachable("Unimplemented"); |
| return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |
| |
| #endif /* GFX_VERx10 >= 125 */ |