| /* |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #ifndef ANV_PRIVATE_H |
| #define ANV_PRIVATE_H |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <stdbool.h> |
| #include <pthread.h> |
| #include <assert.h> |
| #include <stdint.h> |
| #include "drm-uapi/drm_fourcc.h" |
| |
| #ifdef HAVE_VALGRIND |
| #include <valgrind.h> |
| #include <memcheck.h> |
| #define VG(x) x |
| #else |
| #define VG(x) ((void)0) |
| #endif |
| |
| #include "common/intel_aux_map.h" |
| #include "common/intel_bind_timeline.h" |
| #include "common/intel_engine.h" |
| #include "common/intel_gem.h" |
| #include "common/intel_l3_config.h" |
| #include "common/intel_measure.h" |
| #include "common/intel_mem.h" |
| #include "common/intel_sample_positions.h" |
| #include "decoder/intel_decoder.h" |
| #include "dev/intel_device_info.h" |
| #include "blorp/blorp.h" |
| #include "compiler/brw_compiler.h" |
| #include "compiler/brw_kernel.h" |
| #include "compiler/brw_rt.h" |
| #include "ds/intel_driver_ds.h" |
| #include "util/bitset.h" |
| #include "util/bitscan.h" |
| #include "util/detect_os.h" |
| #include "util/macros.h" |
| #include "util/hash_table.h" |
| #include "util/list.h" |
| #include "util/perf/u_trace.h" |
| #include "util/set.h" |
| #include "util/sparse_array.h" |
| #include "util/u_atomic.h" |
| #if DETECT_OS_ANDROID |
| #include "util/u_gralloc/u_gralloc.h" |
| #endif |
| #include "util/u_vector.h" |
| #include "util/u_math.h" |
| #include "util/vma.h" |
| #include "util/xmlconfig.h" |
| #include "vk_acceleration_structure.h" |
| #include "vk_alloc.h" |
| #include "vk_buffer.h" |
| #include "vk_buffer_view.h" |
| #include "vk_command_buffer.h" |
| #include "vk_command_pool.h" |
| #include "vk_debug_report.h" |
| #include "vk_descriptor_update_template.h" |
| #include "vk_device.h" |
| #include "vk_device_memory.h" |
| #include "vk_drm_syncobj.h" |
| #include "vk_enum_defines.h" |
| #include "vk_format.h" |
| #include "vk_framebuffer.h" |
| #include "vk_graphics_state.h" |
| #include "vk_image.h" |
| #include "vk_instance.h" |
| #include "vk_pipeline_cache.h" |
| #include "vk_physical_device.h" |
| #include "vk_sampler.h" |
| #include "vk_shader_module.h" |
| #include "vk_sync.h" |
| #include "vk_sync_timeline.h" |
| #include "vk_texcompress_astc.h" |
| #include "vk_util.h" |
| #include "vk_query_pool.h" |
| #include "vk_queue.h" |
| #include "vk_log.h" |
| #include "vk_ycbcr_conversion.h" |
| #include "vk_video.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| /* Pre-declarations needed for WSI entrypoints */ |
| struct wl_surface; |
| struct wl_display; |
| typedef struct xcb_connection_t xcb_connection_t; |
| typedef uint32_t xcb_visualid_t; |
| typedef uint32_t xcb_window_t; |
| |
| struct anv_batch; |
| struct anv_buffer; |
| struct anv_buffer_view; |
| struct anv_image_view; |
| struct anv_instance; |
| |
| struct intel_aux_map_context; |
| struct intel_perf_config; |
| struct intel_perf_counter_pass; |
| struct intel_perf_query_result; |
| |
| #include <vulkan/vulkan.h> |
| #include <vulkan/vk_icd.h> |
| |
| #include "anv_android.h" |
| #include "anv_entrypoints.h" |
| #include "anv_kmd_backend.h" |
| #include "anv_rmv.h" |
| #include "isl/isl.h" |
| |
| #include "dev/intel_debug.h" |
| #undef MESA_LOG_TAG |
| #define MESA_LOG_TAG "MESA-INTEL" |
| #include "util/log.h" |
| #include "wsi_common.h" |
| |
| /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */ |
| #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST) |
| #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST |
| #endif |
| |
| #define NSEC_PER_SEC 1000000000ull |
| |
| #define BINDING_TABLE_POOL_BLOCK_SIZE (65536) |
| |
| /* 3DSTATE_VERTEX_BUFFER supports 33 VBs; we use 2 for the base & drawid SGVs */ |
| #define MAX_VBS (33 - 2) |
| |
| /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler |
| * only supports the push model of VS inputs, and we only have 128 GRFs, |
| * minus the g0 and g1 payload, which gives us a maximum of 31 VEs. Plus, |
| * we use two of them for SGVs. |
| */ |
| #define MAX_VES (31 - 2) |
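| |
| /* Worked out (a sketch of the arithmetic above, assuming the SIMD8 push |
| * model): 128 GRFs - 2 payload registers (g0/g1) = 126 GRFs; each pushed |
| * vec4 input occupies 4 GRFs, so floor(126 / 4) = 31 VEs, minus the 2 SGV |
| * elements = 29 usable VEs. |
| */ |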
| |
| #define MAX_XFB_BUFFERS 4 |
| #define MAX_XFB_STREAMS 4 |
| #define MAX_SETS 8 |
| #define MAX_RTS 8 |
| #define MAX_VIEWPORTS 16 |
| #define MAX_SCISSORS 16 |
| #define MAX_PUSH_CONSTANTS_SIZE 128 |
| #define MAX_DYNAMIC_BUFFERS 16 |
| #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ |
| #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 |
| #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 |
| #define MAX_EMBEDDED_SAMPLERS 2048 |
| #define MAX_CUSTOM_BORDER_COLORS 4096 |
| /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we |
| * use 64 here to avoid cache issues. We could most likely bring it back to |
| * 32 if we had different virtual addresses for the different views on a |
| * given GEM object. |
| */ |
| #define ANV_UBO_ALIGNMENT 64 |
| #define ANV_SSBO_ALIGNMENT 4 |
| #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4 |
| #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16 |
| #define MAX_SAMPLE_LOCATIONS 16 |
| |
| /* RENDER_SURFACE_STATE is a bit smaller (48 bytes) but since it is aligned |
| * to 64 and we can't put anything else there, we use 64 bytes. |
| */ |
| #define ANV_SURFACE_STATE_SIZE (64) |
| |
| /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model": |
| * |
| * "The surface state model is used when a Binding Table Index (specified |
| * in the message descriptor) of less than 240 is specified. In this model, |
| * the Binding Table Index is used to index into the binding table, and the |
| * binding table entry contains a pointer to the SURFACE_STATE." |
| * |
| * Binding table indices 240 and above are used for various things in the |
| * hardware such as stateless, stateless with incoherent cache, SLM, and |
| * bindless. |
| */ |
| #define MAX_BINDING_TABLE_SIZE 240 |
| |
| #define ANV_SVGS_VB_INDEX MAX_VBS |
| #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1) |
| |
| /* We reserve this MI ALU register for the purpose of handling predication. |
| * Other code which uses the MI ALU should leave it alone. |
| */ |
| #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */ |
| |
| /* We reserve this MI ALU register to pass around an offset computed from |
| * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (VK_KHR_performance_query). |
| * Other code which uses the MI ALU should leave it alone. |
| */ |
| #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */ |
| |
| /* We reserve this MI ALU register to hold the last programmed bindless |
| * surface state base address so that we can predicate STATE_BASE_ADDRESS |
| * emissions if the address doesn't change. |
| */ |
| #define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */ |
| |
| #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1) |
| |
| #define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8) |
| |
| #define ANV_SAMPLER_STATE_SIZE (32) |
| |
| /* For gfx12 we set the streamout buffers using 4 separate commands |
| * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout |
| * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of |
| * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the |
| * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode. |
| * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for |
| * 3DSTATE_SO_BUFFER_INDEX_0. |
| */ |
| #define SO_BUFFER_INDEX_0_CMD 0x60 |
| |
| #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) |
| |
| /* The TR-TT L1 page table entries may contain these values instead of actual |
| * pointers to indicate the regions are either NULL or invalid. We program |
| * these values to TR-TT registers, so we could change them, but it's super |
| * convenient to have the NULL value be 0 because everything is |
| * zero-initialized when allocated. |
| * |
| * Since we reserve these values for NULL/INVALID, we can't use them as |
| * destinations for TR-TT address translation. Both values are shifted by 16 |
| * bits, which results in graphics addresses 0 and 64KB. In Anv the first vma |
| * starts at 2MB, so we already don't use 0 and 64KB for anything, so there's |
| * nothing really to reserve. We could instead just reserve random 64KB |
| * ranges from any of the non-TR-TT vmas and use their addresses. |
| */ |
| #define ANV_TRTT_L1_NULL_TILE_VAL 0 |
| #define ANV_TRTT_L1_INVALID_TILE_VAL 1 |
| |
| #define ANV_COLOR_OUTPUT_DISABLED (0xff) |
| #define ANV_COLOR_OUTPUT_UNUSED (0xfe) |
| |
| static inline uint32_t |
| align_down_npot_u32(uint32_t v, uint32_t a) |
| { |
| return v - (v % a); |
| } |
| |
| /** Alignment must be a power of 2. */ |
| static inline bool |
| anv_is_aligned(uintmax_t n, uintmax_t a) |
| { |
| assert(a == (a & -a)); |
| return (n & (a - 1)) == 0; |
| } |
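| |
| /* For illustration: for a power of two `a`, the two's complement negation |
| * `-a` keeps only the bits at and above the lowest set bit, so `a & -a` |
| * isolates that bit and equals `a` exactly when a single bit is set: |
| * |
| *    a = 64: 0b0100_0000 & 0b1100_0000 == 0b0100_0000 == a  (power of two) |
| *    a = 96: 0b0110_0000 & 0b1010_0000 == 0b0010_0000 != a  (not) |
| * |
| * The test `(n & (a - 1)) == 0` then checks that the low log2(a) bits of n |
| * are all clear. |
| */ |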
| |
| static inline union isl_color_value |
| vk_to_isl_color(VkClearColorValue color) |
| { |
| return (union isl_color_value) { |
| .u32 = { |
| color.uint32[0], |
| color.uint32[1], |
| color.uint32[2], |
| color.uint32[3], |
| }, |
| }; |
| } |
| |
| static inline union isl_color_value |
| vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format) |
| { |
| const struct isl_format_layout *fmtl = isl_format_get_layout(format); |
| union isl_color_value isl_color = { .u32 = {0, } }; |
| |
| #define COPY_COLOR_CHANNEL(c, i) \ |
| if (fmtl->channels.c.bits) \ |
| isl_color.u32[i] = color.uint32[i] |
| |
| COPY_COLOR_CHANNEL(r, 0); |
| COPY_COLOR_CHANNEL(g, 1); |
| COPY_COLOR_CHANNEL(b, 2); |
| COPY_COLOR_CHANNEL(a, 3); |
| |
| #undef COPY_COLOR_CHANNEL |
| |
| return isl_color; |
| } |
| |
| void __anv_perf_warn(struct anv_device *device, |
| const struct vk_object_base *object, |
| const char *file, int line, const char *format, ...) |
| anv_printflike(5, 6); |
| |
| /** |
| * Print a FINISHME message, including its source location. |
| */ |
| #define anv_finishme(format, ...) \ |
| do { \ |
| static bool reported = false; \ |
| if (!reported) { \ |
| mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \ |
| ##__VA_ARGS__); \ |
| reported = true; \ |
| } \ |
| } while (0) |
| |
| /** |
| * Print a perf warning message. Set INTEL_DEBUG=perf to see these. |
| */ |
| #define anv_perf_warn(objects_macro, format, ...) \ |
| do { \ |
| static bool reported = false; \ |
| if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \ |
| __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \ |
| VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \ |
| objects_macro, __FILE__, __LINE__, \ |
| format, ## __VA_ARGS__); \ |
| reported = true; \ |
| } \ |
| } while (0) |
| |
| /* A non-fatal assert. Useful for debugging. */ |
| #if MESA_DEBUG |
| #define anv_assert(x) ({ \ |
| if (unlikely(!(x))) \ |
| mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \ |
| }) |
| #else |
| #define anv_assert(x) |
| #endif |
| |
| enum anv_bo_alloc_flags { |
| /** Specifies that the BO must have a 32-bit address |
| * |
| * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS. |
| */ |
| ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0), |
| |
| /** Specifies that the BO may be shared externally */ |
| ANV_BO_ALLOC_EXTERNAL = (1 << 1), |
| |
| /** Specifies that the BO should be mapped */ |
| ANV_BO_ALLOC_MAPPED = (1 << 2), |
| |
| /** Specifies that the BO should be coherent. |
| * |
| * Note: on platforms with LLC, where HOST_CACHED + HOST_COHERENT is free, |
| * the BO can get upgraded to HOST_CACHED_COHERENT. |
| */ |
| ANV_BO_ALLOC_HOST_COHERENT = (1 << 3), |
| |
| /** Specifies that the BO should be captured in error states */ |
| ANV_BO_ALLOC_CAPTURE = (1 << 4), |
| |
| /** Specifies that the BO will have an address assigned by the caller |
| * |
| * Such BOs do not exist in any VMA heap. |
| */ |
| ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5), |
| |
| /** Enables implicit synchronization on the BO |
| * |
| * This is the opposite of EXEC_OBJECT_ASYNC. |
| */ |
| ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6), |
| |
| /** Enables implicit synchronization on the BO |
| * |
| * This is equivalent to EXEC_OBJECT_WRITE. |
| */ |
| ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7), |
| |
| /** Has an address which is visible to the client */ |
| ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8), |
| |
| /** Align the BO's virtual address to match AUX-TT requirements */ |
| ANV_BO_ALLOC_AUX_TT_ALIGNED = (1 << 9), |
| |
| /** This buffer is allocated from local memory and should be CPU visible */ |
| ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10), |
| |
| /** For non device local allocations */ |
| ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11), |
| |
| /** This buffer will be scanned out to a display */ |
| ANV_BO_ALLOC_SCANOUT = (1 << 12), |
| |
| /** For descriptor pools */ |
| ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13), |
| |
| /** For buffers that will be bound using TR-TT. |
| * |
| * Not for buffers used as the TR-TT page tables. |
| */ |
| ANV_BO_ALLOC_TRTT = (1 << 14), |
| |
| /** Protected buffer */ |
| ANV_BO_ALLOC_PROTECTED = (1 << 15), |
| |
| /** Specifies that the BO should be cached and incoherent. */ |
| ANV_BO_ALLOC_HOST_CACHED = (1 << 16), |
| |
| /** For buffers addressable from the dynamic state heap */ |
| ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL = (1 << 17), |
| |
| /** Specifies that the BO is imported. |
| * |
| * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL |
| */ |
| ANV_BO_ALLOC_IMPORTED = (1 << 18), |
| |
| /** Specify whether this BO is internal to the driver */ |
| ANV_BO_ALLOC_INTERNAL = (1 << 19), |
| |
| /** Allocate with CCS AUX requirements |
| * |
| * This pads the BO to include CCS data mappable through the AUX-TT and |
| * aligned to the AUX-TT requirements. |
| */ |
| ANV_BO_ALLOC_AUX_CCS = (1 << 20), |
| |
| /** Compressed buffer, only supported in Xe2+ */ |
| ANV_BO_ALLOC_COMPRESSED = (1 << 21), |
| }; |
| |
| /** Specifies that the BO should be cached and coherent. */ |
| #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \ |
| ANV_BO_ALLOC_HOST_CACHED) |
| |
| |
| struct anv_bo { |
| const char *name; |
| |
| /* The VMA heap in anv_device from which this BO takes its offset. |
| * |
| * This can only be NULL when the BO was allocated with |
| * ANV_BO_ALLOC_FIXED_ADDRESS. |
| */ |
| struct util_vma_heap *vma_heap; |
| |
| /* All userptr BOs in the Xe KMD have gem_handle set to workaround_bo->gem_handle */ |
| uint32_t gem_handle; |
| |
| uint32_t refcount; |
| |
| /* Index into the current validation list. This is used by the |
| * validation list building algorithm to track which buffers are already |
| * in the validation list so that we can ensure uniqueness. |
| */ |
| uint32_t exec_obj_index; |
| |
| /* Index for use with util_sparse_array_free_list */ |
| uint32_t free_index; |
| |
| /* Last known offset. This value is provided by the kernel when we |
| * execbuf and is used as the presumed offset for the next bunch of |
| * relocations, in canonical address format. |
| */ |
| uint64_t offset; |
| |
| /** Size of the buffer */ |
| uint64_t size; |
| |
| /** Offset at which the CCS data is stored */ |
| uint64_t ccs_offset; |
| |
| /* Map for internally mapped BOs. |
| * |
| * If ANV_BO_ALLOC_MAPPED is set in alloc_flags, this is the map for the |
| * whole BO. |
| */ |
| void *map; |
| |
| /* The actual size of the BO allocated by the KMD, basically: |
| * align(size, mem_alignment) |
| */ |
| uint64_t actual_size; |
| |
| /** Flags to pass to the kernel through drm_i915_exec_object2::flags */ |
| uint32_t flags; |
| |
| enum anv_bo_alloc_flags alloc_flags; |
| |
| /** True if this BO wraps a host pointer */ |
| bool from_host_ptr:1; |
| |
| /** True if this BO is mapped in the GTT (only used for RMV) */ |
| bool gtt_mapped:1; |
| }; |
| |
| static inline bool |
| anv_bo_is_external(const struct anv_bo *bo) |
| { |
| return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL; |
| } |
| |
| static inline bool |
| anv_bo_is_vram_only(const struct anv_bo *bo) |
| { |
| return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM | |
| ANV_BO_ALLOC_MAPPED | |
| ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE | |
| ANV_BO_ALLOC_IMPORTED)); |
| } |
| |
| static inline struct anv_bo * |
| anv_bo_ref(struct anv_bo *bo) |
| { |
| p_atomic_inc(&bo->refcount); |
| return bo; |
| } |
| |
| enum intel_device_info_mmap_mode |
| anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo); |
| |
| static inline bool |
| anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags) |
| { |
| return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) == |
| ANV_BO_ALLOC_HOST_CACHED; |
| } |
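| |
| /* For illustration, the four flag combinations and the resulting behavior |
| * (a reading of the check above, not an authoritative table): |
| * |
| *    !CACHED, !COHERENT -> false (presumably write-combined; no CPU cache) |
| *    !CACHED,  COHERENT -> false |
| *     CACHED,  COHERENT -> false (caches kept coherent for us) |
| *     CACHED, !COHERENT -> true  (CPU-cached but incoherent: flush needed) |
| */ |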
| |
| struct anv_address { |
| struct anv_bo *bo; |
| int64_t offset; |
| }; |
| |
| #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) |
| |
| static inline struct anv_address |
| anv_address_from_u64(uint64_t addr_u64) |
| { |
| assert(addr_u64 == intel_canonical_address(addr_u64)); |
| return (struct anv_address) { |
| .bo = NULL, |
| .offset = addr_u64, |
| }; |
| } |
| |
| static inline bool |
| anv_address_is_null(struct anv_address addr) |
| { |
| return addr.bo == NULL && addr.offset == 0; |
| } |
| |
| static inline uint64_t |
| anv_address_physical(struct anv_address addr) |
| { |
| uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset; |
| return intel_canonical_address(address); |
| } |
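| |
| /* A sketch of what this computes: for addr = { .bo, .offset }, the GPU |
| * virtual address is bo->offset + offset put into canonical form, which |
| * (assuming 48-bit GPU VAs) sign-extends bit 47 into the upper bits, e.g.: |
| * |
| *    0x0000_8000_0000_0000 -> 0xffff_8000_0000_0000 |
| */ |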
| |
| static inline struct u_trace_address |
| anv_address_utrace(struct anv_address addr) |
| { |
| return (struct u_trace_address) { |
| .bo = addr.bo, |
| .offset = addr.offset, |
| }; |
| } |
| |
| static inline struct anv_address |
| anv_address_add(struct anv_address addr, uint64_t offset) |
| { |
| addr.offset += offset; |
| return addr; |
| } |
| |
| static inline struct anv_address |
| anv_address_add_aligned(struct anv_address addr, uint64_t offset, uint32_t alignment) |
| { |
| addr.offset = align(addr.offset + offset, alignment); |
| return addr; |
| } |
| |
| static inline void * |
| anv_address_map(struct anv_address addr) |
| { |
| if (addr.bo == NULL) |
| return NULL; |
| |
| if (addr.bo->map == NULL) |
| return NULL; |
| |
| return addr.bo->map + addr.offset; |
| } |
| |
| /* Represents a virtual address range */ |
| struct anv_va_range { |
| uint64_t addr; |
| uint64_t size; |
| }; |
| |
| /* Represents a lock-free linked list of "free" things. This is used by |
| * both the block pool and the state pools. Unfortunately, in order to |
| * solve the ABA problem, we can't use a single uint32_t head. |
| */ |
| union anv_free_list { |
| struct { |
| uint32_t offset; |
| |
| /* A simple count that is incremented every time the head changes. */ |
| uint32_t count; |
| }; |
| /* Make sure it's aligned to 64 bits. This will make atomic operations |
| * faster on 32 bit platforms. |
| */ |
| alignas(8) uint64_t u64; |
| }; |
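| |
| /* Illustrative sketch (not the driver's exact code) of why {offset, count} |
| * are updated together with one 64-bit CAS: a head that re-appears at the |
| * same offset still fails the CAS because its count has advanced. |
| * |
| *    union anv_free_list current, next; |
| *    current.u64 = list->u64; |
| *    for (;;) { |
| *       next.offset = offset_of_next_entry(current.offset); // hypothetical |
| *       next.count = current.count + 1; |
| *       uint64_t prev = p_atomic_cmpxchg(&list->u64, current.u64, next.u64); |
| *       if (prev == current.u64) |
| *          break;           // popped the entry at current.offset |
| *       current.u64 = prev; // raced with another thread, retry |
| *    } |
| */ |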
| |
| #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) |
| |
| struct anv_block_state { |
| union { |
| struct { |
| uint32_t next; |
| uint32_t end; |
| }; |
| /* Make sure it's aligned to 64 bits. This will make atomic operations |
| * faster on 32 bit platforms. |
| */ |
| alignas(8) uint64_t u64; |
| }; |
| }; |
| |
| #define anv_block_pool_foreach_bo(bo, pool) \ |
| for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \ |
| _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \ |
| _pp_bo++) |
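| |
| /* Usage sketch for the iterator above (illustrative): |
| * |
| *    uint64_t total = 0; |
| *    anv_block_pool_foreach_bo(bo, pool) |
| *       total += bo->size; |
| */ |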
| |
| #define ANV_MAX_BLOCK_POOL_BOS 20 |
| |
| struct anv_block_pool { |
| const char *name; |
| |
| struct anv_device *device; |
| |
| struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS]; |
| struct anv_bo *bo; |
| uint32_t nbos; |
| |
| /* Maximum size of the pool */ |
| uint64_t max_size; |
| |
| /* Current size of the pool */ |
| uint64_t size; |
| |
| /* The canonical address where the start of the pool is pinned. The various bos that |
| * are created as the pool grows will have addresses in the range |
| * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE). |
| */ |
| uint64_t start_address; |
| |
| /* The offset from the start of the bo to the "center" of the block |
| * pool. Pointers to allocated blocks are given by |
| * bo.map + center_bo_offset + offsets. |
| */ |
| uint32_t center_bo_offset; |
| |
| struct anv_block_state state; |
| |
| enum anv_bo_alloc_flags bo_alloc_flags; |
| }; |
| |
| /* Block pools are backed by a fixed-size 1GB memfd */ |
| #define BLOCK_POOL_MEMFD_SIZE (1ul << 30) |
| |
| /* The center of the block pool is also the middle of the memfd. This may |
| * change in the future if we decide differently for some reason. |
| */ |
| #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) |
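| |
| /* A sketch of the pointer math: since the pool can grow in both directions, |
| * a block at signed offset `off` (negative for allocations growing |
| * downwards) maps to bo->map + center_bo_offset + off; e.g. with |
| * center_bo_offset = 512MB, offset -4096 lands just below the center of |
| * the memfd. |
| */ |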
| |
| static inline uint32_t |
| anv_block_pool_size(struct anv_block_pool *pool) |
| { |
| return pool->state.end; |
| } |
| |
| struct anv_state { |
| int64_t offset; |
| uint32_t alloc_size; |
| uint32_t idx; |
| void *map; |
| }; |
| |
| #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 }) |
| |
| struct anv_fixed_size_state_pool { |
| union anv_free_list free_list; |
| struct anv_block_state block; |
| }; |
| |
| #define ANV_MIN_STATE_SIZE_LOG2 6 |
| #define ANV_MAX_STATE_SIZE_LOG2 24 |
| |
| #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) |
| |
| struct anv_free_entry { |
| uint32_t next; |
| struct anv_state state; |
| }; |
| |
| struct anv_state_table { |
| struct anv_device *device; |
| int fd; |
| struct anv_free_entry *map; |
| uint32_t size; |
| uint64_t max_size; |
| struct anv_block_state state; |
| struct u_vector cleanups; |
| }; |
| |
| struct anv_state_pool { |
| struct anv_block_pool block_pool; |
| |
| /* Offset into the relevant state base address where the state pool starts |
| * allocating memory. |
| */ |
| int64_t start_offset; |
| |
| struct anv_state_table table; |
| |
| /* The size of blocks which will be allocated from the block pool */ |
| uint32_t block_size; |
| |
| struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; |
| }; |
| |
| struct anv_state_reserved_pool { |
| struct anv_state_pool *pool; |
| union anv_free_list reserved_blocks; |
| uint32_t count; |
| }; |
| |
| struct anv_state_reserved_array_pool { |
| struct anv_state_pool *pool; |
| simple_mtx_t mutex; |
| /* Bitfield of usable elements */ |
| BITSET_WORD *states; |
| /* Backing store */ |
| struct anv_state state; |
| /* Number of elements */ |
| uint32_t count; |
| /* Stride between each element */ |
| uint32_t stride; |
| /* Size of each element */ |
| uint32_t size; |
| }; |
| |
| struct anv_state_stream { |
| struct anv_state_pool *state_pool; |
| |
| /* The size of blocks to allocate from the state pool */ |
| uint32_t block_size; |
| |
| /* Current block we're allocating from */ |
| struct anv_state block; |
| |
| /* Offset into the current block at which to allocate the next state */ |
| uint32_t next; |
| |
| /* Sum of all the blocks in all_blocks */ |
| uint32_t total_size; |
| |
| /* List of all blocks allocated from this pool */ |
| struct util_dynarray all_blocks; |
| }; |
| |
| /* The block_pool functions are exported for testing only. The block pool |
| * should only be used via a state pool (see below). |
| */ |
| VkResult anv_block_pool_init(struct anv_block_pool *pool, |
| struct anv_device *device, |
| const char *name, |
| uint64_t start_address, |
| uint32_t initial_size, |
| uint32_t max_size); |
| void anv_block_pool_finish(struct anv_block_pool *pool); |
| VkResult anv_block_pool_alloc(struct anv_block_pool *pool, |
| uint32_t block_size, |
| int64_t *offset, |
| uint32_t *padding); |
| void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, |
|                          uint32_t size); |
| |
| struct anv_state_pool_params { |
| const char *name; |
| uint64_t base_address; |
| int64_t start_offset; |
| uint32_t block_size; |
| uint32_t max_size; |
| }; |
| |
| VkResult anv_state_pool_init(struct anv_state_pool *pool, |
| struct anv_device *device, |
| const struct anv_state_pool_params *params); |
| void anv_state_pool_finish(struct anv_state_pool *pool); |
| struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, |
| uint32_t state_size, uint32_t alignment); |
| void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); |
| |
| static inline struct anv_address |
| anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state) |
| { |
| return (struct anv_address) { |
| .bo = pool->block_pool.bo, |
| .offset = state.offset - pool->start_offset, |
| }; |
| } |
| |
| static inline struct anv_state |
| anv_state_pool_emit_data(struct anv_state_pool *pool, |
| size_t size, size_t align, |
| const void *p) |
| { |
| struct anv_state state; |
| |
| state = anv_state_pool_alloc(pool, size, align); |
| memcpy(state.map, p, size); |
| |
| return state; |
| } |
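| |
| /* Usage sketch (illustrative; the names here are just an example): upload a |
| * small blob of constants and get a GPU address for it: |
| * |
| *    const float border[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; |
| *    struct anv_state s = |
| *       anv_state_pool_emit_data(pool, sizeof(border), 64, border); |
| *    struct anv_address addr = anv_state_pool_state_address(pool, s); |
| */ |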
| |
| void anv_state_stream_init(struct anv_state_stream *stream, |
| struct anv_state_pool *state_pool, |
| uint32_t block_size); |
| void anv_state_stream_finish(struct anv_state_stream *stream); |
| struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, |
| uint32_t size, uint32_t alignment); |
| |
| void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool, |
| struct anv_state_pool *parent, |
| uint32_t count, uint32_t size, |
| uint32_t alignment); |
| void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool); |
| struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool); |
| void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool, |
| struct anv_state state); |
| |
| VkResult anv_state_reserved_array_pool_init(struct anv_state_reserved_array_pool *pool, |
| struct anv_state_pool *parent, |
| uint32_t count, uint32_t size, |
| uint32_t alignment); |
| void anv_state_reserved_array_pool_finish(struct anv_state_reserved_array_pool *pool); |
| struct anv_state anv_state_reserved_array_pool_alloc(struct anv_state_reserved_array_pool *pool, |
| bool alloc_back); |
| struct anv_state anv_state_reserved_array_pool_alloc_index(struct anv_state_reserved_array_pool *pool, |
| unsigned idx); |
| uint32_t anv_state_reserved_array_pool_state_index(struct anv_state_reserved_array_pool *pool, |
| struct anv_state state); |
| void anv_state_reserved_array_pool_free(struct anv_state_reserved_array_pool *pool, |
| struct anv_state state); |
| |
| VkResult anv_state_table_init(struct anv_state_table *table, |
| struct anv_device *device, |
| uint32_t initial_entries); |
| void anv_state_table_finish(struct anv_state_table *table); |
| VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx, |
| uint32_t count); |
| void anv_free_list_push(union anv_free_list *list, |
| struct anv_state_table *table, |
| uint32_t idx, uint32_t count); |
| struct anv_state* anv_free_list_pop(union anv_free_list *list, |
| struct anv_state_table *table); |
| |
| |
| static inline struct anv_state * |
| anv_state_table_get(struct anv_state_table *table, uint32_t idx) |
| { |
| return &table->map[idx].state; |
| } |
| |
| /** |
| * Implements a pool of re-usable BOs. The interface is identical to that |
| * of block_pool except that each block is its own BO. |
| */ |
| struct anv_bo_pool { |
| const char *name; |
| |
| struct anv_device *device; |
| |
| enum anv_bo_alloc_flags bo_alloc_flags; |
| |
| struct util_sparse_array_free_list free_list[16]; |
| }; |
| |
| void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, |
| const char *name, enum anv_bo_alloc_flags alloc_flags); |
| void anv_bo_pool_finish(struct anv_bo_pool *pool); |
| VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size, |
| struct anv_bo **bo_out); |
| void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo); |
| |
| struct anv_scratch_pool { |
| enum anv_bo_alloc_flags alloc_flags; |
| /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */ |
| struct anv_bo *bos[16][MESA_SHADER_STAGES]; |
| uint32_t surfs[16]; |
| struct anv_state surf_states[16]; |
| }; |
| |
| void anv_scratch_pool_init(struct anv_device *device, |
| struct anv_scratch_pool *pool, |
| bool protected); |
| void anv_scratch_pool_finish(struct anv_device *device, |
| struct anv_scratch_pool *pool); |
| struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device, |
| struct anv_scratch_pool *pool, |
| gl_shader_stage stage, |
| unsigned per_thread_scratch); |
| uint32_t anv_scratch_pool_get_surf(struct anv_device *device, |
| struct anv_scratch_pool *pool, |
| unsigned per_thread_scratch); |
| |
| /* Note that on Gfx12HP we pass a scratch space surface state offset |
| * shifted by 2 relative to the value specified on the BSpec, since |
| * that allows the compiler to save a shift instruction while |
| * constructing the extended descriptor for SS addressing. That |
| * worked because we limit the scratch surface state pool to 8 MB and |
| * because we relied on the legacy (ExBSO=0) encoding of the extended |
| * descriptor in order to save the shift, which is no longer supported |
| * for the UGM shared function on Xe2 platforms, so we no longer |
| * attempt to do that trick. |
| */ |
| #define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4) |
| |
| /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */ |
| struct anv_bo_cache { |
| struct util_sparse_array bo_map; |
| pthread_mutex_t mutex; |
| }; |
| |
| VkResult anv_bo_cache_init(struct anv_bo_cache *cache, |
| struct anv_device *device); |
| void anv_bo_cache_finish(struct anv_bo_cache *cache); |
| |
| struct anv_queue_family { |
| /* Standard bits passed on to the client */ |
| VkQueueFlags queueFlags; |
| uint32_t queueCount; |
| |
| enum intel_engine_class engine_class; |
| bool supports_perf; |
| }; |
| |
| #define ANV_MAX_QUEUE_FAMILIES 5 |
| |
| struct anv_memory_type { |
| /* Standard bits passed on to the client */ |
| VkMemoryPropertyFlags propertyFlags; |
| uint32_t heapIndex; |
| /* Whether this is the dynamic visible memory type */ |
| bool dynamic_visible; |
| bool compressed; |
| }; |
| |
| struct anv_memory_heap { |
| /* Standard bits passed on to the client */ |
| VkDeviceSize size; |
| VkMemoryHeapFlags flags; |
| |
| /** Driver-internal book-keeping. |
| * |
| * Align it to 64 bits to make atomic operations faster on 32 bit platforms. |
| */ |
| alignas(8) VkDeviceSize used; |
| |
| bool is_local_mem; |
| }; |
| |
| struct anv_memregion { |
| const struct intel_memory_class_instance *region; |
| uint64_t size; |
| uint64_t available; |
| }; |
| |
| enum anv_timestamp_capture_type { |
| ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE, |
| ANV_TIMESTAMP_CAPTURE_END_OF_PIPE, |
| ANV_TIMESTAMP_CAPTURE_AT_CS_STALL, |
| ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER, |
| ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH, |
| }; |
| |
| struct anv_physical_device { |
| struct vk_physical_device vk; |
| |
| /* Link in anv_instance::physical_devices */ |
| struct list_head link; |
| |
| struct anv_instance * instance; |
| char path[20]; |
| struct intel_device_info info; |
| |
| bool video_decode_enabled; |
| bool video_encode_enabled; |
| |
| struct brw_compiler * compiler; |
| struct isl_device isl_dev; |
| struct intel_perf_config * perf; |
| /* |
| * Number of commands required to implement a performance query begin + |
| * end. |
| */ |
| uint32_t n_perf_query_commands; |
| bool has_exec_async; |
| bool has_exec_capture; |
| VkQueueGlobalPriorityKHR max_context_priority; |
| uint64_t gtt_size; |
| |
| bool always_use_bindless; |
| bool use_call_secondary; |
| |
| /** True if we can use timeline semaphores through execbuf */ |
| bool has_exec_timeline; |
| |
| /** True if we can read the GPU timestamp register |
| * |
| * When running in a virtual context, the timestamp register is unreadable |
| * on Gfx12+. |
| */ |
| bool has_reg_timestamp; |
| |
| /** True if we can create protected contexts. */ |
| bool has_protected_contexts; |
| |
| /** Whether KMD has the ability to create VM objects */ |
| bool has_vm_control; |
| |
| /** Whether the device is unable to map all the device-local memory on the |
| * host |
| */ |
| bool has_small_bar; |
| |
| /** True if we have the means to do sparse binding (e.g., a kernel driver |
| * with a vm_bind ioctl). |
| */ |
| enum anv_sparse_type { |
| ANV_SPARSE_TYPE_NOT_SUPPORTED = 0, |
| ANV_SPARSE_TYPE_VM_BIND, |
| ANV_SPARSE_TYPE_TRTT, |
| ANV_SPARSE_TYPE_FAKE, |
| } sparse_type; |
| |
| /** True if HW supports ASTC LDR */ |
| bool has_astc_ldr; |
| /** True if denorms in void extents should be flushed to zero */ |
| bool flush_astc_ldr_void_extent_denorms; |
| /** True if ASTC LDR is supported via emulation */ |
| bool emu_astc_ldr; |
| /* true if FCV optimization should be disabled. */ |
| bool disable_fcv; |
| /** True if the device uses extended bindless surface offsets (ExBSO, Gfx12.5+) */ |
| bool uses_ex_bso; |
| |
| bool always_flush_cache; |
| |
| /** True if application memory is allocated with extra AUX memory |
| * |
| * Applications quite often pool image allocations together in a single |
| * VkDeviceMemory object. On platforms like MTL, the alignment of images |
| * with compression mapped through the AUX translation tables is large : |
| * 1MB. This can create a lot of wasted space in the application memory |
| * objects. |
| * |
| * To work around this problem, we allocate CCS data at the end of |
| * VkDeviceMemory objects. This would not work well for TGL-like platforms |
| * because the AUX translation tables also contain the format of the |
| * images, but on MTL the HW ignores those values. So we can share the AUX |
| * TT entries between different images without problems. |
| * |
| * This should be only true for platforms with AUX TT. |
| */ |
| bool alloc_aux_tt_mem; |
| |
| /** |
| * True if the descriptor buffers are holding one of the following: |
| * - anv_sampled_image_descriptor |
| * - anv_storage_image_descriptor |
| * - anv_address_range_descriptor |
| * |
| * Accessing the descriptors in a bindless fashion from the shader |
| * requires an indirection in the shader: first fetch one of the structures |
| * listed above from the descriptor buffer, then emit the send message to |
| * the fixed function (sampler, dataport, etc...) with the handle fetched |
| * above. |
| * |
| * We need to do things this way prior to DG2 because the bindless surface |
| * state space is limited to 64MB and some applications will allocate more |
| * than what the HW can support. On DG2+ we get 4GB of bindless surface state |
| * and so we can reference RENDER_SURFACE_STATE/SAMPLER_STATE structures |
| * directly instead. |
| */ |
| bool indirect_descriptors; |
| |
| bool uses_relocs; |
| |
| /** Can the platform support cooperative matrices and is it enabled? */ |
| bool has_cooperative_matrix; |
| |
| struct { |
| uint32_t family_count; |
| struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES]; |
| } queue; |
| |
| struct { |
| uint32_t type_count; |
| struct anv_memory_type types[VK_MAX_MEMORY_TYPES]; |
| uint32_t heap_count; |
| struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS]; |
| #ifdef SUPPORT_INTEL_INTEGRATED_GPUS |
| bool need_flush; |
| #endif |
| /** Mask of memory types of normal allocations */ |
| uint32_t default_buffer_mem_types; |
| /** Mask of memory types of data indexable from the dynamic heap */ |
| uint32_t dynamic_visible_mem_types; |
| /** Mask of memory types of protected buffers/images */ |
| uint32_t protected_mem_types; |
| /** Mask of memory types of compressed buffers/images */ |
| uint32_t compressed_mem_types; |
| } memory; |
| |
| struct { |
| /** |
| * General state pool |
| */ |
| struct anv_va_range general_state_pool; |
| /** |
| * Low 32bit heap |
| */ |
| struct anv_va_range low_heap; |
| /** |
| * Binding table pool |
| */ |
| struct anv_va_range binding_table_pool; |
| /** |
| * Internal surface states for blorp & push descriptors. |
| */ |
| struct anv_va_range internal_surface_state_pool; |
| /** |
| * Scratch surfaces (overlaps with internal_surface_state_pool). |
| */ |
| struct anv_va_range scratch_surface_state_pool; |
| /** |
| * Bindless surface states (indirectly referred to by indirect |
| * descriptors or for direct descriptors) |
| */ |
| struct anv_va_range bindless_surface_state_pool; |
| /** |
| * Dynamic state pool |
| */ |
| struct anv_va_range dynamic_state_pool; |
| /** |
| * Buffer pool that can be index from the dynamic state heap |
| */ |
| struct anv_va_range dynamic_visible_pool; |
| /** |
| * Indirect descriptor pool |
| */ |
| struct anv_va_range indirect_descriptor_pool; |
| /** |
| * Indirect push descriptor pool |
| */ |
| struct anv_va_range indirect_push_descriptor_pool; |
| /** |
| * Instruction state pool |
| */ |
| struct anv_va_range instruction_state_pool; |
| /** |
| * Push descriptor with descriptor buffers |
| */ |
| struct anv_va_range push_descriptor_buffer_pool; |
| /** |
| * AUX-TT |
| */ |
| struct anv_va_range aux_tt_pool; |
| /** |
| * Client heap |
| */ |
| struct anv_va_range high_heap; |
| struct anv_va_range trtt; |
| } va; |
| |
| /* Either we have a single vram region and it's all mappable, or we have |
| * both mappable & non-mappable parts. System memory is always available. |
| */ |
| struct anv_memregion vram_mappable; |
| struct anv_memregion vram_non_mappable; |
| struct anv_memregion sys; |
| uint8_t driver_build_sha1[20]; |
| uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; |
| uint8_t driver_uuid[VK_UUID_SIZE]; |
| uint8_t device_uuid[VK_UUID_SIZE]; |
| uint8_t rt_uuid[VK_UUID_SIZE]; |
| |
| /* Maximum amount of scratch space used by all the GRL kernels */ |
| uint32_t max_grl_scratch_size; |
| |
| struct vk_sync_type sync_syncobj_type; |
| struct vk_sync_timeline_type sync_timeline_type; |
| const struct vk_sync_type * sync_types[4]; |
| |
| struct wsi_device wsi_device; |
| int local_fd; |
| bool has_local; |
| int64_t local_major; |
| int64_t local_minor; |
| int master_fd; |
| bool has_master; |
| int64_t master_major; |
| int64_t master_minor; |
| struct intel_query_engine_info * engine_info; |
| |
| void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, |
| enum anv_timestamp_capture_type, void *); |
| void (*cmd_capture_data)(struct anv_batch *, struct anv_device *, |
| struct anv_address, struct anv_address, |
| uint32_t); |
| struct intel_measure_device measure_device; |
| |
| /* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */ |
| uint32_t gpgpu_pipeline_value; |
| |
| /** A pre-packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage |
| * |
| * For use when a pipeline has no VS input |
| */ |
| uint32_t empty_vs_input[2]; |
| }; |
| |
| VkResult anv_physical_device_try_create(struct vk_instance *vk_instance, |
| struct _drmDevice *drm_device, |
| struct vk_physical_device **out); |
| |
| void anv_physical_device_destroy(struct vk_physical_device *vk_device); |
| |
| static inline uint32_t |
| anv_physical_device_bindless_heap_size(const struct anv_physical_device *device, |
| bool descriptor_buffer) |
| { |
| /* Pre-Gfx12.5, the HW bindless surface heap is only 64MB. From Gfx12.5 on |
| * it's 4GB, but we have some workarounds that require 2 heaps to overlap, |
| * so the size is dictated by our VA allocation. |
| */ |
| return device->uses_ex_bso ? |
| (descriptor_buffer ? |
| device->va.dynamic_visible_pool.size : |
| device->va.bindless_surface_state_pool.size) : |
| 64 * 1024 * 1024 /* 64 MiB */; |
| } |
| |
| static inline bool |
| anv_physical_device_has_vram(const struct anv_physical_device *device) |
| { |
| return device->vram_mappable.size > 0; |
| } |
| |
| struct anv_instance { |
| struct vk_instance vk; |
| |
| struct driOptionCache dri_options; |
| struct driOptionCache available_dri_options; |
| |
| int mesh_conv_prim_attrs_to_vert_attrs; |
| bool enable_tbimr; |
| bool external_memory_implicit_sync; |
| bool force_guc_low_latency; |
| |
| /** |
| * Workarounds for game bugs. |
| */ |
| uint8_t assume_full_subgroups; |
| bool assume_full_subgroups_with_barrier; |
| bool limit_trig_input_range; |
| bool sample_mask_out_opengl_behaviour; |
| bool force_filter_addr_rounding; |
| bool fp64_workaround_enabled; |
| float lower_depth_range_rate; |
| unsigned generated_indirect_threshold; |
| unsigned generated_indirect_ring_threshold; |
| unsigned query_clear_with_blorp_threshold; |
| unsigned query_copy_with_shader_threshold; |
| unsigned force_vk_vendor; |
| bool has_fake_sparse; |
| bool disable_fcv; |
| bool disable_xe2_ccs; |
| bool compression_control_enabled; |
| bool anv_fake_nonlocal_memory; |
| bool anv_upper_bound_descriptor_pool_sampler; |
| |
| /* HW workarounds */ |
| bool no_16bit; |
| bool intel_enable_wa_14018912822; |
| |
| /** |
| * Ray tracing configuration. |
| */ |
| unsigned stack_ids; |
| }; |
| |
| VkResult anv_init_wsi(struct anv_physical_device *physical_device); |
| void anv_finish_wsi(struct anv_physical_device *physical_device); |
| |
| struct anv_queue { |
| struct vk_queue vk; |
| |
| struct anv_device * device; |
| |
| const struct anv_queue_family * family; |
| |
| struct intel_batch_decode_ctx * decoder; |
| |
| union { |
| uint32_t exec_flags; /* i915 */ |
| uint32_t context_id; /* i915 */ |
| uint32_t exec_queue_id; /* Xe */ |
| }; |
| |
| /** Context/Engine id which executes companion RCS command buffer */ |
| uint32_t companion_rcs_id; |
| |
| /** Synchronization object for debug purposes (DEBUG_SYNC) */ |
| struct vk_sync *sync; |
| |
| /** Companion synchronization object |
| * |
| * Vulkan command buffers can be destroyed as soon as their lifecycle moves |
| * from the Pending state to the Invalid/Executable state. This transition |
| * happens when the VkFence/VkSemaphore associated with the completion of |
| * the command buffer work is signaled. |
| * |
| * When we're using a companion command buffer to execute part of another |
| * command buffer, we need to tie the 2 work submissions together to ensure |
| * that when the associated VkFence/VkSemaphore is signaled, both command |
| * buffers are actually unused by the HW. To do this, we run an empty batch |
| * buffer that we use to signal after both submissions: |
| * |
| * CCS --> main ---> empty_batch (with wait on companion) --> signal |
| * RCS --> companion -| |
| * |
| * When the companion batch completes, it signals companion_sync and allows |
| * empty_batch to execute. Since empty_batch is running on the main engine, |
| * we're guaranteed that upon completion both main & companion command |
| * buffers are not used by HW anymore. |
| */ |
| struct vk_sync *companion_sync; |
| |
| struct intel_ds_queue ds; |
| |
| struct anv_async_submit *init_submit; |
| struct anv_async_submit *init_companion_submit; |
| }; |
| |
| struct nir_xfb_info; |
| struct anv_pipeline_bind_map; |
| struct anv_pipeline_sets_layout; |
| struct anv_push_descriptor_info; |
| enum anv_dynamic_push_bits; |
| |
| void anv_device_init_embedded_samplers(struct anv_device *device); |
| void anv_device_finish_embedded_samplers(struct anv_device *device); |
| |
| extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2]; |
| |
| struct anv_shader_bin * |
| anv_device_search_for_kernel(struct anv_device *device, |
| struct vk_pipeline_cache *cache, |
| const void *key_data, uint32_t key_size, |
| bool *user_cache_bit); |
| |
| struct anv_shader_upload_params; |
| |
| struct anv_shader_bin * |
| anv_device_upload_kernel(struct anv_device *device, |
| struct vk_pipeline_cache *cache, |
| const struct anv_shader_upload_params *params); |
| |
| struct nir_shader; |
| struct nir_shader_compiler_options; |
| |
| struct nir_shader * |
| anv_device_search_for_nir(struct anv_device *device, |
| struct vk_pipeline_cache *cache, |
| const struct nir_shader_compiler_options *nir_options, |
| unsigned char sha1_key[20], |
| void *mem_ctx); |
| |
| void |
| anv_device_upload_nir(struct anv_device *device, |
| struct vk_pipeline_cache *cache, |
| const struct nir_shader *nir, |
| unsigned char sha1_key[20]); |
| |
| void |
| anv_load_fp64_shader(struct anv_device *device); |
| |
| /** |
| * This enum tracks the various HW instructions that hold graphics state |
| * needing to be reprogrammed. Some instructions are grouped together as they |
| * pretty much need to be emitted together (like 3DSTATE_URB_*). |
| * |
| * Not all bits apply to all platforms. We build a dirty state based on |
| * enabled extensions & generation on anv_device. |
| */ |
| enum anv_gfx_state_bits { |
| /* Pipeline states */ |
| ANV_GFX_STATE_URB, /* All legacy stages, including mesh */ |
| ANV_GFX_STATE_VF_STATISTICS, |
| ANV_GFX_STATE_VF_SGVS, |
| ANV_GFX_STATE_VF_SGVS_2, |
| ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */ |
| ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */ |
| ANV_GFX_STATE_PRIMITIVE_REPLICATION, |
| ANV_GFX_STATE_SBE, |
| ANV_GFX_STATE_SBE_SWIZ, |
| ANV_GFX_STATE_SO_DECL_LIST, |
| ANV_GFX_STATE_VS, |
| ANV_GFX_STATE_HS, |
| ANV_GFX_STATE_DS, |
| ANV_GFX_STATE_GS, |
| ANV_GFX_STATE_PS, |
| ANV_GFX_STATE_SBE_MESH, |
| ANV_GFX_STATE_CLIP_MESH, |
| ANV_GFX_STATE_MESH_CONTROL, |
| ANV_GFX_STATE_MESH_SHADER, |
| ANV_GFX_STATE_MESH_DISTRIB, |
| ANV_GFX_STATE_TASK_CONTROL, |
| ANV_GFX_STATE_TASK_SHADER, |
| ANV_GFX_STATE_TASK_REDISTRIB, |
| /* Dynamic states */ |
| ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */ |
| ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */ |
| ANV_GFX_STATE_CLIP, |
| ANV_GFX_STATE_CC_STATE, |
| ANV_GFX_STATE_CC_STATE_PTR, |
| ANV_GFX_STATE_CPS, |
| ANV_GFX_STATE_DEPTH_BOUNDS, |
| ANV_GFX_STATE_INDEX_BUFFER, |
| ANV_GFX_STATE_LINE_STIPPLE, |
| ANV_GFX_STATE_MULTISAMPLE, |
| ANV_GFX_STATE_PS_BLEND, |
| ANV_GFX_STATE_RASTER, |
| ANV_GFX_STATE_SAMPLE_MASK, |
| ANV_GFX_STATE_SAMPLE_PATTERN, |
| ANV_GFX_STATE_SCISSOR, |
| ANV_GFX_STATE_SF, |
| ANV_GFX_STATE_STREAMOUT, |
| ANV_GFX_STATE_TE, |
| ANV_GFX_STATE_VERTEX_INPUT, |
| ANV_GFX_STATE_VF, |
| ANV_GFX_STATE_VF_TOPOLOGY, |
| ANV_GFX_STATE_VFG, |
| ANV_GFX_STATE_VIEWPORT_CC, |
| ANV_GFX_STATE_VIEWPORT_CC_PTR, |
| ANV_GFX_STATE_VIEWPORT_SF_CLIP, |
| ANV_GFX_STATE_WM, |
| ANV_GFX_STATE_WM_DEPTH_STENCIL, |
| ANV_GFX_STATE_PS_EXTRA, |
| ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */ |
| ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */ |
| ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */ |
| ANV_GFX_STATE_TBIMR_TILE_PASS_INFO, |
| ANV_GFX_STATE_FS_MSAA_FLAGS, |
| ANV_GFX_STATE_TCS_INPUT_VERTICES, |
| ANV_GFX_STATE_COARSE_STATE, |
| |
| ANV_GFX_STATE_MAX, |
| }; |
| |
| const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state); |
| |
| enum anv_coarse_pixel_state { |
| ANV_COARSE_PIXEL_STATE_UNKNOWN, |
| ANV_COARSE_PIXEL_STATE_DISABLED, |
| ANV_COARSE_PIXEL_STATE_ENABLED, |
| }; |
| |
| /* This structure tracks the values to program in the HW instructions |
| * corresponding to the dynamic states of the Vulkan API. Only fields that |
| * need to be reemitted outside of the VkPipeline object are tracked here. |
| */ |
| struct anv_gfx_dynamic_state { |
| /* 3DSTATE_BLEND_STATE_POINTERS */ |
| struct { |
| bool AlphaToCoverageEnable; |
| bool AlphaToOneEnable; |
| bool IndependentAlphaBlendEnable; |
| bool ColorDitherEnable; |
| struct { |
| bool WriteDisableAlpha; |
| bool WriteDisableRed; |
| bool WriteDisableGreen; |
| bool WriteDisableBlue; |
| |
| uint32_t LogicOpFunction; |
| bool LogicOpEnable; |
| |
| bool ColorBufferBlendEnable; |
| uint32_t ColorClampRange; |
| bool PreBlendColorClampEnable; |
| bool PostBlendColorClampEnable; |
| uint32_t SourceBlendFactor; |
| uint32_t DestinationBlendFactor; |
| uint32_t ColorBlendFunction; |
| uint32_t SourceAlphaBlendFactor; |
| uint32_t DestinationAlphaBlendFactor; |
| uint32_t AlphaBlendFunction; |
| } rts[MAX_RTS]; |
| |
| struct anv_state state; |
| } blend; |
| |
| /* 3DSTATE_CC_STATE_POINTERS */ |
| struct { |
| float BlendConstantColorRed; |
| float BlendConstantColorGreen; |
| float BlendConstantColorBlue; |
| float BlendConstantColorAlpha; |
| |
| struct anv_state state; |
| } cc; |
| |
| /* 3DSTATE_CLIP */ |
| struct { |
| uint32_t APIMode; |
| uint32_t ViewportXYClipTestEnable; |
| uint32_t MaximumVPIndex; |
| uint32_t TriangleStripListProvokingVertexSelect; |
| uint32_t LineStripListProvokingVertexSelect; |
| uint32_t TriangleFanProvokingVertexSelect; |
| } clip; |
| |
| /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */ |
| struct { |
| /* Gfx11 */ |
| uint32_t CoarsePixelShadingMode; |
| float MinCPSizeX; |
| float MinCPSizeY; |
| /* Gfx12+ */ |
| uint32_t CoarsePixelShadingStateArrayPointer; |
| } cps; |
| |
| /* 3DSTATE_DEPTH_BOUNDS */ |
| struct { |
| bool DepthBoundsTestEnable; |
| float DepthBoundsTestMinValue; |
| float DepthBoundsTestMaxValue; |
| } db; |
| |
| /* 3DSTATE_GS */ |
| struct { |
| uint32_t ReorderMode; |
| } gs; |
| |
| /* 3DSTATE_LINE_STIPPLE */ |
| struct { |
| uint32_t LineStipplePattern; |
| float LineStippleInverseRepeatCount; |
| uint32_t LineStippleRepeatCount; |
| } ls; |
| |
| /* 3DSTATE_MULTISAMPLE */ |
| struct { |
| uint32_t NumberofMultisamples; |
| } ms; |
| |
| /* 3DSTATE_PS */ |
| struct { |
| uint32_t PositionXYOffsetSelect; |
| |
| uint32_t KernelStartPointer0; |
| uint32_t KernelStartPointer1; |
| uint32_t KernelStartPointer2; |
| |
| uint32_t DispatchGRFStartRegisterForConstantSetupData0; |
| uint32_t DispatchGRFStartRegisterForConstantSetupData1; |
| uint32_t DispatchGRFStartRegisterForConstantSetupData2; |
| |
| /* Pre-Gfx20 only */ |
| bool _8PixelDispatchEnable; |
| bool _16PixelDispatchEnable; |
| bool _32PixelDispatchEnable; |
| |
| /* Gfx20+ only */ |
| bool Kernel0Enable; |
| bool Kernel1Enable; |
| uint32_t Kernel0SIMDWidth; |
| uint32_t Kernel1SIMDWidth; |
| uint32_t Kernel0PolyPackingPolicy; |
| } ps; |
| |
| /* 3DSTATE_PS_EXTRA */ |
| struct { |
| bool PixelShaderHasUAV; |
| bool PixelShaderIsPerSample; |
| bool PixelShaderKillsPixel; |
| bool PixelShaderIsPerCoarsePixel; |
| bool EnablePSDependencyOnCPsizeChange; |
| } ps_extra; |
| |
| /* 3DSTATE_PS_BLEND */ |
| struct { |
| bool HasWriteableRT; |
| bool ColorBufferBlendEnable; |
| uint32_t SourceAlphaBlendFactor; |
| uint32_t DestinationAlphaBlendFactor; |
| uint32_t SourceBlendFactor; |
| uint32_t DestinationBlendFactor; |
| bool AlphaTestEnable; |
| bool IndependentAlphaBlendEnable; |
| bool AlphaToCoverageEnable; |
| } ps_blend; |
| |
| /* 3DSTATE_RASTER */ |
| struct { |
| uint32_t APIMode; |
| bool DXMultisampleRasterizationEnable; |
| bool AntialiasingEnable; |
| uint32_t CullMode; |
| uint32_t FrontWinding; |
| bool GlobalDepthOffsetEnableSolid; |
| bool GlobalDepthOffsetEnableWireframe; |
| bool GlobalDepthOffsetEnablePoint; |
| float GlobalDepthOffsetConstant; |
| float GlobalDepthOffsetScale; |
| float GlobalDepthOffsetClamp; |
| uint32_t FrontFaceFillMode; |
| uint32_t BackFaceFillMode; |
| bool ViewportZFarClipTestEnable; |
| bool ViewportZNearClipTestEnable; |
| bool ConservativeRasterizationEnable; |
| } raster; |
| |
| /* 3DSTATE_SCISSOR_STATE_POINTERS */ |
| struct { |
| uint32_t count; |
| struct { |
| uint32_t ScissorRectangleYMin; |
| uint32_t ScissorRectangleXMin; |
| uint32_t ScissorRectangleYMax; |
| uint32_t ScissorRectangleXMax; |
| } elem[MAX_SCISSORS]; |
| } scissor; |
| |
| /* 3DSTATE_SF */ |
| struct { |
| float LineWidth; |
| uint32_t TriangleStripListProvokingVertexSelect; |
| uint32_t LineStripListProvokingVertexSelect; |
| uint32_t TriangleFanProvokingVertexSelect; |
| bool LegacyGlobalDepthBiasEnable; |
| } sf; |
| |
| /* 3DSTATE_STREAMOUT */ |
| struct { |
| bool RenderingDisable; |
| uint32_t RenderStreamSelect; |
| uint32_t ReorderMode; |
| uint32_t ForceRendering; |
| } so; |
| |
| /* 3DSTATE_SAMPLE_MASK */ |
| struct { |
| uint32_t SampleMask; |
| } sm; |
| |
| /* 3DSTATE_TE */ |
| struct { |
| uint32_t OutputTopology; |
| } te; |
| |
| /* 3DSTATE_VF */ |
| struct { |
| bool IndexedDrawCutIndexEnable; |
| uint32_t CutIndex; |
| } vf; |
| |
| /* 3DSTATE_VFG */ |
| struct { |
| uint32_t DistributionMode; |
| bool ListCutIndexEnable; |
| } vfg; |
| |
| /* 3DSTATE_VF_TOPOLOGY */ |
| struct { |
| uint32_t PrimitiveTopologyType; |
| } vft; |
| |
| /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */ |
| struct { |
| uint32_t count; |
| struct { |
| float MinimumDepth; |
| float MaximumDepth; |
| } elem[MAX_VIEWPORTS]; |
| |
| struct anv_state state; |
| } vp_cc; |
| |
| /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */ |
| struct { |
| uint32_t count; |
| struct { |
| float ViewportMatrixElementm00; |
| float ViewportMatrixElementm11; |
| float ViewportMatrixElementm22; |
| float ViewportMatrixElementm30; |
| float ViewportMatrixElementm31; |
| float ViewportMatrixElementm32; |
| float XMinClipGuardband; |
| float XMaxClipGuardband; |
| float YMinClipGuardband; |
| float YMaxClipGuardband; |
| float XMinViewPort; |
| float XMaxViewPort; |
| float YMinViewPort; |
| float YMaxViewPort; |
| } elem[MAX_VIEWPORTS]; |
| } vp_sf_clip; |
| |
| /* 3DSTATE_WM */ |
| struct { |
| bool LineStippleEnable; |
| uint32_t BarycentricInterpolationMode; |
| } wm; |
| |
| /* 3DSTATE_WM_DEPTH_STENCIL */ |
| struct { |
| bool DoubleSidedStencilEnable; |
| uint32_t StencilTestMask; |
| uint32_t StencilWriteMask; |
| uint32_t BackfaceStencilTestMask; |
| uint32_t BackfaceStencilWriteMask; |
| uint32_t StencilReferenceValue; |
| uint32_t BackfaceStencilReferenceValue; |
| bool DepthTestEnable; |
| bool DepthBufferWriteEnable; |
| uint32_t DepthTestFunction; |
| bool StencilTestEnable; |
| bool StencilBufferWriteEnable; |
| uint32_t StencilFailOp; |
| uint32_t StencilPassDepthPassOp; |
| uint32_t StencilPassDepthFailOp; |
| uint32_t StencilTestFunction; |
| uint32_t BackfaceStencilFailOp; |
| uint32_t BackfaceStencilPassDepthPassOp; |
| uint32_t BackfaceStencilPassDepthFailOp; |
| uint32_t BackfaceStencilTestFunction; |
| } ds; |
| |
| /* 3DSTATE_TBIMR_TILE_PASS_INFO */ |
| struct { |
| unsigned TileRectangleHeight; |
| unsigned TileRectangleWidth; |
| unsigned VerticalTileCount; |
| unsigned HorizontalTileCount; |
| unsigned TBIMRBatchSize; |
| unsigned TileBoxCheck; |
| } tbimr; |
| bool use_tbimr; |
| |
| /** |
| * Dynamic MSAA flags. This value can differ from |
| * anv_push_constants::gfx::fs_msaa_flags, as the push constant value only |
| * needs to be updated for fragment shaders that dynamically check the value. |
| */ |
| enum intel_msaa_flags fs_msaa_flags; |
| |
| /** |
| * Dynamic TCS input vertices. This value can differ from |
| * anv_driver_constants::gfx::tcs_input_vertices, as the push constant |
| * value only needs to be updated for tessellation control shaders |
| * that dynamically check the value. |
| */ |
| uint32_t tcs_input_vertices; |
| |
| bool pma_fix; |
| |
| /** |
| * DEPTH and STENCIL attachment write state for Wa_18019816803. |
| */ |
| bool ds_write_state; |
| |
| /** |
| * Toggle tracking for Wa_14018283232. |
| */ |
| bool wa_14018283232_toggle; |
| |
| /** |
| * Coarse state tracking for Wa_18038825448. |
| */ |
| enum anv_coarse_pixel_state coarse_state; |
| |
| BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX); |
| }; |
| |
| enum anv_internal_kernel_name { |
| ANV_INTERNAL_KERNEL_GENERATED_DRAWS, |
| ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE, |
| ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT, |
| ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE, |
| |
| ANV_INTERNAL_KERNEL_COUNT, |
| }; |
| |
| enum anv_rt_bvh_build_method { |
| ANV_BVH_BUILD_METHOD_TRIVIAL, |
| ANV_BVH_BUILD_METHOD_NEW_SAH, |
| }; |
| |
| struct anv_device_astc_emu { |
| struct vk_texcompress_astc_state *texcompress; |
| |
| /* for flush_astc_ldr_void_extent_denorms */ |
| simple_mtx_t mutex; |
| VkDescriptorSetLayout ds_layout; |
| VkPipelineLayout pipeline_layout; |
| VkPipeline pipeline; |
| }; |
| |
| struct anv_device { |
| struct vk_device vk; |
| |
| struct anv_physical_device * physical; |
| const struct intel_device_info * info; |
| const struct anv_kmd_backend * kmd_backend; |
| struct isl_device isl_dev; |
| union { |
| uint32_t context_id; /* i915 */ |
| uint32_t vm_id; /* Xe */ |
| }; |
| int fd; |
| |
| pthread_mutex_t vma_mutex; |
| struct util_vma_heap vma_lo; |
| struct util_vma_heap vma_hi; |
| struct util_vma_heap vma_desc; |
| struct util_vma_heap vma_dynamic_visible; |
| struct util_vma_heap vma_trtt; |
| |
| /** List of all anv_device_memory objects */ |
| struct list_head memory_objects; |
| |
| /** List of anv_image objects with a private binding for implicit CCS */ |
| struct list_head image_private_objects; |
| |
| /** Memory pool for batch buffers */ |
| struct anv_bo_pool batch_bo_pool; |
| /** Memory pool for utrace timestamp buffers */ |
| struct anv_bo_pool utrace_bo_pool; |
| /** |
| * Size of the timestamp captured for utrace. |
| */ |
| uint32_t utrace_timestamp_size; |
| /** Memory pool for BVH build buffers */ |
| struct anv_bo_pool bvh_bo_pool; |
| |
| struct anv_bo_cache bo_cache; |
| |
| struct anv_state_pool general_state_pool; |
| struct anv_state_pool aux_tt_pool; |
| struct anv_state_pool dynamic_state_pool; |
| struct anv_state_pool instruction_state_pool; |
| struct anv_state_pool binding_table_pool; |
| struct anv_state_pool scratch_surface_state_pool; |
| struct anv_state_pool internal_surface_state_pool; |
| struct anv_state_pool bindless_surface_state_pool; |
| struct anv_state_pool indirect_push_descriptor_pool; |
| struct anv_state_pool push_descriptor_buffer_pool; |
| |
| struct anv_state_reserved_array_pool custom_border_colors; |
| |
| /** BO used for various workarounds |
| * |
| * There are a number of workarounds on our hardware which require writing |
| * data somewhere and it doesn't really matter where. For that, we use |
| * this BO and just write to the first dword or so. |
| * |
| * We also need to be able to handle NULL buffers bound as pushed UBOs. |
| * For that, we use the high bytes (>= 1024) of the workaround BO. |
| */ |
| struct anv_bo * workaround_bo; |
| struct anv_address workaround_address; |
| |
| struct anv_bo * dummy_aux_bo; |
| |
| /** |
| * Workarounds for game bugs. |
| */ |
| struct { |
| struct set * doom64_images; |
| } workarounds; |
| |
| struct anv_bo * trivial_batch_bo; |
| struct anv_state null_surface_state; |
| |
| /** |
| * NULL surface state copy stored in host memory for use as a fast |
| * memcpy() source. |
| */ |
| char host_null_surface_state[ANV_SURFACE_STATE_SIZE]; |
| |
| struct vk_pipeline_cache * internal_cache; |
| |
| struct { |
| struct blorp_context context; |
| struct anv_state dynamic_states[BLORP_DYNAMIC_STATE_COUNT]; |
| } blorp; |
| |
| struct anv_state border_colors; |
| |
| struct anv_state slice_hash; |
| |
| /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements |
| * |
| * We need to emit CPS_STATE structures for each viewport accessible by a |
| * pipeline. So rather than write many identical CPS_STATE structures |
|     * dynamically, we can enumerate all possible combinations and then just |
| * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this |
| * array. |
| */ |
| struct anv_state cps_states; |
| |
| uint32_t queue_count; |
| struct anv_queue * queues; |
| |
| struct anv_scratch_pool scratch_pool; |
| struct anv_scratch_pool protected_scratch_pool; |
| struct anv_bo *rt_scratch_bos[16]; |
| struct anv_bo *btd_fifo_bo; |
| struct anv_address rt_uuid_addr; |
| |
| bool robust_buffer_access; |
| |
| uint32_t protected_session_id; |
| |
| /** Shadow ray query BO |
| * |
| * The ray_query_bo only holds the current ray being traced. When using |
| * more than 1 ray query per thread, we cannot fit all the queries in |
|     * there, so we need another buffer to hold query data that is not |
| * currently being used by the HW for tracing, similar to a scratch space. |
| * |
| * The size of the shadow buffer depends on the number of queries per |
| * shader. |
| * |
| * We might need a buffer per queue family due to Wa_14022863161. |
| */ |
| struct anv_bo *ray_query_shadow_bos[2][16]; |
|    /** Ray query buffer used to communicate with the HW unit. |
| */ |
| struct anv_bo *ray_query_bo[2]; |
| |
| struct anv_shader_bin *rt_trampoline; |
| struct anv_shader_bin *rt_trivial_return; |
| |
| enum anv_rt_bvh_build_method bvh_build_method; |
| |
| /** Draw generation shader |
| * |
| * Generates direct draw calls out of indirect parameters. Used to |
| * workaround slowness with indirect draw calls. |
| */ |
| struct anv_shader_bin *internal_kernels[ANV_INTERNAL_KERNEL_COUNT]; |
| const struct intel_l3_config *internal_kernels_l3_config; |
| |
| pthread_mutex_t mutex; |
| pthread_cond_t queue_submit; |
| |
| struct intel_batch_decode_ctx decoder[ANV_MAX_QUEUE_FAMILIES]; |
| /* |
|     * When decoding an anv_cmd_buffer, we might need to search for BOs through |
| * the cmd_buffer's list. |
| */ |
| struct anv_cmd_buffer *cmd_buffer_being_decoded; |
| |
|    int perf_fd; /* -1 if not opened */ |
| struct anv_queue *perf_queue; |
| struct intel_bind_timeline perf_timeline; |
| |
| struct intel_aux_map_context *aux_map_ctx; |
| |
| const struct intel_l3_config *l3_config; |
| |
| struct intel_debug_block_frame *debug_frame_desc; |
| |
| struct intel_ds_device ds; |
| |
| nir_shader *fp64_nir; |
| |
| uint32_t draw_call_count; |
| struct anv_state breakpoint; |
| #if DETECT_OS_ANDROID |
| struct u_gralloc *u_gralloc; |
| #endif |
| |
| /** Precompute all dirty graphics bits |
| * |
|     * Depending on the platform, some of the dirty bits don't apply (for |
|     * example 3DSTATE_PRIMITIVE_REPLICATION is Gfx12.0+ only). Disabling |
|     * some extensions like Mesh shaders also allows us to avoid emitting |
|     * any mesh/task related instructions (we only initialize them once at |
|     * device initialization). |
| */ |
| BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX); |
| |
| /* |
| * Command pool for companion RCS command buffer. |
| */ |
| VkCommandPool companion_rcs_cmd_pool; |
| |
| struct anv_trtt { |
| simple_mtx_t mutex; |
| |
| /* Sometimes we need to run batches from places where we don't have a |
| * queue coming from the API, so we use this. |
| */ |
| struct anv_queue *queue; |
| |
| /* There's only one L3 table, so if l3_addr is zero that means we |
| * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT |
| * yet in this context). |
| */ |
| uint64_t l3_addr; |
| |
| /* We don't want to access the page tables from the CPU, so just |
| * maintain a mirror that we can use. |
| */ |
| uint64_t *l3_mirror; |
| uint64_t *l2_mirror; |
| |
| /* We keep a dynamic list of page table bos, and each bo can store |
| * multiple page tables. |
| */ |
| struct anv_bo **page_table_bos; |
| int num_page_table_bos; |
| int page_table_bos_capacity; |
| |
| /* These are used to keep track of space available for more page tables |
| * within a bo. |
| */ |
| struct anv_bo *cur_page_table_bo; |
| uint64_t next_page_table_bo_offset; |
| |
| struct vk_sync *timeline; |
| uint64_t timeline_val; |
| |
| /* List of struct anv_trtt_submission that are in flight and can be |
| * freed once their vk_sync gets signaled. |
| */ |
| struct list_head in_flight_batches; |
| } trtt; |
| |
| /* Number of sparse resources that currently exist. This is used for a |
| * workaround that makes every memoryBarrier flush more things than it |
| * should. Some workloads create and then immediately destroy sparse |
| * resources when they start, so just counting if a sparse resource was |
| * ever created is not enough. |
| */ |
| uint32_t num_sparse_resources; |
| |
| struct anv_device_astc_emu astc_emu; |
| |
| struct intel_bind_timeline bind_timeline; /* Xe only */ |
| |
| struct { |
| simple_mtx_t mutex; |
| struct hash_table *map; |
| } embedded_samplers; |
| |
| struct { |
| /** |
| * Mutex for the printfs array |
| */ |
| simple_mtx_t mutex; |
| /** |
| * Buffer in which the shader printfs are stored |
| */ |
| struct anv_bo *bo; |
| /** |
| * Array of pointers to u_printf_info |
| */ |
| struct util_dynarray prints; |
| } printf; |
| }; |
| |
| static inline uint32_t |
| anv_get_first_render_queue_index(struct anv_physical_device *pdevice) |
| { |
| assert(pdevice != NULL); |
| |
| for (uint32_t i = 0; i < pdevice->queue.family_count; i++) { |
| if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { |
| return i; |
| } |
| } |
| |
| unreachable("Graphics capable queue family not found"); |
| } |
| |
| static inline struct anv_state |
| anv_binding_table_pool_alloc(struct anv_device *device) |
| { |
| return anv_state_pool_alloc(&device->binding_table_pool, |
| device->binding_table_pool.block_size, 0); |
| } |
| |
| static inline void |
| anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) |
| { |
| anv_state_pool_free(&device->binding_table_pool, state); |
| } |
| |
| static inline struct anv_state |
| anv_null_surface_state_for_binding_table(struct anv_device *device) |
| { |
| struct anv_state state = device->null_surface_state; |
| if (device->physical->indirect_descriptors) { |
| state.offset += device->physical->va.bindless_surface_state_pool.addr - |
| device->physical->va.internal_surface_state_pool.addr; |
| } |
| return state; |
| } |
| |
| static inline struct anv_state |
| anv_bindless_state_for_binding_table(struct anv_device *device, |
| struct anv_state state) |
| { |
| state.offset += device->physical->va.bindless_surface_state_pool.addr - |
| device->physical->va.internal_surface_state_pool.addr; |
| return state; |
| } |
| |
| static inline struct anv_state |
| anv_device_maybe_alloc_surface_state(struct anv_device *device, |
| struct anv_state_stream *surface_state_stream) |
| { |
| if (device->physical->indirect_descriptors) { |
| if (surface_state_stream) |
| return anv_state_stream_alloc(surface_state_stream, 64, 64); |
| return anv_state_pool_alloc(&device->bindless_surface_state_pool, 64, 64); |
| } else { |
| return ANV_STATE_NULL; |
| } |
| } |
| |
| static inline uint32_t |
| anv_mocs(const struct anv_device *device, |
| const struct anv_bo *bo, |
| isl_surf_usage_flags_t usage) |
| { |
| return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo)); |
| } |
| |
| static inline uint32_t |
| anv_mocs_for_address(const struct anv_device *device, |
| const struct anv_address *addr) |
| { |
| return anv_mocs(device, addr->bo, 0); |
| } |
| |
| void anv_device_init_blorp(struct anv_device *device); |
| void anv_device_finish_blorp(struct anv_device *device); |
| |
| static inline void |
| anv_sanitize_map_params(struct anv_device *device, |
| uint64_t in_offset, |
| uint64_t in_size, |
| uint64_t *out_offset, |
| uint64_t *out_size) |
| { |
| /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ |
| if (!device->physical->info.has_mmap_offset) |
| *out_offset = in_offset & ~4095ull; |
| else |
| *out_offset = 0; |
| assert(in_offset >= *out_offset); |
| *out_size = (in_offset + in_size) - *out_offset; |
| |
| /* Let's map whole pages */ |
| *out_size = align64(*out_size, 4096); |
| } |
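| |
| /* A worked example of the arithmetic above (illustrative numbers): on a |
|  * kernel without mmap_offset support, in_offset = 5000 and in_size = 100 |
|  * give: |
|  * |
|  *    out_offset = 5000 & ~4095ull                  = 4096 |
|  *    out_size   = align64((5000 + 100) - 4096, 4096) |
|  *               = align64(1004, 4096)              = 4096 |
|  * |
|  * i.e. we map one whole 4k page covering the requested range. |
|  */ |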
| |
| |
| VkResult anv_device_alloc_bo(struct anv_device *device, |
| const char *name, uint64_t size, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint64_t explicit_address, |
| struct anv_bo **bo); |
| VkResult anv_device_map_bo(struct anv_device *device, |
| struct anv_bo *bo, |
| uint64_t offset, |
| size_t size, |
| void *placed_addr, |
| void **map_out); |
| VkResult anv_device_unmap_bo(struct anv_device *device, |
| struct anv_bo *bo, |
| void *map, size_t map_size, |
| bool replace); |
| VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device, |
| void *host_ptr, uint32_t size, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint64_t client_address, |
| struct anv_bo **bo_out); |
| VkResult anv_device_import_bo(struct anv_device *device, int fd, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint64_t client_address, |
| struct anv_bo **bo); |
| VkResult anv_device_export_bo(struct anv_device *device, |
| struct anv_bo *bo, int *fd_out); |
| VkResult anv_device_get_bo_tiling(struct anv_device *device, |
| struct anv_bo *bo, |
| enum isl_tiling *tiling_out); |
| VkResult anv_device_set_bo_tiling(struct anv_device *device, |
| struct anv_bo *bo, |
| uint32_t row_pitch_B, |
| enum isl_tiling tiling); |
| void anv_device_release_bo(struct anv_device *device, |
| struct anv_bo *bo); |
| |
| static inline void anv_device_set_physical(struct anv_device *device, |
| struct anv_physical_device *physical_device) |
| { |
| device->physical = physical_device; |
| device->info = &physical_device->info; |
| device->isl_dev = physical_device->isl_dev; |
| } |
| |
| static inline struct anv_bo * |
| anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle) |
| { |
| return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle); |
| } |
| |
| VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo, |
| int64_t timeout); |
| |
| VkResult anv_device_print_init(struct anv_device *device); |
| void anv_device_print_fini(struct anv_device *device); |
| void anv_device_print_shader_prints(struct anv_device *device); |
| |
| VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue, |
| const VkDeviceQueueCreateInfo *pCreateInfo, |
| uint32_t index_in_family); |
| void anv_queue_finish(struct anv_queue *queue); |
| |
| VkResult anv_queue_submit(struct vk_queue *queue, |
| struct vk_queue_submit *submit); |
| |
| void anv_queue_trace(struct anv_queue *queue, const char *label, |
| bool frame, bool begin); |
| |
| static inline VkResult |
| anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result) |
| { |
| if (submit_result != VK_SUCCESS) |
| return submit_result; |
| |
| VkResult result = VK_SUCCESS; |
| if (queue->sync) { |
| result = vk_sync_wait(&queue->device->vk, queue->sync, 0, |
| VK_SYNC_WAIT_COMPLETE, UINT64_MAX); |
| if (result != VK_SUCCESS) |
| result = vk_queue_set_lost(&queue->vk, "sync wait failed"); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) |
| anv_device_print_shader_prints(queue->device); |
| |
| return result; |
| } |
| |
| int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); |
| int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, |
| uint32_t stride, uint32_t tiling); |
| int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle); |
| int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); |
| uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); |
| int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param, |
| uint64_t value); |
| VkResult |
| anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device, |
| struct anv_bo *bo, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint32_t *bo_flags); |
| const struct intel_device_info_pat_entry * |
| anv_device_get_pat_entry(struct anv_device *device, |
| enum anv_bo_alloc_flags alloc_flags); |
| |
| uint64_t anv_vma_alloc(struct anv_device *device, |
| uint64_t size, uint64_t align, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint64_t client_address, |
| struct util_vma_heap **out_vma_heap); |
| void anv_vma_free(struct anv_device *device, |
| struct util_vma_heap *vma_heap, |
| uint64_t address, uint64_t size); |
| |
| struct anv_reloc_list { |
| bool uses_relocs; |
| uint32_t dep_words; |
| BITSET_WORD * deps; |
| const VkAllocationCallbacks *alloc; |
| }; |
| |
| VkResult anv_reloc_list_init(struct anv_reloc_list *list, |
| const VkAllocationCallbacks *alloc, |
| bool uses_relocs); |
| void anv_reloc_list_finish(struct anv_reloc_list *list); |
| |
| VkResult |
| anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo); |
| |
| static inline VkResult |
| anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo) |
| { |
| return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS; |
| } |
| |
| VkResult anv_reloc_list_append(struct anv_reloc_list *list, |
| struct anv_reloc_list *other); |
| |
| struct anv_batch_bo { |
| /* Link in the anv_cmd_buffer.owned_batch_bos list */ |
| struct list_head link; |
| |
| struct anv_bo * bo; |
| |
| /* Bytes actually consumed in this batch BO */ |
| uint32_t length; |
| |
| /* When this batch BO is used as part of a primary batch buffer, this |
|     * tracks whether it is chained to another primary batch buffer. |
|     * |
|     * If this is the case, the relocation list's last entry points to the |
|     * location of the MI_BATCH_BUFFER_START chaining to the next batch. |
| */ |
| bool chained; |
| |
| struct anv_reloc_list relocs; |
| }; |
| |
| struct anv_batch { |
| const VkAllocationCallbacks * alloc; |
| |
| /** |
| * Sum of all the anv_batch_bo sizes allocated for this command buffer. |
| * Used to increase allocation size for long command buffers. |
| */ |
| size_t allocated_batch_size; |
| |
| struct anv_address start_addr; |
| |
| void * start; |
| void * end; |
| void * next; |
| |
| struct anv_reloc_list * relocs; |
| |
| /* This callback is called (with the associated user data) in the event |
| * that the batch runs out of space. |
| */ |
| VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *); |
| void * user_data; |
| |
| /** |
| * Current error status of the command buffer. Used to track inconsistent |
| * or incomplete command buffer states that are the consequence of run-time |
| * errors such as out of memory scenarios. We want to track this in the |
| * batch because the command buffer object is not visible to some parts |
| * of the driver. |
| */ |
| VkResult status; |
| |
| enum intel_engine_class engine_class; |
| |
| /** |
| * Write fencing status for mi_builder. |
| */ |
| bool write_fence_status; |
| |
| /** |
|     * Number of 3DPRIMITIVEs emitted for Wa_16014538804 |
| */ |
| uint8_t num_3d_primitives_emitted; |
| |
| struct u_trace * trace; |
| const char * pc_reasons[4]; |
| uint32_t pc_reasons_count; |
| |
| }; |
| |
| void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); |
| VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size); |
| void anv_batch_advance(struct anv_batch *batch, uint32_t size); |
| void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); |
| struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location); |
| |
| static inline struct anv_address |
| anv_batch_current_address(struct anv_batch *batch) |
| { |
| return anv_batch_address(batch, batch->next); |
| } |
| |
| static inline void |
| anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr, |
| void *map, size_t size) |
| { |
| batch->start_addr = addr; |
| batch->next = batch->start = map; |
| batch->end = map + size; |
| } |
| |
| static inline VkResult |
| anv_batch_set_error(struct anv_batch *batch, VkResult error) |
| { |
| assert(error != VK_SUCCESS); |
| if (batch->status == VK_SUCCESS) |
| batch->status = error; |
| return batch->status; |
| } |
| |
| static inline bool |
| anv_batch_has_error(struct anv_batch *batch) |
| { |
| return batch->status != VK_SUCCESS; |
| } |
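| |
| /* A typical usage pattern for the two helpers above (illustrative sketch; |
|  * do_something() is a hypothetical stand-in): |
|  * |
|  *    if (anv_batch_has_error(&cmd_buffer->batch)) |
|  *       return;   // don't record into an already-failed batch |
|  * |
|  *    VkResult result = do_something(cmd_buffer); |
|  *    if (result != VK_SUCCESS) |
|  *       anv_batch_set_error(&cmd_buffer->batch, result); |
|  */ |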
| |
| static inline uint64_t |
| _anv_combine_address(struct anv_batch *batch, void *location, |
| const struct anv_address address, uint32_t delta) |
| { |
| if (address.bo == NULL) |
| return address.offset + delta; |
| |
| if (batch) |
| anv_reloc_list_add_bo(batch->relocs, address.bo); |
| |
| return anv_address_physical(anv_address_add(address, delta)); |
| } |
| |
| #define __gen_address_type struct anv_address |
| #define __gen_user_data struct anv_batch |
| #define __gen_combine_address _anv_combine_address |
| |
| /* Wrapper macros needed to work around preprocessor argument issues. In |
| * particular, arguments don't get pre-evaluated if they are concatenated. |
| * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the |
| * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". |
| * We can work around this easily enough with these helpers. |
| */ |
| #define __anv_cmd_length(cmd) cmd ## _length |
| #define __anv_cmd_length_bias(cmd) cmd ## _length_bias |
| #define __anv_cmd_header(cmd) cmd ## _header |
| #define __anv_cmd_pack(cmd) cmd ## _pack |
| #define __anv_reg_num(reg) reg ## _num |
| |
| #define anv_pack_struct(dst, struc, ...) do { \ |
| struct struc __template = { \ |
| __VA_ARGS__ \ |
| }; \ |
| __anv_cmd_pack(struc)(NULL, dst, &__template); \ |
| VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \ |
| } while (0) |
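| |
| /* Usage sketch for anv_pack_struct() (illustrative; the exact genxml |
|  * struct and field names depend on the platform headers): |
|  * |
|  *    uint32_t dw[GENX(SAMPLER_STATE_length)]; |
|  *    anv_pack_struct(dw, GENX(SAMPLER_STATE), |
|  *                    .MinModeFilter = MAPFILTER_LINEAR, |
|  *                    .MagModeFilter = MAPFILTER_LINEAR); |
|  */ |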
| |
| #define anv_batch_emitn(batch, n, cmd, ...) ({ \ |
| void *__dst = anv_batch_emit_dwords(batch, n); \ |
| if (__dst) { \ |
| struct cmd __template = { \ |
| __anv_cmd_header(cmd), \ |
| .DWordLength = n - __anv_cmd_length_bias(cmd), \ |
| __VA_ARGS__ \ |
| }; \ |
| __anv_cmd_pack(cmd)(batch, __dst, &__template); \ |
| } \ |
| __dst; \ |
| }) |
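| |
| /* Usage sketch for anv_batch_emitn() (illustrative): variable-length |
|  * commands pass their dword count explicitly and fill the payload after |
|  * the packed header, e.g.: |
|  * |
|  *    uint32_t num_dwords = 1 + num_buffers * 4; |
|  *    uint32_t *dw = anv_batch_emitn(batch, num_dwords, |
|  *                                   GENX(3DSTATE_VERTEX_BUFFERS)); |
|  *    if (dw != NULL) { |
|  *       // dw + 1 receives the packed VERTEX_BUFFER_STATE entries |
|  *    } |
|  */ |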
| |
| #define anv_batch_emit_merge(batch, cmd, pipeline, state, name) \ |
| for (struct cmd name = { 0 }, \ |
| *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ |
| __builtin_expect(_dst != NULL, 1); \ |
| ({ uint32_t _partial[__anv_cmd_length(cmd)]; \ |
| assert((pipeline)->state.len == __anv_cmd_length(cmd)); \ |
| __anv_cmd_pack(cmd)(batch, _partial, &name); \ |
| for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \ |
| assert((_partial[i] & \ |
| (pipeline)->batch_data[ \ |
| (pipeline)->state.offset + i]) == 0); \ |
| ((uint32_t *)_dst)[i] = _partial[i] | \ |
| (pipeline)->batch_data[(pipeline)->state.offset + i]; \ |
| } \ |
| VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ |
| _dst = NULL; \ |
| })) |
| |
| #define anv_batch_emit_merge_protected(batch, cmd, pipeline, state, \ |
| name, protected) \ |
| for (struct cmd name = { 0 }, \ |
| *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ |
| __builtin_expect(_dst != NULL, 1); \ |
| ({ struct anv_gfx_state_ptr *_cmd_state = protected ? \ |
| &(pipeline)->state##_protected : \ |
| &(pipeline)->state; \ |
| uint32_t _partial[__anv_cmd_length(cmd)]; \ |
| assert(_cmd_state->len == __anv_cmd_length(cmd)); \ |
| __anv_cmd_pack(cmd)(batch, _partial, &name); \ |
| for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \ |
| assert((_partial[i] & \ |
| (pipeline)->batch_data[ \ |
| (pipeline)->state.offset + i]) == 0); \ |
| ((uint32_t *)_dst)[i] = _partial[i] | \ |
| (pipeline)->batch_data[_cmd_state->offset + i]; \ |
| } \ |
| VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ |
| _dst = NULL; \ |
| })) |
| |
| #define anv_batch_emit(batch, cmd, name) \ |
| for (struct cmd name = { __anv_cmd_header(cmd) }, \ |
| *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ |
| __builtin_expect(_dst != NULL, 1); \ |
| ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \ |
| VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ |
| _dst = NULL; \ |
| })) |
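| |
| /* Usage sketch for anv_batch_emit(): the macro opens a one-iteration |
|  * "emit scope"; the body fills in the template struct, which is packed |
|  * into the batch when the scope closes, e.g.: |
|  * |
|  *    anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { |
|  *       pc.CommandStreamerStallEnable = true; |
|  *       pc.StallAtPixelScoreboard = true; |
|  *    } |
|  */ |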
| |
| #define anv_batch_write_reg(batch, reg, name) \ |
| for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \ |
| ({ \ |
| uint32_t _dw[__anv_cmd_length(reg)]; \ |
| __anv_cmd_pack(reg)(NULL, _dw, &name); \ |
| for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \ |
| anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \ |
| lri.RegisterOffset = __anv_reg_num(reg); \ |
| lri.DataDWord = _dw[i]; \ |
| } \ |
| } \ |
| _cont = NULL; \ |
| })) |
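| |
| /* Usage sketch for anv_batch_write_reg() (register and field names are |
|  * illustrative stand-ins for genxml-generated ones): |
|  * |
|  *    anv_batch_write_reg(batch, GENX(CACHE_MODE_1), cm) { |
|  *       cm.MSCRAWHazardAvoidanceBit = true; |
|  *       cm.MSCRAWHazardAvoidanceBitMask = true; |
|  *    } |
|  * |
|  * Each dword of the register is written with its own MI_LOAD_REGISTER_IMM. |
|  */ |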
| |
| /* #define __gen_get_batch_dwords anv_batch_emit_dwords */ |
| /* #define __gen_get_batch_address anv_batch_address */ |
| /* #define __gen_address_value anv_address_physical */ |
| /* #define __gen_address_offset anv_address_add */ |
| |
| /* Base structure used to track a submission that needs some clean operations |
| * upon completion. Should be embedded into a larger structure. |
| */ |
| struct anv_async_submit { |
| struct anv_queue *queue; |
| |
| struct anv_bo_pool *bo_pool; |
| |
| bool use_companion_rcs; |
| |
| bool owns_sync; |
| struct vk_sync_signal signal; |
| |
| struct anv_reloc_list relocs; |
| struct anv_batch batch; |
| struct util_dynarray batch_bos; |
| }; |
| |
| VkResult |
| anv_async_submit_init(struct anv_async_submit *submit, |
| struct anv_queue *queue, |
| struct anv_bo_pool *bo_pool, |
| bool use_companion_rcs, |
| bool create_signal_sync); |
| |
| void |
| anv_async_submit_fini(struct anv_async_submit *submit); |
| |
| VkResult |
| anv_async_submit_create(struct anv_queue *queue, |
| struct anv_bo_pool *bo_pool, |
| bool use_companion_rcs, |
| bool create_signal_sync, |
| struct anv_async_submit **out_submit); |
| |
| void |
| anv_async_submit_destroy(struct anv_async_submit *submit); |
| |
| bool |
| anv_async_submit_done(struct anv_async_submit *submit); |
| |
| bool |
| anv_async_submit_wait(struct anv_async_submit *submit); |
| |
| struct anv_sparse_submission { |
| struct anv_queue *queue; |
| |
| struct anv_vm_bind *binds; |
| int binds_len; |
| int binds_capacity; |
| |
| uint32_t wait_count; |
| uint32_t signal_count; |
| |
| struct vk_sync_wait *waits; |
| struct vk_sync_signal *signals; |
| }; |
| |
| struct anv_trtt_bind { |
| uint64_t pte_addr; |
| uint64_t entry_addr; |
| }; |
| |
| struct anv_trtt_submission { |
| struct anv_async_submit base; |
| |
| struct anv_sparse_submission *sparse; |
| |
| struct list_head link; |
| }; |
| |
| struct anv_device_memory { |
| struct vk_device_memory vk; |
| |
| struct list_head link; |
| |
| struct anv_bo * bo; |
| const struct anv_memory_type * type; |
| |
| void * map; |
| size_t map_size; |
| |
|    /* The map, from the user's PoV, is map + map_delta */ |
| uint64_t map_delta; |
| }; |
| |
| /** |
| * Header for Vertex URB Entry (VUE) |
| */ |
| struct anv_vue_header { |
| uint32_t Reserved; |
| uint32_t RTAIndex; /* RenderTargetArrayIndex */ |
| uint32_t ViewportIndex; |
| float PointWidth; |
| }; |
| |
| /** Struct representing a sampled image descriptor |
| * |
| * This descriptor layout is used for sampled images, bare sampler, and |
| * combined image/sampler descriptors. |
| */ |
| struct anv_sampled_image_descriptor { |
| /** Bindless image handle |
| * |
| * This is expected to already be shifted such that the 20-bit |
| * SURFACE_STATE table index is in the top 20 bits. |
| */ |
| uint32_t image; |
| |
| /** Bindless sampler handle |
| * |
| * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative |
| * to the dynamic state base address. |
| */ |
| uint32_t sampler; |
| }; |
| |
| /** Struct representing a storage image descriptor */ |
| struct anv_storage_image_descriptor { |
| /** Bindless image handles |
| * |
| * These are expected to already be shifted such that the 20-bit |
| * SURFACE_STATE table index is in the top 20 bits. |
| */ |
| uint32_t vanilla; |
| |
| /** Image depth |
| * |
|     * By default the HW RESINFO message allows us to query the depth of an |
|     * image: |
|     * |
|     * From the Kaby Lake docs for the RESINFO message: |
|     * |
|     *    "Surface Type  | ... | Blue |
|     *     --------------+-----+---------------- |
|     *     SURFTYPE_3D   | ... | (Depth+1)>>LOD" |
| * |
| * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image, |
| * meaning at a depth offset with a new depth value potentially reduced |
| * from the original image. Unfortunately if we change the Depth value of |
|     * the image, we then run into issues with Yf/Ys tilings where the HW |
|     * fetches data at incorrect locations. |
| * |
| * To solve this, we put the slice depth in the descriptor and recompose |
| * the vec3 (width, height, depth) using this field for z and xy using the |
| * RESINFO result. |
| */ |
| uint32_t image_depth; |
| }; |
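| |
| /* A sketch of the size-query recomposition described above (shader-side |
|  * pseudocode; names are illustrative): |
|  * |
|  *    uvec3 size; |
|  *    size.xy = resinfo_result.xy;   // width/height from the HW RESINFO |
|  *    size.z  = desc->image_depth;   // slice depth from the descriptor |
|  */ |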
| |
| /** Struct representing an address/range descriptor |
|  * |
|  * The fields of this struct correspond directly to the data layout of |
|  * nir_address_format_64bit_bounded_global addresses. The last field is |
|  * the offset in the NIR address, and it must be zero so that loading the |
|  * descriptor yields a pointer to the start of the range. |
|  */ |
| struct anv_address_range_descriptor { |
| uint64_t address; |
| uint32_t range; |
| uint32_t zero; |
| }; |
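| |
| /* A minimal sketch of how such a descriptor gets filled for a buffer |
|  * binding (hypothetical, helper-free code; buffer_addr, bind_range_B and |
|  * desc_map are illustrative names): |
|  * |
|  *    struct anv_address_range_descriptor desc = { |
|  *       .address = anv_address_physical(buffer_addr), |
|  *       .range = bind_range_B, |
|  *       .zero = 0, |
|  *    }; |
|  *    memcpy(desc_map, &desc, sizeof(desc)); |
|  */ |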
| |
| enum anv_descriptor_data { |
| /** The descriptor contains a BTI reference to a surface state */ |
| ANV_DESCRIPTOR_BTI_SURFACE_STATE = BITFIELD_BIT(0), |
| /** The descriptor contains a BTI reference to a sampler state */ |
| ANV_DESCRIPTOR_BTI_SAMPLER_STATE = BITFIELD_BIT(1), |
| /** The descriptor contains an actual buffer view */ |
| ANV_DESCRIPTOR_BUFFER_VIEW = BITFIELD_BIT(2), |
| /** The descriptor contains inline uniform data */ |
| ANV_DESCRIPTOR_INLINE_UNIFORM = BITFIELD_BIT(3), |
| /** anv_address_range_descriptor with a buffer address and range */ |
| ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE = BITFIELD_BIT(4), |
| /** Bindless surface handle (through anv_sampled_image_descriptor) */ |
| ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE = BITFIELD_BIT(5), |
| /** Storage image handles (through anv_storage_image_descriptor) */ |
| ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE = BITFIELD_BIT(6), |
| /** The descriptor contains a single RENDER_SURFACE_STATE */ |
| ANV_DESCRIPTOR_SURFACE = BITFIELD_BIT(7), |
| /** The descriptor contains a SAMPLER_STATE */ |
| ANV_DESCRIPTOR_SAMPLER = BITFIELD_BIT(8), |
| /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */ |
| ANV_DESCRIPTOR_SURFACE_SAMPLER = BITFIELD_BIT(9), |
| }; |
| |
| struct anv_descriptor_set_binding_layout { |
| /* The type of the descriptors in this binding */ |
| VkDescriptorType type; |
| |
| /* Flags provided when this binding was created */ |
| VkDescriptorBindingFlags flags; |
| |
| /* Bitfield representing the type of data this descriptor contains */ |
| enum anv_descriptor_data data; |
| |
| /* Maximum number of YCbCr texture/sampler planes */ |
| uint8_t max_plane_count; |
| |
| /* Number of array elements in this binding (or size in bytes for inline |
| * uniform data) |
| */ |
| uint32_t array_size; |
| |
| /* Index into the flattened descriptor set */ |
| uint32_t descriptor_index; |
| |
| /* Index into the dynamic state array for a dynamic buffer, relative to the |
| * set. |
| */ |
| int16_t dynamic_offset_index; |
| |
| /* Computed surface size from data (for one plane) */ |
| uint16_t descriptor_data_surface_size; |
| |
| /* Computed sampler size from data (for one plane) */ |
| uint16_t descriptor_data_sampler_size; |
| |
| /* Index into the descriptor set buffer views */ |
| int32_t buffer_view_index; |
| |
| /* Offset into the descriptor buffer where the surface descriptor lives */ |
| uint32_t descriptor_surface_offset; |
| |
| /* Offset into the descriptor buffer where the sampler descriptor lives */ |
| uint16_t descriptor_sampler_offset; |
| |
|    /* Precomputed surface stride (with multi-plane descriptors, the |
|     * descriptor includes all the planes) |
|     */ |
| uint16_t descriptor_surface_stride; |
| |
|    /* Precomputed sampler stride (with multi-plane descriptors, the |
|     * descriptor includes all the planes) |
|     */ |
| uint16_t descriptor_sampler_stride; |
| |
| /* Immutable samplers (or NULL if no immutable samplers) */ |
| struct anv_sampler **immutable_samplers; |
| }; |
| |
| enum anv_descriptor_set_layout_type { |
| ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN, |
| ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT, |
| ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, |
| ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER, |
| }; |
| |
| struct anv_descriptor_set_layout { |
| struct vk_object_base base; |
| |
| VkDescriptorSetLayoutCreateFlags flags; |
| |
| /* Type of descriptor set layout */ |
| enum anv_descriptor_set_layout_type type; |
| |
| /* Descriptor set layouts can be destroyed at almost any time */ |
| uint32_t ref_cnt; |
| |
| /* Number of bindings in this descriptor set */ |
| uint32_t binding_count; |
| |
| /* Total number of descriptors */ |
| uint32_t descriptor_count; |
| |
| /* Shader stages affected by this descriptor set */ |
| uint16_t shader_stages; |
| |
| /* Number of buffer views in this descriptor set */ |
| uint32_t buffer_view_count; |
| |
| /* Number of dynamic offsets used by this descriptor set */ |
| uint16_t dynamic_offset_count; |
| |
| /* For each dynamic buffer, which VkShaderStageFlagBits stages are using |
| * this buffer |
| */ |
| VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS]; |
| |
| /* Size of the descriptor buffer dedicated to surface states for this |
| * descriptor set |
| */ |
| uint32_t descriptor_buffer_surface_size; |
| |
| /* Size of the descriptor buffer dedicated to sampler states for this |
| * descriptor set |
| */ |
| uint32_t descriptor_buffer_sampler_size; |
| |
|    /* Number of embedded samplers */ |
| uint32_t embedded_sampler_count; |
| |
| /* Bindings in this descriptor set */ |
| struct anv_descriptor_set_binding_layout binding[0]; |
| }; |
| |
| bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice, |
| const struct anv_descriptor_set_layout *set, |
| const struct anv_descriptor_set_binding_layout *binding); |
| |
| bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice, |
| const struct anv_descriptor_set_layout *set, |
| const struct anv_descriptor_set_binding_layout *binding); |
| |
| void anv_descriptor_set_layout_destroy(struct anv_device *device, |
| struct anv_descriptor_set_layout *layout); |
| |
| void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout); |
| |
| static inline struct anv_descriptor_set_layout * |
| anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout) |
| { |
| assert(layout && layout->ref_cnt >= 1); |
| p_atomic_inc(&layout->ref_cnt); |
| |
| return layout; |
| } |
| |
| static inline void |
| anv_descriptor_set_layout_unref(struct anv_device *device, |
| struct anv_descriptor_set_layout *layout) |
| { |
| assert(layout && layout->ref_cnt >= 1); |
| if (p_atomic_dec_zero(&layout->ref_cnt)) |
| anv_descriptor_set_layout_destroy(device, layout); |
| } |
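| |
| /* Ownership follows the usual ref/unref pattern (illustrative sketch): |
|  * |
|  *    set->layout = anv_descriptor_set_layout_ref(layout); |
|  *    ... |
|  *    anv_descriptor_set_layout_unref(device, set->layout); |
|  */ |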
| |
| struct anv_descriptor { |
| VkDescriptorType type; |
| |
| union { |
| struct { |
| VkImageLayout layout; |
| struct anv_image_view *image_view; |
| struct anv_sampler *sampler; |
| }; |
| |
| struct { |
| struct anv_buffer_view *set_buffer_view; |
| struct anv_buffer *buffer; |
| uint64_t offset; |
| uint64_t range; |
| uint64_t bind_range; |
| }; |
| |
| struct anv_buffer_view *buffer_view; |
| |
| struct vk_acceleration_structure *accel_struct; |
| }; |
| }; |
| |
| struct anv_descriptor_set { |
| struct vk_object_base base; |
| |
| struct anv_descriptor_pool *pool; |
| struct anv_descriptor_set_layout *layout; |
| |
|    /* Amount of space occupied in the pool by this descriptor set. It can |
| * be larger than the size of the descriptor set. |
| */ |
| uint32_t size; |
| |
| /* Is this descriptor set a push descriptor */ |
| bool is_push; |
| |
| /* Bitfield of descriptors for which we need to generate surface states. |
| * Only valid for push descriptors |
| */ |
| uint32_t generate_surface_states; |
| |
| /* State relative to anv_descriptor_pool::surface_bo */ |
| struct anv_state desc_surface_mem; |
| /* State relative to anv_descriptor_pool::sampler_bo */ |
| struct anv_state desc_sampler_mem; |
| /* Surface state for the descriptor buffer */ |
| struct anv_state desc_surface_state; |
| |
| /* Descriptor set address pointing to desc_surface_mem (we don't need one |
|     * for samplers because they're never accessed other than by the HW through |
| * the shader sampler handle). |
| */ |
| struct anv_address desc_surface_addr; |
| |
| struct anv_address desc_sampler_addr; |
| |
| /* Descriptor offset from the |
| * device->va.internal_surface_state_pool.addr |
| * |
| * It just needs to be added to the binding table offset to be put into the |
| * HW BTI entry. |
| */ |
| uint32_t desc_offset; |
| |
| uint32_t buffer_view_count; |
| struct anv_buffer_view *buffer_views; |
| |
|    /* Link in the descriptor pool's desc_sets list. */ |
| struct list_head pool_link; |
| |
| uint32_t descriptor_count; |
| struct anv_descriptor descriptors[0]; |
| }; |
| |
| static inline bool |
| anv_descriptor_set_is_push(struct anv_descriptor_set *set) |
| { |
| return set->pool == NULL; |
| } |
| |
| struct anv_surface_state_data { |
| uint8_t data[ANV_SURFACE_STATE_SIZE]; |
| }; |
| |
| struct anv_buffer_state { |
| /** Surface state allocated from the bindless heap |
| * |
| * Only valid if anv_physical_device::indirect_descriptors is true |
| */ |
| struct anv_state state; |
| |
| /** Surface state after genxml packing |
| * |
| * Only valid if anv_physical_device::indirect_descriptors is false |
| */ |
| struct anv_surface_state_data state_data; |
| }; |
| |
| struct anv_buffer_view { |
| struct vk_buffer_view vk; |
| |
| struct anv_address address; |
| |
| struct anv_buffer_state general; |
| struct anv_buffer_state storage; |
| }; |
| |
| struct anv_push_descriptor_set { |
| struct anv_descriptor_set set; |
| |
|    /* Put this field right behind anv_descriptor_set so it lands where the |
|     * descriptors[0] flexible array member begins. */ |
| struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS]; |
| |
| /** True if the descriptor set buffer has been referenced by a draw or |
| * dispatch command. |
| */ |
| bool set_used_on_gpu; |
| |
| struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS]; |
| }; |
| |
| static inline struct anv_address |
| anv_descriptor_set_address(struct anv_descriptor_set *set) |
| { |
| if (anv_descriptor_set_is_push(set)) { |
|       /* We have to flag the push descriptor set as used on the GPU so |
|        * that the next time we push descriptors, we grab new memory. |
| */ |
| struct anv_push_descriptor_set *push_set = |
| (struct anv_push_descriptor_set *)set; |
| push_set->set_used_on_gpu = true; |
| } |
| |
| return set->desc_surface_addr; |
| } |
| |
| struct anv_descriptor_pool_heap { |
| /* BO allocated to back the pool (unused for host pools) */ |
| struct anv_bo *bo; |
| |
| /* Host memory allocated to back a host pool */ |
| void *host_mem; |
| |
| /* Heap tracking allocations in bo/host_mem */ |
| struct util_vma_heap heap; |
| |
| /* Size of the heap */ |
| uint32_t size; |
| |
| /* Allocated size in the heap */ |
| uint32_t alloc_size; |
| }; |
| |
| struct anv_descriptor_pool { |
| struct vk_object_base base; |
| |
| struct anv_descriptor_pool_heap surfaces; |
| struct anv_descriptor_pool_heap samplers; |
| |
| struct anv_state_stream surface_state_stream; |
| void *surface_state_free_list; |
| |
| /** List of anv_descriptor_set. */ |
| struct list_head desc_sets; |
| |
| /** Heap over host_mem */ |
| struct util_vma_heap host_heap; |
| |
| /** Allocated size of host_mem */ |
| uint32_t host_mem_size; |
| |
| /** |
| * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then |
| * surface_state_stream is unused. |
| */ |
| bool host_only; |
| |
| alignas(8) char host_mem[0]; |
| }; |
| |
| bool |
| anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_push_descriptor_set *push_set, |
| struct anv_descriptor_set_layout *layout); |
| |
| void |
| anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set); |
| |
| void |
| anv_descriptor_set_write_image_view(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| const VkDescriptorImageInfo * const info, |
| VkDescriptorType type, |
| uint32_t binding, |
| uint32_t element); |
| |
| void |
| anv_descriptor_set_write_buffer_view(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| VkDescriptorType type, |
| struct anv_buffer_view *buffer_view, |
| uint32_t binding, |
| uint32_t element); |
| |
| void |
| anv_descriptor_set_write_buffer(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| VkDescriptorType type, |
| struct anv_buffer *buffer, |
| uint32_t binding, |
| uint32_t element, |
| VkDeviceSize offset, |
| VkDeviceSize range); |
| |
| void |
| anv_descriptor_write_surface_state(struct anv_device *device, |
| struct anv_descriptor *desc, |
| struct anv_state surface_state); |
| |
| void |
| anv_descriptor_set_write_acceleration_structure(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| struct vk_acceleration_structure *accel, |
| uint32_t binding, |
| uint32_t element); |
| |
| void |
| anv_descriptor_set_write_inline_uniform_data(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| uint32_t binding, |
| const void *data, |
| size_t offset, |
| size_t size); |
| |
| void |
| anv_descriptor_set_write(struct anv_device *device, |
| struct anv_descriptor_set *set_override, |
| uint32_t write_count, |
| const VkWriteDescriptorSet *writes); |
| |
| void |
| anv_descriptor_set_write_template(struct anv_device *device, |
| struct anv_descriptor_set *set, |
| const struct vk_descriptor_update_template *template, |
| const void *data); |
| |
| #define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4) |
| #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 3) |
| #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 2) |
| #define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 1) |
| #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX |
| |
| struct anv_pipeline_binding { |
| /** Index in the descriptor set |
| * |
| * This is a flattened index; the descriptor set layout is already taken |
| * into account. |
| */ |
| uint32_t index; |
| |
| /** Binding in the descriptor set. Not valid for any of the |
| * ANV_DESCRIPTOR_SET_* |
| */ |
| uint32_t binding; |
| |
| /** Offset in the descriptor buffer |
| * |
|     * Relative to anv_descriptor_set::desc_surface_addr. This is useful for |
| * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding |
| * table entry. |
| */ |
| uint32_t set_offset; |
| |
| /** The descriptor set this surface corresponds to. |
| * |
|     * The special ANV_DESCRIPTOR_SET_* values above indicate that this |
| * binding is not a normal descriptor set but something else. |
| */ |
| uint8_t set; |
| |
| union { |
| /** Plane in the binding index for images */ |
| uint8_t plane; |
| |
| /** Input attachment index (relative to the subpass) */ |
| uint8_t input_attachment_index; |
| |
| /** Dynamic offset index |
| * |
| * For dynamic UBOs and SSBOs, relative to set. |
| */ |
| uint8_t dynamic_offset_index; |
| }; |
| }; |
| |
| struct anv_embedded_sampler_key { |
|    /** No need to track binding elements for embedded samplers as: |
|     * |
|     * VUID-VkDescriptorSetLayoutBinding-flags-08006: |
|     * |
|     *    "If VkDescriptorSetLayoutCreateInfo::flags contains |
|     *    VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT, |
|     *    descriptorCount must: be less than or equal to 1" |
|     * |
|     * The following struct can be safely hashed as it doesn't include any |
|     * address/offset. |
|     */ |
| uint32_t sampler[4]; |
| uint32_t color[4]; |
| }; |
| |
| struct anv_pipeline_embedded_sampler_binding { |
| /** The descriptor set this sampler belongs to */ |
| uint8_t set; |
| |
| /** The binding in the set this sampler belongs to */ |
| uint32_t binding; |
| |
| /** The data configuring the sampler */ |
| struct anv_embedded_sampler_key key; |
| }; |
| |
| struct anv_push_range { |
| /** Index in the descriptor set */ |
| uint32_t index; |
| |
| /** Descriptor set index */ |
| uint8_t set; |
| |
| /** Dynamic offset index (for dynamic UBOs), relative to set. */ |
| uint8_t dynamic_offset_index; |
| |
| /** Start offset in units of 32B */ |
| uint8_t start; |
| |
| /** Range in units of 32B */ |
| uint8_t length; |
| }; |
| |
| struct anv_pipeline_sets_layout { |
| struct anv_device *device; |
| |
| struct { |
| struct anv_descriptor_set_layout *layout; |
| uint32_t dynamic_offset_start; |
| } set[MAX_SETS]; |
| |
| enum anv_descriptor_set_layout_type type; |
| |
| uint32_t num_sets; |
| uint32_t num_dynamic_buffers; |
| int push_descriptor_set_index; |
| |
| bool independent_sets; |
| |
| unsigned char sha1[20]; |
| }; |
| |
| void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout, |
| struct anv_device *device, |
| bool independent_sets); |
| |
| void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout); |
| |
| void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout, |
| uint32_t set_idx, |
| struct anv_descriptor_set_layout *set_layout); |
| |
| uint32_t |
| anv_pipeline_sets_layout_embedded_sampler_count(const struct anv_pipeline_sets_layout *layout); |
| |
| void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout); |
| |
| void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout); |
| |
| struct anv_pipeline_layout { |
| struct vk_object_base base; |
| |
| struct anv_pipeline_sets_layout sets_layout; |
| }; |
| |
| const struct anv_descriptor_set_layout * |
| anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout, |
| uint8_t *desc_idx); |
| |
| struct anv_sparse_binding_data { |
| uint64_t address; |
| uint64_t size; |
| |
|    /* This is kept only because it's given to us by vma_alloc() and needs |
|     * to be passed back to vma_free(); we have no other particular use for |
|     * it. |
| */ |
| struct util_vma_heap *vma_heap; |
| }; |
| |
| #define ANV_SPARSE_BLOCK_SIZE (64 * 1024) |
| |
| static inline bool |
| anv_sparse_binding_is_enabled(struct anv_device *device) |
| { |
| return device->vk.enabled_features.sparseBinding; |
| } |
| |
| static inline bool |
| anv_sparse_residency_is_enabled(struct anv_device *device) |
| { |
| return device->vk.enabled_features.sparseResidencyBuffer || |
| device->vk.enabled_features.sparseResidencyImage2D || |
| device->vk.enabled_features.sparseResidencyImage3D || |
| device->vk.enabled_features.sparseResidency2Samples || |
| device->vk.enabled_features.sparseResidency4Samples || |
| device->vk.enabled_features.sparseResidency8Samples || |
| device->vk.enabled_features.sparseResidency16Samples || |
| device->vk.enabled_features.sparseResidencyAliased; |
| } |
| |
| VkResult anv_init_sparse_bindings(struct anv_device *device, |
| uint64_t size, |
| struct anv_sparse_binding_data *sparse, |
| enum anv_bo_alloc_flags alloc_flags, |
| uint64_t client_address, |
| struct anv_address *out_address); |
| void anv_free_sparse_bindings(struct anv_device *device, |
| struct anv_sparse_binding_data *sparse); |
| VkResult anv_sparse_bind_buffer(struct anv_device *device, |
| struct anv_buffer *buffer, |
| const VkSparseMemoryBind *vk_bind, |
| struct anv_sparse_submission *submit); |
| VkResult anv_sparse_bind_image_opaque(struct anv_device *device, |
| struct anv_image *image, |
| const VkSparseMemoryBind *vk_bind, |
| struct anv_sparse_submission *submit); |
| VkResult anv_sparse_bind_image_memory(struct anv_queue *queue, |
| struct anv_image *image, |
| const VkSparseImageMemoryBind *bind, |
| struct anv_sparse_submission *submit); |
| VkResult anv_sparse_bind(struct anv_device *device, |
| struct anv_sparse_submission *sparse_submit); |
| |
| VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device, |
| bool wait_completion); |
| |
| VkSparseImageFormatProperties |
| anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice, |
| VkImageAspectFlags aspect, |
| VkImageType vk_image_type, |
| VkSampleCountFlagBits vk_samples, |
| struct isl_surf *surf); |
| void anv_sparse_calc_miptail_properties(struct anv_device *device, |
| struct anv_image *image, |
| VkImageAspectFlags vk_aspect, |
| uint32_t *imageMipTailFirstLod, |
| VkDeviceSize *imageMipTailSize, |
| VkDeviceSize *imageMipTailOffset, |
| VkDeviceSize *imageMipTailStride); |
| VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice, |
| VkImageCreateFlags flags, |
| VkImageTiling tiling, |
| VkSampleCountFlagBits samples, |
| VkImageType type, |
| VkFormat format); |
| |
| struct anv_buffer { |
| struct vk_buffer vk; |
| |
| /* Set when bound */ |
| struct anv_address address; |
| |
| struct anv_sparse_binding_data sparse_data; |
| }; |
| |
| static inline bool |
| anv_buffer_is_protected(const struct anv_buffer *buffer) |
| { |
| return buffer->vk.create_flags & VK_BUFFER_CREATE_PROTECTED_BIT; |
| } |
| |
| static inline bool |
| anv_buffer_is_sparse(const struct anv_buffer *buffer) |
| { |
| return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT; |
| } |
| |
| enum anv_cmd_dirty_bits { |
| ANV_CMD_DIRTY_PIPELINE = 1 << 0, |
| ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1, |
| ANV_CMD_DIRTY_RENDER_AREA = 1 << 2, |
| ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 3, |
| ANV_CMD_DIRTY_XFB_ENABLE = 1 << 4, |
| ANV_CMD_DIRTY_RESTART_INDEX = 1 << 5, |
| ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE = 1 << 6, |
| ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE = 1 << 7, |
| }; |
| typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t; |
| |
| enum anv_pipe_bits { |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0), |
| ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1), |
| ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2), |
| ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3), |
| ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4), |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5), |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6), |
| ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10), |
| ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11), |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12), |
| ANV_PIPE_DEPTH_STALL_BIT = (1 << 13), |
| |
| /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data |
| * cache work has completed. Available on Gfx12+. For earlier Gfx we |
| * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT. |
| */ |
| ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14), |
| ANV_PIPE_PSS_STALL_SYNC_BIT = (1 << 15), |
| |
| /* |
|     * This bit flushes the data-port's Untyped L1 data cache (LSC L1). |
| */ |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16), |
| |
| /* This bit controls the flushing of the engine (Render, Compute) specific |
| * entries from the compression cache. |
| */ |
| ANV_PIPE_CCS_CACHE_FLUSH_BIT = (1 << 17), |
| |
| ANV_PIPE_TLB_INVALIDATE_BIT = (1 << 18), |
| |
| /* L3 Fabric Flush */ |
| ANV_PIPE_L3_FABRIC_FLUSH_BIT = (1 << 19), |
| |
| ANV_PIPE_CS_STALL_BIT = (1 << 20), |
| ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21), |
| |
| /* This bit does not exist directly in PIPE_CONTROL. Instead it means that |
| * a flush has happened but not a CS stall. The next time we do any sort |
| * of invalidation we need to insert a CS stall at that time. Otherwise, |
| * we would have to CS stall on every flush which could be bad. |
| */ |
| ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22), |
| |
| /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12 |
| * AUX-TT data has changed and we need to invalidate AUX-TT data. This is |
| * done by writing the AUX-TT register. |
| */ |
| ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 23), |
| |
| /* This bit does not exist directly in PIPE_CONTROL. It means that a |
| * PIPE_CONTROL with a post-sync operation will follow. This is used to |
| * implement a workaround for Gfx9. |
| */ |
| ANV_PIPE_POST_SYNC_BIT = (1 << 24), |
| |
| }; |
| |
| /* These bits track the state of buffer writes for queries. They get cleared |
| * based on PIPE_CONTROL emissions. |
| */ |
| enum anv_query_bits { |
| ANV_QUERY_WRITES_RT_FLUSH = (1 << 0), |
| |
| ANV_QUERY_WRITES_TILE_FLUSH = (1 << 1), |
| |
| ANV_QUERY_WRITES_CS_STALL = (1 << 2), |
| |
| ANV_QUERY_WRITES_DATA_FLUSH = (1 << 3), |
| }; |
| |
| /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's |
| * likely related to performance workaround 14015868140. |
| * |
|  * For now we enable this only on DG2 and on platforms prior to Gfx12, |
|  * where there is no tile cache. |
| */ |
| #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \ |
| (intel_device_info_is_dg2(devinfo)) |
| |
| /* Things we need to flush before accessing query data using the command |
| * streamer. |
| * |
|  * Prior to DG2, experiments show that the command streamer is not coherent |
| * with the tile cache so we need to flush it to make any data visible to CS. |
| * |
| * Otherwise we want to flush the RT cache which is where blorp writes, either |
| * for clearing the query buffer or for clearing the destination buffer in |
| * vkCopyQueryPoolResults(). |
| */ |
| #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \ |
| (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \ |
| (devinfo)->ver >= 12) ? \ |
| ANV_QUERY_WRITES_TILE_FLUSH : 0) | \ |
| ANV_QUERY_WRITES_RT_FLUSH | \ |
| ANV_QUERY_WRITES_CS_STALL) |
| #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \ |
| (ANV_QUERY_WRITES_DATA_FLUSH | \ |
| ANV_QUERY_WRITES_CS_STALL) |
| |
| #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \ |
| ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ? \ |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \ |
| ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ? \ |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \ |
| ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ? \ |
| ANV_PIPE_CS_STALL_BIT : 0) | \ |
| ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ? \ |
| (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \ |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0)) |
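| |
| /* Worked example: with pending_query_bits = ANV_QUERY_WRITES_RT_FLUSH |
|  * | ANV_QUERY_WRITES_CS_STALL, the macro above expands to |
|  * (ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT), i.e. |
|  * exactly the PIPE_CONTROL work needed before the command streamer reads |
|  * query data written through the render target cache. |
|  */ |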
| |
| #define ANV_PIPE_FLUSH_BITS ( \ |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \ |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_L3_FABRIC_FLUSH_BIT) |
| |
| #define ANV_PIPE_BARRIER_FLUSH_BITS ( \ |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \ |
| ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT) |
| |
| #define ANV_PIPE_STALL_BITS ( \ |
| ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \ |
| ANV_PIPE_DEPTH_STALL_BIT | \ |
| ANV_PIPE_CS_STALL_BIT | \ |
| ANV_PIPE_PSS_STALL_SYNC_BIT) |
| |
| #define ANV_PIPE_INVALIDATE_BITS ( \ |
| ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \ |
| ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \ |
| ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \ |
| ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \ |
| ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \ |
| ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) |
| |
| /* PIPE_CONTROL bits that should be set only in 3D RCS mode. |
| * For more details see genX(emit_apply_pipe_flushes). |
| */ |
| #define ANV_PIPE_GFX_BITS ( \ |
| ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_TILE_CACHE_FLUSH_BIT | \ |
| ANV_PIPE_DEPTH_STALL_BIT | \ |
| ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \ |
| (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \ |
| ANV_PIPE_VF_CACHE_INVALIDATE_BIT) |
| |
| /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode. |
| * For more details see genX(emit_apply_pipe_flushes). |
| * |
| * Documentation says that untyped L1 dataport cache flush is controlled by |
| * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register: |
| * |
| * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush: |
| * |
| * "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to |
| * "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1 |
| * cache based on the programming of HDC_Chicken0 register bits 13:11." |
| * |
| * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1 |
| * cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the |
| * PIPE_CONTROL command." |
| * |
| * As part of Wa_22010960976 & Wa_14013347512, i915 is programming |
| * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol |
| * Dataport flush, and UAV coherency barrier event"). So there is no need |
| * to set "Untyped Data-Port Cache" in 3D mode. |
| * |
| * On MTL the HDC_CHICKEN0 default values changed to match what was programmed |
| * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the |
| * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0 |
| * register to force L1 untyped flush with HDC pipeline flush has no effect on |
| * MTL. |
| * |
| * It seems like the HW change completely disconnected L1 untyped flush from |
| * HDC pipeline flush with no way to bring that behavior back. So leave the L1 |
|  * untyped flush active in 3D mode on all platforms since it doesn't seem |
|  * to cause issues there either. |
| * |
| * Maybe we'll have some GPGPU only bits here at some point. |
| */ |
| #define ANV_PIPE_GPGPU_BITS (0) |
| |
| enum intel_ds_stall_flag |
| anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); |
| |
| #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \ |
| VK_IMAGE_ASPECT_PLANE_0_BIT | \ |
| VK_IMAGE_ASPECT_PLANE_1_BIT | \ |
| VK_IMAGE_ASPECT_PLANE_2_BIT) |
| |
| #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \ |
| VK_IMAGE_ASPECT_COLOR_BIT | \ |
| VK_IMAGE_ASPECT_PLANES_BITS_ANV) |
| |
| struct anv_vertex_binding { |
| struct anv_buffer * buffer; |
| VkDeviceSize offset; |
| VkDeviceSize size; |
| }; |
| |
| struct anv_xfb_binding { |
| struct anv_buffer * buffer; |
| VkDeviceSize offset; |
| VkDeviceSize size; |
| }; |
| |
| struct anv_push_constants { |
| /** Push constant data provided by the client through vkPushConstants */ |
| uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; |
| |
| #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1) |
| #define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1)) |
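/* Illustrative decode sketch (derived from the two masks above, assuming
 * ANV_UBO_ALIGNMENT is a power of two): an offset word carries a
 * dynamic-offset index in its low bits alongside an aligned offset in its
 * high bits:
 *
 *    uint32_t index  = word & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
 *    uint32_t offset = word & ANV_DESCRIPTOR_SET_OFFSET_MASK;
 */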
| |
| /** |
| * Base offsets for descriptor sets from |
| * |
| * The offset has different meaning depending on a number of factors : |
| * |
| * - with descriptor sets (direct or indirect), this relative |
| * pdevice->va.descriptor_pool |
| * |
| * - with descriptor buffers on DG2+, relative |
| * device->va.descriptor_buffer_pool |
| * |
| * - with descriptor buffers prior to DG2, relative the programmed value |
| * in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress |
| */ |
| uint32_t desc_surface_offsets[MAX_SETS]; |
| |
| /** |
| * Base offsets for descriptor sets from |
| */ |
| uint32_t desc_sampler_offsets[MAX_SETS]; |
| |
| /** Dynamic offsets for dynamic UBOs and SSBOs */ |
| uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; |
| |
/** Surface buffer base offset
 *
 * Only used prior to DG2 with descriptor buffers.
 *
 * (surfaces_base_offset + desc_surface_offsets[set_index]) is relative to
 * device->va.descriptor_buffer_pool and can be used to compute a 64-bit
 * address to the descriptor buffer (using load_desc_set_address_intel).
 */
| uint32_t surfaces_base_offset; |
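/* A minimal sketch of that computation (variable names hypothetical),
 * resolving a descriptor set to a 64-bit address prior to DG2 with
 * descriptor buffers:
 *
 *    uint64_t set_addr =
 *       device->physical->va.descriptor_buffer_pool.addr +
 *       surfaces_base_offset +
 *       (desc_surface_offsets[set_index] & ANV_DESCRIPTOR_SET_OFFSET_MASK);
 */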
| |
| /* Robust access pushed registers. */ |
| uint64_t push_reg_mask[MESA_SHADER_STAGES]; |
| |
| /** Ray query globals (RT_DISPATCH_GLOBALS) */ |
| uint64_t ray_query_globals; |
| |
| union { |
| struct { |
| /** Dynamic MSAA value */ |
| uint32_t fs_msaa_flags; |
| |
| /** Dynamic TCS input vertices */ |
| uint32_t tcs_input_vertices; |
| } gfx; |
| |
| struct { |
| /** Base workgroup ID |
| * |
| * Used for vkCmdDispatchBase. |
| */ |
| uint32_t base_work_group_id[3]; |
| |
| /** gl_NumWorkgroups */ |
| uint32_t num_work_groups[3]; |
| |
| /** Subgroup ID |
| * |
| * This is never set by software but is implicitly filled out when |
| * uploading the push constants for compute shaders. |
| * |
| * This *MUST* be the last field of the anv_push_constants structure. |
| */ |
| uint32_t subgroup_id; |
| } cs; |
| }; |
| }; |
| |
| struct anv_surface_state { |
| /** Surface state allocated from the bindless heap |
| * |
| * Can be NULL if unused. |
| */ |
| struct anv_state state; |
| |
| /** Surface state after genxml packing |
| * |
| * Same data as in state. |
| */ |
| struct anv_surface_state_data state_data; |
| |
| /** Address of the surface referred to by this state |
| * |
| * This address is relative to the start of the BO. |
| */ |
| struct anv_address address; |
| /* Address of the aux surface, if any |
| * |
| * This field is ANV_NULL_ADDRESS if and only if no aux surface exists. |
| * |
| * With the exception of gfx8, the bottom 12 bits of this address' offset |
| * include extra aux information. |
| */ |
| struct anv_address aux_address; |
| /* Address of the clear color, if any |
| * |
| * This address is relative to the start of the BO. |
| */ |
| struct anv_address clear_address; |
| }; |
| |
| struct anv_attachment { |
| VkFormat vk_format; |
| const struct anv_image_view *iview; |
| VkImageLayout layout; |
| enum isl_aux_usage aux_usage; |
| struct anv_surface_state surface_state; |
| |
| VkResolveModeFlagBits resolve_mode; |
| const struct anv_image_view *resolve_iview; |
| VkImageLayout resolve_layout; |
| }; |
| |
| /** State tracking for vertex buffer flushes |
| * |
| * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory |
| * addresses. If you happen to have two vertex buffers which get placed |
| * exactly 4 GiB apart and use them in back-to-back draw calls, you can get |
| * collisions. In order to solve this problem, we track vertex address ranges |
| * which are live in the cache and invalidate the cache if one ever exceeds 32 |
| * bits. |
| */ |
| struct anv_vb_cache_range { |
| /* Virtual address at which the live vertex buffer cache range starts for |
| * this vertex buffer index. |
| */ |
| uint64_t start; |
| |
| /* Virtual address of the byte after where vertex buffer cache range ends. |
| * This is exclusive such that end - start is the size of the range. |
| */ |
| uint64_t end; |
| }; |
| |
| static inline void |
| anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty, |
| const struct anv_vb_cache_range *bound) |
| { |
| if (dirty->start == dirty->end) { |
| *dirty = *bound; |
| } else if (bound->start != bound->end) { |
| dirty->start = MIN2(dirty->start, bound->start); |
| dirty->end = MAX2(dirty->end, bound->end); |
| } |
| } |
| |
/* Check whether we need to apply the Gfx8-9 vertex buffer workaround. */
| static inline bool |
| anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound, |
| struct anv_vb_cache_range *dirty, |
| struct anv_address vb_address, |
| uint32_t vb_size) |
| { |
| if (vb_size == 0) { |
| bound->start = 0; |
| bound->end = 0; |
| return false; |
| } |
| |
| bound->start = intel_48b_address(anv_address_physical(vb_address)); |
| bound->end = bound->start + vb_size; |
| assert(bound->end > bound->start); /* No overflow */ |
| |
| /* Align everything to a cache line */ |
| bound->start &= ~(64ull - 1ull); |
| bound->end = align64(bound->end, 64); |
| |
| anv_merge_vb_cache_range(dirty, bound); |
| |
/* If the dirty range spans more than 32 bits, we have to flush */
| assert(bound->end - bound->start <= (1ull << 32)); |
| return (dirty->end - dirty->start) > (1ull << 32); |
| } |
| |
| /** |
| * State tracking for simple internal shaders |
| */ |
| struct anv_simple_shader { |
| /* The device associated with this emission */ |
| struct anv_device *device; |
| /* The command buffer associated with this emission (can be NULL) */ |
| struct anv_cmd_buffer *cmd_buffer; |
| /* State stream used for various internal allocations */ |
| struct anv_state_stream *dynamic_state_stream; |
| struct anv_state_stream *general_state_stream; |
| /* Where to emit the commands (can be different from cmd_buffer->batch) */ |
| struct anv_batch *batch; |
| /* Shader to use */ |
| struct anv_shader_bin *kernel; |
| /* L3 config used by the shader */ |
| const struct intel_l3_config *l3_config; |
| /* Current URB config */ |
| const struct intel_urb_config *urb_cfg; |
| |
/* Managed by the simple shader helper. */
| struct anv_state bt_state; |
| }; |
| |
| /** State tracking for particular pipeline bind point |
| * |
| * This struct is the base struct for anv_cmd_graphics_state and |
| * anv_cmd_compute_state. These are used to track state which is bound to a |
| * particular type of pipeline. Generic state that applies per-stage such as |
| * binding table offsets and push constants is tracked generically with a |
| * per-stage array in anv_cmd_state. |
| */ |
| struct anv_cmd_pipeline_state { |
| struct anv_descriptor_set *descriptors[MAX_SETS]; |
| struct { |
| bool bound; |
| /** |
| * Buffer index used by this descriptor set. |
| */ |
| int32_t buffer_index; /* -1 means push descriptor */ |
| /** |
| * Offset of the descriptor set in the descriptor buffer. |
| */ |
| uint32_t buffer_offset; |
| /** |
| * Final computed address to be emitted in the descriptor set surface |
| * state. |
| */ |
| uint64_t address; |
| /** |
| * The descriptor set surface state. |
| */ |
| struct anv_state state; |
| } descriptor_buffers[MAX_SETS]; |
| struct anv_push_descriptor_set push_descriptor; |
| |
| struct anv_push_constants push_constants; |
| |
/** Tracks whether the push constant data has changed and needs to be reemitted */
| bool push_constants_data_dirty; |
| |
| /* Push constant state allocated when flushing push constants. */ |
| struct anv_state push_constants_state; |
| |
| /** |
| * Dynamic buffer offsets. |
| * |
| * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with |
 * independent sets we cannot know how much of that total is going to be
 * used. As a result we need to store the maximum possible number per set.
 *
 * Those values are written into anv_push_constants::dynamic_offsets at
 * flush time, when we have the pipeline with the final
| * anv_pipeline_sets_layout. |
| */ |
| struct { |
| uint32_t offsets[MAX_DYNAMIC_BUFFERS]; |
| } dynamic_offsets[MAX_SETS]; |
| |
| /** |
| * The current bound pipeline. |
| */ |
| struct anv_pipeline *pipeline; |
| }; |
| |
| enum anv_depth_reg_mode { |
| ANV_DEPTH_REG_MODE_UNKNOWN = 0, |
| ANV_DEPTH_REG_MODE_HW_DEFAULT, |
| ANV_DEPTH_REG_MODE_D16_1X_MSAA, |
| }; |
| |
| /** State tracking for graphics pipeline |
| * |
| * This has anv_cmd_pipeline_state as a base struct to track things which get |
| * bound to a graphics pipeline. Along with general pipeline bind point state |
| * which is in the anv_cmd_pipeline_state base struct, it also contains other |
| * state which is graphics-specific. |
| */ |
| struct anv_cmd_graphics_state { |
| struct anv_cmd_pipeline_state base; |
| |
| VkRenderingFlags rendering_flags; |
| VkRect2D render_area; |
| uint32_t layer_count; |
| uint32_t samples; |
| uint32_t view_mask; |
| uint32_t color_att_count; |
| struct anv_state att_states; |
| struct anv_attachment color_att[MAX_RTS]; |
| struct anv_attachment depth_att; |
| struct anv_attachment stencil_att; |
| struct anv_state null_surface_state; |
| |
| /* Map of color output from the last dispatched fragment shader to color |
| * attachments in the render pass. |
| */ |
| uint8_t color_output_mapping[MAX_RTS]; |
| |
| anv_cmd_dirty_mask_t dirty; |
| uint32_t vb_dirty; |
| |
| struct anv_vb_cache_range ib_bound_range; |
| struct anv_vb_cache_range ib_dirty_range; |
| struct anv_vb_cache_range vb_bound_ranges[33]; |
| struct anv_vb_cache_range vb_dirty_ranges[33]; |
| |
| uint32_t restart_index; |
| |
| VkShaderStageFlags push_constant_stages; |
| |
| bool used_task_shader; |
| |
| struct anv_buffer *index_buffer; |
| uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ |
| uint32_t index_offset; |
| uint32_t index_size; |
| |
| uint32_t indirect_data_stride; |
| bool indirect_data_stride_aligned; |
| |
| struct vk_vertex_input_state vertex_input; |
| struct vk_sample_locations_state sample_locations; |
| |
| bool object_preemption; |
| bool has_uint_rt; |
| |
| /* State tracking for Wa_14018912822. */ |
| bool color_blend_zero; |
| bool alpha_blend_zero; |
| |
| /** |
| * State tracking for Wa_18020335297. |
| */ |
| bool viewport_set; |
| |
| struct intel_urb_config urb_cfg; |
| |
| uint32_t n_occlusion_queries; |
| |
| /** |
 * Whether or not the gfx8 PMA fix is enabled. We ensure that it is
 * disabled at the top of any command buffer by disabling it in
 * EndCommandBuffer and before invoking secondaries in ExecuteCommands.
| */ |
| bool pma_fix_enabled; |
| |
| /** |
| * Whether or not we know for certain that HiZ is enabled for the current |
| * subpass. If, for whatever reason, we are unsure as to whether HiZ is |
| * enabled or not, this will be false. |
| */ |
| bool hiz_enabled; |
| |
| /** |
| * We ensure the registers for the gfx12 D16 fix are initialized at the |
| * first non-NULL depth stencil packet emission of every command buffer. |
| * For secondary command buffer execution, we transfer the state from the |
| * last command buffer to the primary (if known). |
| */ |
| enum anv_depth_reg_mode depth_reg_mode; |
| |
| struct anv_gfx_dynamic_state dyn_state; |
| }; |
| |
| /** State tracking for compute pipeline |
| * |
| * This has anv_cmd_pipeline_state as a base struct to track things which get |
| * bound to a compute pipeline. Along with general pipeline bind point state |
| * which is in the anv_cmd_pipeline_state base struct, it also contains other |
| * state which is compute-specific. |
| */ |
| struct anv_cmd_compute_state { |
| struct anv_cmd_pipeline_state base; |
| |
| bool pipeline_dirty; |
| |
| uint32_t scratch_size; |
| }; |
| |
| struct anv_cmd_ray_tracing_state { |
| struct anv_cmd_pipeline_state base; |
| |
| bool pipeline_dirty; |
| |
| struct { |
| struct anv_bo *bo; |
| struct brw_rt_scratch_layout layout; |
| } scratch; |
| |
| struct anv_address build_priv_mem_addr; |
| size_t build_priv_mem_size; |
| }; |
| |
| enum anv_cmd_descriptor_buffer_mode { |
| ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN, |
| ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY, |
| ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER, |
| }; |
| |
| /** State required while building cmd buffer */ |
| struct anv_cmd_state { |
| /* PIPELINE_SELECT.PipelineSelection */ |
| uint32_t current_pipeline; |
| const struct intel_l3_config * current_l3_config; |
| uint32_t last_aux_map_state; |
| |
| struct anv_cmd_graphics_state gfx; |
| struct anv_cmd_compute_state compute; |
| struct anv_cmd_ray_tracing_state rt; |
| |
| enum anv_pipe_bits pending_pipe_bits; |
| |
| /** |
| * Whether the last programmed STATE_BASE_ADDRESS references |
| * anv_device::dynamic_state_pool or anv_device::dynamic_state_pool_db for |
| * the dynamic state heap. |
| */ |
| enum anv_cmd_descriptor_buffer_mode current_db_mode; |
| |
| /** |
| * Whether the command buffer has pending descriptor buffers bound it. This |
| * variable changes before anv_device::current_db_mode. |
| */ |
| enum anv_cmd_descriptor_buffer_mode pending_db_mode; |
| |
| struct { |
| /** |
 * Tracks operations that can interfere with queries in the destination
 * buffer of vkCmdCopyQueryResults; we need those operations to have
 * completed before we do the work of vkCmdCopyQueryResults.
| */ |
| enum anv_query_bits buffer_write_bits; |
| |
| /** |
| * Tracks clear operations of query buffers that can interact with |
| * vkCmdQueryBegin*, vkCmdWriteTimestamp*, |
| * vkCmdWriteAccelerationStructuresPropertiesKHR, etc... |
| * |
 * We need the clearing of the buffer to have completed before we write
 * data with the command streamer or a shader.
| */ |
| enum anv_query_bits clear_bits; |
| } queries; |
| |
| VkShaderStageFlags descriptors_dirty; |
| VkShaderStageFlags push_descriptors_dirty; |
| /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */ |
| VkShaderStageFlags push_constants_dirty; |
| |
| struct { |
| uint64_t surfaces_address; |
| uint64_t samplers_address; |
| bool dirty; |
| VkShaderStageFlags offsets_dirty; |
| uint64_t address[MAX_SETS]; |
| } descriptor_buffers; |
| |
| struct anv_vertex_binding vertex_bindings[MAX_VBS]; |
| bool xfb_enabled; |
| struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; |
| struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES]; |
| struct anv_state samplers[MESA_VULKAN_SHADER_STAGES]; |
| |
| unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20]; |
| unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20]; |
| unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20]; |
| |
| /* The last auxiliary surface operation (or equivalent operation) provided |
| * to genX(cmd_buffer_update_color_aux_op). |
| */ |
| enum isl_aux_op color_aux_op; |
| |
| /** |
| * Whether RHWO optimization is enabled (Wa_1508744258). |
| */ |
| bool rhwo_optimization_enabled; |
| |
| /** |
| * Pending state of the RHWO optimization, to be applied at the next |
| * genX(cmd_buffer_apply_pipe_flushes). |
| */ |
| bool pending_rhwo_optimization_enabled; |
| |
| bool conditional_render_enabled; |
| |
| /** |
| * Last rendering scale argument provided to |
| * genX(cmd_buffer_emit_hashing_mode)(). |
| */ |
| unsigned current_hash_scale; |
| |
| /** |
| * A buffer used for spill/fill of ray queries. |
| */ |
| struct anv_bo * ray_query_shadow_bo; |
| |
| /** Pointer to the last emitted COMPUTE_WALKER. |
| * |
| * This is used to edit the instruction post emission to replace the "Post |
| * Sync" field for utrace timestamp emission. |
| */ |
| void *last_compute_walker; |
| |
| /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH. |
| * |
| * This is used to edit the instruction post emission to replace the "Post |
| * Sync" field for utrace timestamp emission. |
| */ |
| void *last_indirect_dispatch; |
| }; |
| |
| #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192 |
| #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024) |
| |
| enum anv_cmd_buffer_exec_mode { |
| ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, |
| ANV_CMD_BUFFER_EXEC_MODE_EMIT, |
| ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, |
| ANV_CMD_BUFFER_EXEC_MODE_CHAIN, |
| ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, |
| ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN, |
| }; |
| |
| struct anv_measure_batch; |
| |
| struct anv_cmd_buffer { |
| struct vk_command_buffer vk; |
| |
| struct anv_device * device; |
| struct anv_queue_family * queue_family; |
| |
| /** Batch where the main commands live */ |
| struct anv_batch batch; |
| |
| /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was |
| * recorded upon calling vkEndCommandBuffer(). This is useful if we need to |
 * rewrite the end to chain multiple batches together at vkQueueSubmit().
| */ |
| void * batch_end; |
| |
| /* Fields required for the actual chain of anv_batch_bo's. |
| * |
| * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). |
| */ |
| struct list_head batch_bos; |
| enum anv_cmd_buffer_exec_mode exec_mode; |
| |
| /* A vector of anv_batch_bo pointers for every batch or surface buffer |
| * referenced by this command buffer |
| * |
| * initialized by anv_cmd_buffer_init_batch_bo_chain() |
| */ |
| struct u_vector seen_bbos; |
| |
| /* A vector of int32_t's for every block of binding tables. |
| * |
| * initialized by anv_cmd_buffer_init_batch_bo_chain() |
| */ |
| struct u_vector bt_block_states; |
| struct anv_state bt_next; |
| |
| struct anv_reloc_list surface_relocs; |
| |
| /* Serial for tracking buffer completion */ |
| uint32_t serial; |
| |
| /* Stream objects for storing temporary data */ |
| struct anv_state_stream surface_state_stream; |
| struct anv_state_stream dynamic_state_stream; |
| struct anv_state_stream general_state_stream; |
| struct anv_state_stream indirect_push_descriptor_stream; |
| struct anv_state_stream push_descriptor_buffer_stream; |
| |
| VkCommandBufferUsageFlags usage_flags; |
| |
| struct anv_query_pool *perf_query_pool; |
| |
| struct anv_cmd_state state; |
| |
| struct anv_address return_addr; |
| |
| /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */ |
| uint64_t intel_perf_marker; |
| |
| struct anv_measure_batch *measure; |
| |
| /** |
 * KHR_performance_query requires self-modifying command buffers. This
 * array holds the locations of the commands that modify the query begin
 * and end instructions storing performance counters. The array length is
 * anv_physical_device::n_perf_query_commands.
| */ |
| struct mi_address_token *self_mod_locations; |
| |
| /** |
| * Index tracking which of the self_mod_locations items have already been |
| * used. |
| */ |
| uint32_t perf_reloc_idx; |
| |
| /** |
| * Sum of all the anv_batch_bo written sizes for this command buffer |
| * including any executed secondary command buffer. |
| */ |
| uint32_t total_batch_size; |
| |
| struct { |
| /** Batch generating part of the anv_cmd_buffer::batch */ |
| struct anv_batch batch; |
| |
| /** |
| * Location in anv_cmd_buffer::batch at which we left some space to |
| * insert a MI_BATCH_BUFFER_START into the |
| * anv_cmd_buffer::generation::batch if needed. |
| */ |
| struct anv_address jump_addr; |
| |
| /** |
| * Location in anv_cmd_buffer::batch at which the generation batch |
| * should jump back to. |
| */ |
| struct anv_address return_addr; |
| |
| /** List of anv_batch_bo used for generation |
| * |
 * We have to keep this separate from anv_cmd_buffer::batch_bos, which is
 * used for a chaining optimization.
| */ |
| struct list_head batch_bos; |
| |
| /** Ring buffer of generated commands |
| * |
| * When generating draws in ring mode, this buffer will hold generated |
| * 3DPRIMITIVE commands. |
| */ |
| struct anv_bo *ring_bo; |
| |
| /** |
| * State tracking of the generation shader (only used for the non-ring |
| * mode). |
| */ |
| struct anv_simple_shader shader_state; |
| } generation; |
| |
| /** |
| * A vector of anv_bo pointers for chunks of memory used by the command |
| * buffer that are too large to be allocated through dynamic_state_stream. |
| * This is the case for large enough acceleration structures. |
| * |
| * initialized by anv_cmd_buffer_init_batch_bo_chain() |
| */ |
| struct u_vector dynamic_bos; |
| |
| /** |
| * Structure holding tracepoints recorded in the command buffer. |
| */ |
| struct u_trace trace; |
| |
| struct { |
| struct anv_video_session *vid; |
| struct anv_video_session_params *params; |
| } video; |
| |
| /** |
 * Companion RCS command buffer to support MSAA operations on the compute
 * queue.
| */ |
| struct anv_cmd_buffer *companion_rcs_cmd_buffer; |
| |
| /** |
 * Whether this command buffer is a companion command buffer of a compute one.
| */ |
| bool is_companion_rcs_cmd_buffer; |
| |
| }; |
| |
| extern const struct vk_command_buffer_ops anv_cmd_buffer_ops; |
| |
/* Determine whether we can chain a given cmd_buffer to another one. We need
 * to make sure that we can edit the end of the batch to point to the next
 * one, which requires the command buffer to not be used simultaneously.
 *
 * We could in theory also implement chaining with companion command buffers,
 * but let's spare ourselves some pain and misery. This optimization has no
 * benefit on the brand new Xe kernel driver.
 */
| static inline bool |
| anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer) |
| { |
| return !(cmd_buffer->usage_flags & |
| VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) && |
| !(cmd_buffer->is_companion_rcs_cmd_buffer); |
| } |
| |
| static inline bool |
| anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_queue_family *queue_family = cmd_buffer->queue_family; |
| return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0; |
| } |
| |
| static inline bool |
| anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_queue_family *queue_family = cmd_buffer->queue_family; |
| return ((queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) | |
| (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) != 0; |
| } |
| |
| static inline bool |
| anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_queue_family *queue_family = cmd_buffer->queue_family; |
| return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE; |
| } |
| |
| static inline bool |
| anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_queue_family *queue_family = cmd_buffer->queue_family; |
| return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY; |
| } |
| |
| static inline bool |
| anv_cmd_buffer_is_render_or_compute_queue(const struct anv_cmd_buffer *cmd_buffer) |
| { |
| return anv_cmd_buffer_is_render_queue(cmd_buffer) || |
| anv_cmd_buffer_is_compute_queue(cmd_buffer); |
| } |
| |
| static inline uint8_t |
| anv_get_ray_query_bo_index(struct anv_cmd_buffer *cmd_buffer) |
| { |
| if (intel_needs_workaround(cmd_buffer->device->isl_dev.info, 14022863161)) |
| return anv_cmd_buffer_is_compute_queue(cmd_buffer) ? 1 : 0; |
| return 0; |
| } |
| |
| static inline struct anv_address |
| anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_state state) |
| { |
| return anv_state_pool_state_address( |
| &cmd_buffer->device->dynamic_state_pool, state); |
| } |
| |
| static inline uint64_t |
| anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer, |
| int32_t buffer_index) |
| { |
| if (buffer_index == -1) |
| return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr; |
| |
| return cmd_buffer->state.descriptor_buffers.address[buffer_index]; |
| } |
| |
| VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); |
| void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); |
| void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); |
| void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); |
| void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, |
| struct anv_cmd_buffer *secondary); |
| void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); |
| VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue, |
| struct anv_cmd_buffer *cmd_buffer, |
| const VkSemaphore *in_semaphores, |
| const uint64_t *in_wait_values, |
| uint32_t num_in_semaphores, |
| const VkSemaphore *out_semaphores, |
| const uint64_t *out_signal_values, |
| uint32_t num_out_semaphores, |
| VkFence fence, |
| int perf_query_pass); |
| |
| void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer, |
| UNUSED VkCommandBufferResetFlags flags); |
| |
| struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, |
| const void *data, uint32_t size, uint32_t alignment); |
| struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t *a, uint32_t *b, |
| uint32_t dwords, uint32_t alignment); |
| |
| struct anv_address |
| anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); |
| struct anv_state |
| anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t entries, uint32_t *state_offset); |
| struct anv_state |
| anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t count); |
| struct anv_state |
| anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t size, uint32_t alignment); |
| struct anv_state |
| anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t size, uint32_t alignment); |
| static inline struct anv_state |
| anv_cmd_buffer_alloc_temporary_state(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t size, uint32_t alignment) |
| { |
| struct anv_state state = |
| anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, |
| size, alignment); |
| if (state.map == NULL) |
| anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| return state; |
| } |
| static inline struct anv_address |
| anv_cmd_buffer_temporary_state_address(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_state state) |
| { |
| return anv_state_pool_state_address( |
| &cmd_buffer->device->dynamic_state_pool, state); |
| } |
| |
| void |
| anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers, |
| uint32_t num_cmd_buffers); |
| void |
| anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue, |
| uint32_t cmd_buffer_count, |
| struct anv_cmd_buffer **cmd_buffers, |
| struct anv_query_pool *perf_query_pool, |
| uint32_t perf_query_pass); |
| void |
| anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers, |
| uint32_t num_cmd_buffers); |
| |
| void |
| anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer, |
| enum anv_pipe_bits flushed_bits); |
| |
| /** |
 * An allocation tied to a command buffer.
 *
 * Don't use anv_cmd_alloc::address::map to write memory from userspace; use
| * anv_cmd_alloc::map instead. |
| */ |
| struct anv_cmd_alloc { |
| struct anv_address address; |
| void *map; |
| size_t size; |
| }; |
| |
| #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 }) |
| |
| static inline bool |
| anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc) |
| { |
| return alloc.size == 0; |
| } |
| |
| struct anv_cmd_alloc |
| anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer, |
| size_t size, uint32_t alignment, |
| bool private); |
| |
| VkResult |
| anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); |
| |
| void anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer); |
| |
| struct anv_state |
| anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer); |
| struct anv_state |
| anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); |
| |
| VkResult |
| anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t num_entries, |
| uint32_t *state_offset, |
| struct anv_state *bt_state); |
| |
| void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer); |
| |
| static inline unsigned |
| anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer) |
| { |
| struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; |
| return MAX2(1, util_bitcount(gfx->view_mask)); |
| } |
| |
| /* Save/restore cmd buffer states for meta operations */ |
| enum anv_cmd_saved_state_flags { |
| ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE = BITFIELD_BIT(0), |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 = BITFIELD_BIT(1), |
| ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL = BITFIELD_BIT(2), |
| ANV_CMD_SAVED_STATE_PUSH_CONSTANTS = BITFIELD_BIT(3), |
| }; |
| |
| struct anv_cmd_saved_state { |
| uint32_t flags; |
| |
| struct anv_pipeline *pipeline; |
| struct anv_descriptor_set *descriptor_set[MAX_SETS]; |
| uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; |
| }; |
| |
| void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer, |
| uint32_t flags, |
| struct anv_cmd_saved_state *state); |
| |
| void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_cmd_saved_state *state); |
| |
| enum anv_bo_sync_state { |
/** Indicates that this is a new (or newly reset) fence */
| ANV_BO_SYNC_STATE_RESET, |
| |
| /** Indicates that this fence has been submitted to the GPU but is still |
| * (as far as we know) in use by the GPU. |
| */ |
| ANV_BO_SYNC_STATE_SUBMITTED, |
| |
| ANV_BO_SYNC_STATE_SIGNALED, |
| }; |
| |
| struct anv_bo_sync { |
| struct vk_sync sync; |
| |
| enum anv_bo_sync_state state; |
| struct anv_bo *bo; |
| }; |
| |
| extern const struct vk_sync_type anv_bo_sync_type; |
| |
| static inline bool |
| vk_sync_is_anv_bo_sync(const struct vk_sync *sync) |
| { |
| return sync->type == &anv_bo_sync_type; |
| } |
| |
| VkResult anv_create_sync_for_memory(struct vk_device *device, |
| VkDeviceMemory memory, |
| bool signal_memory, |
| struct vk_sync **sync_out); |
| |
| struct anv_event { |
| struct vk_object_base base; |
| uint64_t semaphore; |
| struct anv_state state; |
| }; |
| |
| #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) |
| |
| #define anv_foreach_stage(stage, stage_bits) \ |
| for (gl_shader_stage stage, \ |
| __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ |
| stage = __builtin_ffs(__tmp) - 1, __tmp; \ |
| __tmp &= ~(1 << (stage))) |
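/* Example: iterate the stages present in a stage mask (this relies on
 * VkShaderStageFlagBits lining up with gl_shader_stage, as the masking
 * with ANV_STAGE_MASK assumes):
 *
 *    anv_foreach_stage(s, VK_SHADER_STAGE_VERTEX_BIT |
 *                         VK_SHADER_STAGE_FRAGMENT_BIT) {
 *       // s is MESA_SHADER_VERTEX, then MESA_SHADER_FRAGMENT
 *    }
 */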
| |
| struct anv_pipeline_bind_map { |
| unsigned char surface_sha1[20]; |
| unsigned char sampler_sha1[20]; |
| unsigned char push_sha1[20]; |
| |
| uint32_t surface_count; |
| uint32_t sampler_count; |
| uint32_t embedded_sampler_count; |
| uint16_t kernel_args_size; |
| uint16_t kernel_arg_count; |
| |
| struct anv_pipeline_binding * surface_to_descriptor; |
| struct anv_pipeline_binding * sampler_to_descriptor; |
| struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding; |
| struct brw_kernel_arg_desc * kernel_args; |
| |
| struct anv_push_range push_ranges[4]; |
| }; |
| |
| struct anv_push_descriptor_info { |
| /* A bitfield of descriptors used. */ |
| uint32_t used_descriptors; |
| |
/* A bitfield of UBO bindings fully promoted to push constants. */
| uint32_t fully_promoted_ubo_descriptors; |
| |
| /* */ |
| uint8_t used_set_buffer; |
| }; |
| |
| /* A list of values we push to implement some of the dynamic states */ |
| enum anv_dynamic_push_bits { |
| ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0), |
| }; |
| |
| struct anv_shader_upload_params { |
| gl_shader_stage stage; |
| |
| const void *key_data; |
| uint32_t key_size; |
| |
| const void *kernel_data; |
| uint32_t kernel_size; |
| |
| const struct brw_stage_prog_data *prog_data; |
| uint32_t prog_data_size; |
| |
| const struct brw_compile_stats *stats; |
| uint32_t num_stats; |
| |
| const struct nir_xfb_info *xfb_info; |
| |
| const struct anv_pipeline_bind_map *bind_map; |
| |
| const struct anv_push_descriptor_info *push_desc_info; |
| |
| enum anv_dynamic_push_bits dynamic_push_values; |
| }; |
| |
| struct anv_embedded_sampler { |
| uint32_t ref_cnt; |
| |
| struct anv_embedded_sampler_key key; |
| |
| struct anv_state sampler_state; |
| struct anv_state border_color_state; |
| }; |
| |
| struct anv_shader_bin { |
| struct vk_pipeline_cache_object base; |
| |
| gl_shader_stage stage; |
| |
| struct anv_state kernel; |
| uint32_t kernel_size; |
| |
| const struct brw_stage_prog_data *prog_data; |
| uint32_t prog_data_size; |
| |
| struct brw_compile_stats stats[3]; |
| uint32_t num_stats; |
| |
| struct nir_xfb_info *xfb_info; |
| |
| struct anv_push_descriptor_info push_desc_info; |
| |
| struct anv_pipeline_bind_map bind_map; |
| |
| enum anv_dynamic_push_bits dynamic_push_values; |
| |
| /* Not saved in the pipeline cache. |
| * |
| * Array of pointers of length bind_map.embedded_sampler_count |
| */ |
| struct anv_embedded_sampler **embedded_samplers; |
| }; |
| |
| static inline struct anv_shader_bin * |
| anv_shader_bin_ref(struct anv_shader_bin *shader) |
| { |
| vk_pipeline_cache_object_ref(&shader->base); |
| |
| return shader; |
| } |
| |
| static inline void |
| anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) |
| { |
| vk_pipeline_cache_object_unref(&device->vk, &shader->base); |
| } |
| |
| struct anv_pipeline_executable { |
| gl_shader_stage stage; |
| |
| struct brw_compile_stats stats; |
| |
| char *nir; |
| char *disasm; |
| }; |
| |
| enum anv_pipeline_type { |
| ANV_PIPELINE_GRAPHICS, |
| ANV_PIPELINE_GRAPHICS_LIB, |
| ANV_PIPELINE_COMPUTE, |
| ANV_PIPELINE_RAY_TRACING, |
| }; |
| |
| struct anv_pipeline { |
| struct vk_object_base base; |
| |
| struct anv_device * device; |
| |
| struct anv_batch batch; |
| struct anv_reloc_list batch_relocs; |
| |
| void * mem_ctx; |
| |
| enum anv_pipeline_type type; |
| VkPipelineCreateFlags2KHR flags; |
| |
| VkShaderStageFlags active_stages; |
| |
| uint32_t ray_queries; |
| |
| /** |
| * Mask of stages that are accessing push descriptors. |
| */ |
| VkShaderStageFlags use_push_descriptor; |
| |
| /** |
| * Mask of stages that are accessing the push descriptors buffer. |
| */ |
| VkShaderStageFlags use_push_descriptor_buffer; |
| |
| /** |
| * Maximum scratch size for all shaders in this pipeline. |
| */ |
| uint32_t scratch_size; |
| |
| /* Layout of the sets used by the pipeline. */ |
| struct anv_pipeline_sets_layout layout; |
| |
| struct util_dynarray executables; |
| |
| const struct intel_l3_config * l3_config; |
| }; |
| |
/* The base graphics pipeline object only holds shaders. */
| struct anv_graphics_base_pipeline { |
| struct anv_pipeline base; |
| |
| struct vk_sample_locations_state sample_locations; |
| |
| /* Shaders */ |
| struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT]; |
| |
/* A small hash based on shader_info::source_sha1 for identifying
| * shaders in renderdoc/shader-db. |
| */ |
| uint32_t source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT]; |
| |
| /* Feedback index in |
| * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks |
| * |
| * For pipeline libraries, we need to remember the order at creation when |
| * included into a linked pipeline. |
| */ |
| uint32_t feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT]; |
| |
/* Robustness flags used to compile the shaders
 */
| enum brw_robustness_flags robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT]; |
| |
| /* True if at the time the fragment shader was compiled, it didn't have all |
| * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC. |
| */ |
| bool fragment_dynamic; |
| }; |
| |
/* The library graphics pipeline object has a partial graphics state and
 * possibly some shaders. If requested, shaders are also present in early
 * NIR form.
| */ |
| struct anv_graphics_lib_pipeline { |
| struct anv_graphics_base_pipeline base; |
| |
| VkGraphicsPipelineLibraryFlagsEXT lib_flags; |
| |
| struct vk_graphics_pipeline_all_state all_state; |
| struct vk_graphics_pipeline_state state; |
| |
| /* Retained shaders for link optimization. */ |
| struct { |
| /* This hash is the same as computed in |
| * anv_graphics_pipeline_gather_shaders(). |
| */ |
| unsigned char shader_sha1[20]; |
| |
| enum gl_subgroup_size subgroup_size_type; |
| |
/* Holds the value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
 * from the library that introduces the stage, so it remains consistent.
| */ |
| bool view_index_from_device_index; |
| |
| /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization |
| * constants. |
| */ |
| nir_shader * nir; |
| } retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT]; |
| |
| /* Whether the shaders have been retained */ |
| bool retain_shaders; |
| }; |
| |
| struct anv_gfx_state_ptr { |
| /* Both in dwords */ |
| uint16_t offset; |
| uint16_t len; |
| }; |
| |
| /* The final graphics pipeline object has all the graphics state ready to be |
| * programmed into HW packets (dynamic_state field) or fully baked in its |
| * batch. |
| */ |
| struct anv_graphics_pipeline { |
| struct anv_graphics_base_pipeline base; |
| |
| struct vk_vertex_input_state vertex_input; |
| struct vk_sample_locations_state sample_locations; |
| struct vk_dynamic_graphics_state dynamic_state; |
| |
| /* If true, the patch control points are passed through push constants |
| * (anv_push_constants::gfx::tcs_input_vertices) |
| */ |
| bool dynamic_patch_control_points; |
| |
| uint32_t view_mask; |
| uint32_t instance_multiplier; |
| |
| bool rp_has_ds_self_dep; |
| |
| bool kill_pixel; |
| bool uses_xfb; |
| bool sample_shading_enable; |
| float min_sample_shading; |
| |
| /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */ |
| uint32_t vs_input_elements; |
| |
| /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the |
| * draw parameters |
| */ |
| uint32_t svgs_count; |
| |
/* Precomputed VERTEX_ELEMENT_STATE structures for the vertex input that
 * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_ELEMENTS.
| * |
| * When MESA_VK_DYNAMIC_VI is not dynamic |
| * |
| * vertex_input_elems = vs_input_elements + svgs_count |
| * |
| * All the VERTEX_ELEMENT_STATE can be directly copied behind a |
| * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise |
| * this array only holds the svgs_count elements. |
| */ |
| uint32_t vertex_input_elems; |
| uint32_t vertex_input_data[2 * 31 /* MAX_VES + 2 internal */]; |
| |
| /* Number of color outputs used by the fragment shader. */ |
| uint8_t num_color_outputs; |
| /* Map of color output of the fragment shader to color attachments in the |
| * render pass. |
| */ |
| uint8_t color_output_mapping[MAX_RTS]; |
| |
/* Precomputed CS instructions that can directly be copied into
| * anv_cmd_buffer. |
| */ |
| uint32_t batch_data[480]; |
| |
| /* Urb setup utilized by this pipeline. */ |
| struct intel_urb_config urb_cfg; |
| |
/* Fully baked instructions, ready to be emitted in the anv_cmd_buffer */
| struct { |
| struct anv_gfx_state_ptr urb; |
| struct anv_gfx_state_ptr vf_sgvs; |
| struct anv_gfx_state_ptr vf_sgvs_2; |
| struct anv_gfx_state_ptr vf_sgvs_instancing; |
| struct anv_gfx_state_ptr vf_instancing; |
| struct anv_gfx_state_ptr primitive_replication; |
| struct anv_gfx_state_ptr sbe; |
| struct anv_gfx_state_ptr sbe_swiz; |
| struct anv_gfx_state_ptr so_decl_list; |
| struct anv_gfx_state_ptr vs; |
| struct anv_gfx_state_ptr hs; |
| struct anv_gfx_state_ptr ds; |
| struct anv_gfx_state_ptr vs_protected; |
| struct anv_gfx_state_ptr hs_protected; |
| struct anv_gfx_state_ptr ds_protected; |
| |
| struct anv_gfx_state_ptr task_control; |
| struct anv_gfx_state_ptr task_control_protected; |
| struct anv_gfx_state_ptr task_shader; |
| struct anv_gfx_state_ptr task_redistrib; |
| struct anv_gfx_state_ptr clip_mesh; |
| struct anv_gfx_state_ptr mesh_control; |
| struct anv_gfx_state_ptr mesh_control_protected; |
| struct anv_gfx_state_ptr mesh_shader; |
| struct anv_gfx_state_ptr mesh_distrib; |
| struct anv_gfx_state_ptr sbe_mesh; |
| } final; |
| |
/* Prepacked CS instructions & structures that need to be merged later
| * with dynamic state. |
| */ |
| struct { |
| struct anv_gfx_state_ptr clip; |
| struct anv_gfx_state_ptr sf; |
| struct anv_gfx_state_ptr ps_extra; |
| struct anv_gfx_state_ptr wm; |
| struct anv_gfx_state_ptr so; |
| struct anv_gfx_state_ptr gs; |
| struct anv_gfx_state_ptr gs_protected; |
| struct anv_gfx_state_ptr te; |
| struct anv_gfx_state_ptr ps; |
| struct anv_gfx_state_ptr ps_protected; |
| struct anv_gfx_state_ptr vfg; |
| } partial; |
| }; |
| |
| #define anv_batch_emit_pipeline_state(batch, pipeline, state) \ |
| do { \ |
| if ((pipeline)->state.len == 0) \ |
| break; \ |
| uint32_t *dw; \ |
| dw = anv_batch_emit_dwords((batch), (pipeline)->state.len); \ |
| if (!dw) \ |
| break; \ |
| memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset], \ |
| 4 * (pipeline)->state.len); \ |
| } while (0) |
| |
| #define anv_batch_emit_pipeline_state_protected(batch, pipeline, \ |
| state, protected) \ |
| do { \ |
| struct anv_gfx_state_ptr *_cmd_state = protected ? \ |
| &(pipeline)->state##_protected : &(pipeline)->state; \ |
| if (_cmd_state->len == 0) \ |
| break; \ |
| uint32_t *dw; \ |
| dw = anv_batch_emit_dwords((batch), _cmd_state->len); \ |
| if (!dw) \ |
| break; \ |
| memcpy(dw, &(pipeline)->batch_data[_cmd_state->offset], \ |
| 4 * _cmd_state->len); \ |
| } while (0) |
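/* Example usage sketch: emitting the baked VS state of a graphics pipeline
 * into a batch ("protected" is a hypothetical bool):
 *
 *    anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vs);
 *    anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
 *                                            final.vs, protected);
 */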
| |
| |
| struct anv_compute_pipeline { |
| struct anv_pipeline base; |
| |
| struct anv_shader_bin * cs; |
| uint32_t batch_data[9]; |
| uint32_t interface_descriptor_data[8]; |
| |
/* A small hash based on shader_info::source_sha1 for identifying shaders
| * in renderdoc/shader-db. |
| */ |
| uint32_t source_hash; |
| }; |
| |
| struct anv_rt_shader_group { |
| VkRayTracingShaderGroupTypeKHR type; |
| |
| /* Whether this group was imported from another pipeline */ |
| bool imported; |
| |
| struct anv_shader_bin *general; |
| struct anv_shader_bin *closest_hit; |
| struct anv_shader_bin *any_hit; |
| struct anv_shader_bin *intersection; |
| |
| /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */ |
| uint32_t handle[8]; |
| }; |
| |
| struct anv_ray_tracing_pipeline { |
| struct anv_pipeline base; |
| |
| /* All shaders in the pipeline */ |
| struct util_dynarray shaders; |
| |
| uint32_t group_count; |
| struct anv_rt_shader_group * groups; |
| |
| /* If non-zero, this is the default computed stack size as per the stack |
| * size computation in the Vulkan spec. If zero, that indicates that the |
| * client has requested a dynamic stack size. |
| */ |
| uint32_t stack_size; |
| }; |
| |
| #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ |
| static inline struct anv_##pipe_type##_pipeline * \ |
| anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \ |
| { \ |
| assert(pipeline->type == pipe_enum); \ |
| return (struct anv_##pipe_type##_pipeline *) pipeline; \ |
| } |
| |
| ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS) |
| ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB) |
| ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE) |
| ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING) |
| |
| /* Can't use the macro because we need to handle both types. */ |
| static inline struct anv_graphics_base_pipeline * |
| anv_pipeline_to_graphics_base(struct anv_pipeline *pipeline) |
| { |
| assert(pipeline->type == ANV_PIPELINE_GRAPHICS || |
| pipeline->type == ANV_PIPELINE_GRAPHICS_LIB); |
| return (struct anv_graphics_base_pipeline *) pipeline; |
| } |
| |
| static inline bool |
| anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline, |
| gl_shader_stage stage) |
| { |
| return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0; |
| } |
| |
| static inline bool |
| anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline, |
| gl_shader_stage stage) |
| { |
| return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0; |
| } |
| |
| static inline bool |
| anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline) |
| { |
| return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX); |
| } |
| |
| static inline bool |
| anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline) |
| { |
| return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH); |
| } |
| |
| static inline bool |
| anv_gfx_all_color_write_masked(const struct anv_cmd_graphics_state *gfx, |
| const struct vk_dynamic_graphics_state *dyn) |
| { |
| uint8_t color_writes = dyn->cb.color_write_enables; |
| |
| /* All writes disabled through vkCmdSetColorWriteEnableEXT */ |
| if ((color_writes & ((1u << gfx->color_att_count) - 1)) == 0) |
| return true; |
| |
| /* Or all write masks are empty */ |
| for (uint32_t i = 0; i < gfx->color_att_count; i++) { |
| if (dyn->cb.attachments[i].write_mask != 0) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static inline void |
| anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state) |
| { |
| state->has_uint_rt = false; |
| for (unsigned a = 0; a < state->color_att_count; a++) { |
| if (vk_format_is_int(state->color_att[a].vk_format)) { |
| state->has_uint_rt = true; |
| break; |
| } |
| } |
| } |
| |
| #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \ |
| static inline const struct brw_##prefix##_prog_data * \ |
| get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \ |
| { \ |
| if (anv_pipeline_has_stage(pipeline, stage)) { \ |
| return (const struct brw_##prefix##_prog_data *) \ |
| pipeline->base.shaders[stage]->prog_data; \ |
| } else { \ |
| return NULL; \ |
| } \ |
| } |
| |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH) |
| ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK) |
| |
| static inline const struct brw_cs_prog_data * |
| get_cs_prog_data(const struct anv_compute_pipeline *pipeline) |
| { |
| assert(pipeline->cs); |
| return (const struct brw_cs_prog_data *) pipeline->cs->prog_data; |
| } |
| |
| static inline const struct brw_vue_prog_data * |
| anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline) |
| { |
| if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) |
| return &get_gs_prog_data(pipeline)->base; |
| else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) |
| return &get_tes_prog_data(pipeline)->base; |
| else |
| return &get_vs_prog_data(pipeline)->base; |
| } |
| |
| VkResult |
| anv_device_init_rt_shaders(struct anv_device *device); |
| |
| void |
| anv_device_finish_rt_shaders(struct anv_device *device); |
| |
| struct anv_kernel_arg { |
| bool is_ptr; |
| uint16_t size; |
| |
| union { |
| uint64_t u64; |
| void *ptr; |
| }; |
| }; |
| |
| struct anv_kernel { |
| #ifndef NDEBUG |
| const char *name; |
| #endif |
| struct anv_shader_bin *bin; |
| const struct intel_l3_config *l3_config; |
| }; |
| |
| struct anv_format_plane { |
| enum isl_format isl_format:16; |
| struct isl_swizzle swizzle; |
| |
/* What aspect is associated with this plane */
| VkImageAspectFlags aspect; |
| }; |
| |
| struct anv_format { |
| struct anv_format_plane planes[3]; |
| VkFormat vk_format; |
| uint8_t n_planes; |
| bool can_ycbcr; |
| bool can_video; |
| }; |
| |
| static inline void |
| anv_assert_valid_aspect_set(VkImageAspectFlags aspects) |
| { |
| if (util_bitcount(aspects) == 1) { |
| assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | |
| VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT | |
| VK_IMAGE_ASPECT_PLANE_0_BIT | |
| VK_IMAGE_ASPECT_PLANE_1_BIT | |
| VK_IMAGE_ASPECT_PLANE_2_BIT)); |
| } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) { |
| assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT || |
| aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT | |
| VK_IMAGE_ASPECT_PLANE_1_BIT) || |
| aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT | |
| VK_IMAGE_ASPECT_PLANE_1_BIT | |
| VK_IMAGE_ASPECT_PLANE_2_BIT)); |
| } else { |
| assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT)); |
| } |
| } |
| |
| /** |
| * Return the aspect's plane relative to all_aspects. For an image, for |
| * instance, all_aspects would be the set of aspects in the image. For |
| * an image view, all_aspects would be the subset of aspects represented |
| * by that particular view. |
| */ |
| static inline uint32_t |
| anv_aspect_to_plane(VkImageAspectFlags all_aspects, |
| VkImageAspectFlagBits aspect) |
| { |
| anv_assert_valid_aspect_set(all_aspects); |
| assert(util_bitcount(aspect) == 1); |
| assert(!(aspect & ~all_aspects)); |
| |
| /* Because we always put image and view planes in aspect-bit-order, the |
| * plane index is the number of bits in all_aspects before aspect. |
| */ |
| return util_bitcount(all_aspects & (aspect - 1)); |
| } |
| |
| #define anv_foreach_image_aspect_bit(b, image, aspects) \ |
| u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects)) |
| |
| const struct anv_format * |
| anv_get_format(VkFormat format); |
| |
| static inline uint32_t |
| anv_get_format_planes(VkFormat vk_format) |
| { |
| const struct anv_format *format = anv_get_format(vk_format); |
| |
| return format != NULL ? format->n_planes : 0; |
| } |
| |
| struct anv_format_plane |
| anv_get_format_plane(const struct intel_device_info *devinfo, |
| VkFormat vk_format, uint32_t plane, |
| VkImageTiling tiling); |
| |
| struct anv_format_plane |
| anv_get_format_aspect(const struct intel_device_info *devinfo, |
| VkFormat vk_format, |
| VkImageAspectFlagBits aspect, VkImageTiling tiling); |
| |
| static inline enum isl_format |
| anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format, |
| VkImageAspectFlags aspect, VkImageTiling tiling) |
| { |
| return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format; |
| } |
| |
| bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo, |
| const enum isl_format format); |
| |
| bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo, |
| VkImageCreateFlags create_flags, |
| VkFormat vk_format, VkImageTiling vk_tiling, |
| VkImageUsageFlags vk_usage, |
| const VkImageFormatListCreateInfo *fmt_list); |
| |
| extern VkFormat |
| vk_format_from_android(unsigned android_format, unsigned android_usage); |
| |
| static inline VkFormat |
| anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format) |
| { |
| if (pdevice->flush_astc_ldr_void_extent_denorms) { |
| const struct util_format_description *desc = |
| vk_format_description(format); |
| if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC && |
| desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) |
| return format; |
| } |
| |
| if (pdevice->emu_astc_ldr) |
| return vk_texcompress_astc_emulation_format(format); |
| |
| return VK_FORMAT_UNDEFINED; |
| } |
| |
| static inline bool |
| anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format) |
| { |
| return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED; |
| } |
| |
| static inline struct isl_swizzle |
| anv_swizzle_for_render(struct isl_swizzle swizzle) |
| { |
| /* Sometimes the swizzle will have alpha map to one. We do this to fake |
 * RGB as RGBA for texturing.
| */ |
| assert(swizzle.a == ISL_CHANNEL_SELECT_ONE || |
| swizzle.a == ISL_CHANNEL_SELECT_ALPHA); |
| |
| /* But it doesn't matter what we render to that channel */ |
| swizzle.a = ISL_CHANNEL_SELECT_ALPHA; |
| |
| return swizzle; |
| } |
| |
| void |
| anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm); |
| |
| /** |
| * Describes how each part of anv_image will be bound to memory. |
| */ |
| struct anv_image_memory_range { |
| /** |
| * Disjoint bindings into which each portion of the image will be bound. |
| * |
 * Binding images to memory can be complicated and can involve binding
 * different portions of the image to different memory objects or regions.
 * For most images, everything lives in the MAIN binding and gets bound by
 * vkBindImageMemory. For disjoint multi-planar images, each plane has
 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
 * implicit or driver-managed and live in special-case bindings.
| */ |
| enum anv_image_memory_binding { |
| /** |
| * Used if and only if image is not multi-planar disjoint. Bound by |
| * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo. |
| */ |
| ANV_IMAGE_MEMORY_BINDING_MAIN, |
| |
| /** |
| * Used if and only if image is multi-planar disjoint. Bound by |
| * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo. |
| */ |
| ANV_IMAGE_MEMORY_BINDING_PLANE_0, |
| ANV_IMAGE_MEMORY_BINDING_PLANE_1, |
| ANV_IMAGE_MEMORY_BINDING_PLANE_2, |
| |
| /** |
| * Driver-private bo. In special cases we may store the aux surface and/or |
| * aux state in this binding. |
| */ |
| ANV_IMAGE_MEMORY_BINDING_PRIVATE, |
| |
| /** Sentinel */ |
| ANV_IMAGE_MEMORY_BINDING_END, |
| } binding; |
| |
| uint32_t alignment; |
| uint64_t size; |
| |
| /** |
| * Offset is relative to the start of the binding created by |
| * vkBindImageMemory, not to the start of the bo. |
| */ |
| uint64_t offset; |
| }; |
| |
| /** |
| * Subsurface of an anv_image. |
| */ |
| struct anv_surface { |
| struct isl_surf isl; |
| struct anv_image_memory_range memory_range; |
| }; |
| |
| static inline bool MUST_CHECK |
| anv_surface_is_valid(const struct anv_surface *surface) |
| { |
| return surface->isl.size_B > 0 && surface->memory_range.size > 0; |
| } |
| |
| struct anv_image { |
| struct vk_image vk; |
| |
| uint32_t n_planes; |
| |
| /** |
| * Image has multi-planar format and was created with |
| * VK_IMAGE_CREATE_DISJOINT_BIT. |
| */ |
| bool disjoint; |
| |
| /** |
| * Image is a WSI image |
| */ |
| bool from_wsi; |
| |
| /** |
 * Image was imported from a struct AHardwareBuffer. We have to delay
| * final image creation until bind time. |
| */ |
| bool from_ahb; |
| |
| /** |
| * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo |
| * must be released when the image is destroyed. |
| */ |
| bool from_gralloc; |
| |
| /** |
| * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC |
| * LDR workaround or emulation. |
| */ |
| VkFormat emu_plane_format; |
| |
| /** |
| * The set of formats that will be used with the first plane of this image. |
| * |
| * Assuming all view formats have the same bits-per-channel, we support the |
| * largest number of variations which may exist. |
| */ |
| enum isl_format view_formats[5]; |
| unsigned num_view_formats; |
| |
| /** |
| * The memory bindings created by vkCreateImage and vkBindImageMemory. |
| * |
| * For details on the image's memory layout, see check_memory_bindings(). |
| * |
| * vkCreateImage constructs the `memory_range` for each |
| * anv_image_memory_binding. After vkCreateImage, each binding is valid if |
| * and only if `memory_range::size > 0`. |
| * |
| * vkBindImageMemory binds each valid `memory_range` to an `address`. |
| * Usually, the app will provide the address via the parameters of |
| * vkBindImageMemory. However, special-case bindings may be bound to |
| * driver-private memory. |
| * |
 * If needed, a host pointer to the image is mapped for host image copies.
| */ |
| struct anv_image_binding { |
| struct anv_image_memory_range memory_range; |
| struct anv_address address; |
| struct anv_sparse_binding_data sparse_data; |
| void *host_map; |
| uint64_t map_delta; |
| uint64_t map_size; |
| } bindings[ANV_IMAGE_MEMORY_BINDING_END]; |
| |
| /** |
| * Image subsurfaces |
| * |
 * anv_image::planes[x].primary_surface is valid if and only if
 * anv_image::aspects contains the aspect corresponding to plane x. Refer to
 * anv_image_aspect_to_plane() to figure out the plane number associated
 * with a given aspect.
| * |
| * The hardware requires that the depth buffer and stencil buffer be |
| * separate surfaces. From Vulkan's perspective, though, depth and stencil |
| * reside in the same VkImage. To satisfy both the hardware and Vulkan, we |
| * allocate the depth and stencil buffers as separate surfaces in the same |
| * bo. |
| */ |
| struct anv_image_plane { |
| struct anv_surface primary_surface; |
| |
| /** |
| * The base aux usage for this image. For color images, this can be |
| * either CCS_E or CCS_D depending on whether or not we can reliably |
| * leave CCS on all the time. |
| */ |
| enum isl_aux_usage aux_usage; |
| |
| struct anv_surface aux_surface; |
| |
| /** Location of the compression control surface. */ |
| struct anv_image_memory_range compr_ctrl_memory_range; |
| |
| /** Location of the fast clear state. */ |
| struct anv_image_memory_range fast_clear_memory_range; |
| |
| /** |
| * Whether this image can be fast cleared with non-zero clear colors. |
| * This can happen with mutable images when formats with different bit |
| * sizes per component are used. |
| * |
| * On Gfx9+, because the clear color is stored as four 32-bit components, |
| * we can clear in R16G16_UNORM (storing two 16-bit values in components |
| * 0 & 1 of the clear color) and then draw in R32_UINT, which would |
| * interpret the clear color as a single-component value, using only the |
| * first 16-bit component of the previously written clear color. |
| * |
| * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported, so this |
| * boolean prevents the use of CC_ONE. |
| */ |
| bool can_non_zero_fast_clear; |
| |
| struct { |
| /** Whether the image has CCS data mapped through AUX-TT. */ |
| bool mapped; |
| |
| /** Main address of the mapping. */ |
| uint64_t addr; |
| |
| /** Size of the mapping. */ |
| uint64_t size; |
| } aux_tt; |
| } planes[3]; |
| |
| struct anv_image_memory_range vid_dmv_top_surface; |
| |
| /* Link in the anv_device.image_private_objects list */ |
| struct list_head link; |
| }; |
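| |
| /* A minimal illustrative sketch (not driver API): given the binding model |
| * described above, the memory footprint of a non-sparse image can be |
| * derived by walking the bindings and summing the valid ranges, i.e. |
| * those with memory_range.size > 0: |
| * |
| * uint64_t total = 0; |
| * for (int b = 0; b < ANV_IMAGE_MEMORY_BINDING_END; b++) { |
| * if (image->bindings[b].memory_range.size > 0) |
| * total += image->bindings[b].memory_range.size; |
| * } |
| */ |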
| |
| static inline bool |
| anv_image_is_protected(const struct anv_image *image) |
| { |
| return image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT; |
| } |
| |
| static inline bool |
| anv_image_is_sparse(const struct anv_image *image) |
| { |
| return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT; |
| } |
| |
| static inline bool |
| anv_image_is_externally_shared(const struct anv_image *image) |
| { |
| return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID || |
| image->vk.external_handle_types != 0; |
| } |
| |
| static inline bool |
| anv_image_has_private_binding(const struct anv_image *image) |
| { |
| const struct anv_image_binding private_binding = |
| image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE]; |
| return private_binding.memory_range.size != 0; |
| } |
| |
| static inline bool |
| anv_image_format_is_d16_or_s8(const struct anv_image *image) |
| { |
| return image->vk.format == VK_FORMAT_D16_UNORM || |
| image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT || |
| image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || |
| image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT || |
| image->vk.format == VK_FORMAT_S8_UINT; |
| } |
| |
| static inline bool |
| anv_image_can_host_memcpy(const struct anv_image *image) |
| { |
| const struct isl_surf *surf = &image->planes[0].primary_surface.isl; |
| struct isl_tile_info tile_info; |
| isl_surf_get_tile_info(surf, &tile_info); |
| |
| const bool array_pitch_aligned_to_tile = |
| surf->array_pitch_el_rows % tile_info.logical_extent_el.height == 0; |
| |
| return image->vk.tiling != VK_IMAGE_TILING_LINEAR && |
| image->n_planes == 1 && |
| array_pitch_aligned_to_tile && |
| image->vk.mip_levels == 1; |
| } |
| |
| /* The ordering of this enum is important */ |
| enum anv_fast_clear_type { |
| /** Image does not have/support any fast-clear blocks */ |
| ANV_FAST_CLEAR_NONE = 0, |
| /** Image has/supports fast-clear but only to the default value */ |
| ANV_FAST_CLEAR_DEFAULT_VALUE = 1, |
| /** Image has/supports fast-clear with an arbitrary fast-clear value */ |
| ANV_FAST_CLEAR_ANY = 2, |
| }; |
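| |
| /* A hedged example of why the ordering above matters: code can clamp the |
| * fast-clear support of an operation by taking the minimum of two types, |
| * e.g. |
| * |
| * enum anv_fast_clear_type allowed = |
| * MIN2(image_support, layout_support); |
| * |
| * which is only meaningful because NONE < DEFAULT_VALUE < ANY. The |
| * variable names here are illustrative, not driver symbols. |
| */ |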
| |
| /** |
| * Return the aspect's _format_ plane, not its _memory_ plane (using the |
| * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a |
| * aspect may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain |
| * VK_IMAGE_ASPECT_MEMORY_PLANE_*. |
| */ |
| static inline uint32_t |
| anv_image_aspect_to_plane(const struct anv_image *image, |
| VkImageAspectFlagBits aspect) |
| { |
| return anv_aspect_to_plane(image->vk.aspects, aspect); |
| } |
| |
| /* Returns the number of auxiliary buffer levels attached to an image. */ |
| static inline uint8_t |
| anv_image_aux_levels(const struct anv_image * const image, |
| VkImageAspectFlagBits aspect) |
| { |
| uint32_t plane = anv_image_aspect_to_plane(image, aspect); |
| if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) |
| return 0; |
| |
| return image->vk.mip_levels; |
| } |
| |
| /* Returns the number of auxiliary buffer layers attached to an image. */ |
| static inline uint32_t |
| anv_image_aux_layers(const struct anv_image * const image, |
| VkImageAspectFlagBits aspect, |
| const uint8_t miplevel) |
| { |
| assert(image); |
| |
| /* The miplevel must exist in the main buffer. */ |
| assert(miplevel < image->vk.mip_levels); |
| |
| if (miplevel >= anv_image_aux_levels(image, aspect)) { |
| /* There are no layers with auxiliary data because the miplevel has no |
| * auxiliary data. |
| */ |
| return 0; |
| } |
| |
| return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel); |
| } |
| |
| static inline struct anv_address MUST_CHECK |
| anv_image_address(const struct anv_image *image, |
| const struct anv_image_memory_range *mem_range) |
| { |
| const struct anv_image_binding *binding = &image->bindings[mem_range->binding]; |
| assert(binding->memory_range.offset == 0); |
| |
| if (mem_range->size == 0) |
| return ANV_NULL_ADDRESS; |
| |
| return anv_address_add(binding->address, mem_range->offset); |
| } |
| |
| bool |
| anv_image_view_formats_incomplete(const struct anv_image *image); |
| |
| static inline struct anv_address |
| anv_image_get_clear_color_addr(const struct anv_device *device, |
| const struct anv_image *image, |
| enum isl_format view_format, |
| VkImageAspectFlagBits aspect, |
| bool for_sampler) |
| { |
| uint32_t plane = anv_image_aspect_to_plane(image, aspect); |
| const struct anv_image_memory_range *mem_range = |
| &image->planes[plane].fast_clear_memory_range; |
| |
| const struct anv_address base_addr = anv_image_address(image, mem_range); |
| if (anv_address_is_null(base_addr)) |
| return ANV_NULL_ADDRESS; |
| |
| if (view_format == ISL_FORMAT_UNSUPPORTED) |
| view_format = image->planes[plane].primary_surface.isl.format; |
| |
| uint64_t access_offset = (device->info->ver == 9 && for_sampler) ? 16 : 0; |
| const unsigned clear_state_size = device->info->ver >= 11 ? 64 : 32; |
| for (int i = 0; i < image->num_view_formats; i++) { |
| if (view_format == image->view_formats[i]) { |
| uint64_t entry_offset = i * clear_state_size + access_offset; |
| return anv_address_add(base_addr, entry_offset); |
| } |
| } |
| |
| assert(anv_image_view_formats_incomplete(image)); |
| return anv_address_add(base_addr, access_offset); |
| } |
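| |
| /* Worked example (illustrative): on a ver >= 11 device each view format |
| * gets a 64-byte clear state entry, so a view format found at index 2 in |
| * image->view_formats resolves to base_addr + 2 * 64 = base_addr + 128. |
| * On ver == 9 with for_sampler set, a further 16-byte offset is applied. |
| */ |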
| |
| static inline struct anv_address |
| anv_image_get_fast_clear_type_addr(const struct anv_device *device, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect) |
| { |
| /* Xe2+ platforms don't need fast clear type. We shouldn't get here. */ |
| assert(device->info->ver < 20); |
| struct anv_address addr = |
| anv_image_get_clear_color_addr(device, image, ISL_FORMAT_UNSUPPORTED, |
| aspect, false); |
| |
| /* Refer to add_aux_state_tracking_buffer(). */ |
| unsigned clear_color_state_size; |
| if (device->info->ver >= 11) { |
| assert(device->isl_dev.ss.clear_color_state_size == 32); |
| clear_color_state_size = (image->num_view_formats - 1) * 64 + 32 - 8; |
| } else { |
| assert(device->isl_dev.ss.clear_value_size == 16); |
| clear_color_state_size = image->num_view_formats * 16 * 2; |
| } |
| |
| return anv_address_add(addr, clear_color_state_size); |
| } |
| |
| static inline struct anv_address |
| anv_image_get_compression_state_addr(const struct anv_device *device, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect, |
| uint32_t level, uint32_t array_layer) |
| { |
| /* Xe2+ platforms don't use compression state. We shouldn't get here. */ |
| assert(device->info->ver < 20); |
| assert(level < anv_image_aux_levels(image, aspect)); |
| assert(array_layer < anv_image_aux_layers(image, aspect, level)); |
| UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect); |
| assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage)); |
| |
| /* Relative to start of the plane's fast clear type */ |
| uint32_t offset; |
| |
| offset = 4; /* Go past the fast clear type */ |
| |
| if (image->vk.image_type == VK_IMAGE_TYPE_3D) { |
| for (uint32_t l = 0; l < level; l++) |
| offset += u_minify(image->vk.extent.depth, l) * 4; |
| } else { |
| offset += level * image->vk.array_layers * 4; |
| } |
| |
| offset += array_layer * 4; |
| |
| assert(offset < image->planes[plane].fast_clear_memory_range.size); |
| |
| return anv_address_add( |
| anv_image_get_fast_clear_type_addr(device, image, aspect), |
| offset); |
| } |
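| |
| /* Worked example (illustrative): for a 2D image with 4 array layers, the |
| * compression state dword for level 1, layer 2 lives at |
| * |
| * offset = 4 (fast clear type) |
| * + 1 * 4 * 4 (layers of level 0) |
| * + 2 * 4 (layers 0-1 of level 1) |
| * = 28 bytes |
| * |
| * past the plane's fast clear type address. |
| */ |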
| |
| static inline const struct anv_image_memory_range * |
| anv_image_get_aux_memory_range(const struct anv_image *image, |
| uint32_t plane) |
| { |
| if (image->planes[plane].aux_surface.memory_range.size > 0) |
| return &image->planes[plane].aux_surface.memory_range; |
| else |
| return &image->planes[plane].compr_ctrl_memory_range; |
| } |
| |
| /* Returns true if a HiZ-enabled depth buffer can be sampled from. */ |
| static inline bool |
| anv_can_sample_with_hiz(const struct intel_device_info * const devinfo, |
| const struct anv_image *image) |
| { |
| if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
| return false; |
| |
| /* For Gfx8-11, there are some restrictions around sampling from HiZ. |
| * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode |
| * say: |
| * |
| * "If this field is set to AUX_HIZ, Number of Multisamples must |
| * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D." |
| */ |
| if (image->vk.image_type == VK_IMAGE_TYPE_3D) |
| return false; |
| |
| if (!devinfo->has_sample_with_hiz) |
| return false; |
| |
| return image->vk.samples == 1; |
| } |
| |
| /* Returns true if an MCS-enabled color buffer can be sampled from while fast-cleared. */ |
| static inline bool |
| anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo, |
| const struct anv_image *image) |
| { |
| assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT); |
| const uint32_t plane = |
| anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT); |
| |
| assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage)); |
| |
| const struct anv_surface *anv_surf = &image->planes[plane].primary_surface; |
| |
| /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears. |
| * See HSD 1707282275, wa_14013111325. Due to the use of |
| * format-reinterpretation, a simplified workaround is implemented. |
| */ |
| if (intel_needs_workaround(devinfo, 14013111325) && |
| isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static inline bool |
| anv_image_plane_uses_aux_map(const struct anv_device *device, |
| const struct anv_image *image, |
| uint32_t plane) |
| { |
| return device->info->has_aux_map && |
| isl_aux_usage_has_ccs(image->planes[plane].aux_usage); |
| } |
| |
| static inline bool |
| anv_image_uses_aux_map(const struct anv_device *device, |
| const struct anv_image *image) |
| { |
| for (uint32_t p = 0; p < image->n_planes; ++p) { |
| if (anv_image_plane_uses_aux_map(device, image, p)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static inline bool |
| anv_bo_allows_aux_map(const struct anv_device *device, |
| const struct anv_bo *bo) |
| { |
| if (device->aux_map_ctx == NULL) |
| return false; |
| |
| return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0; |
| } |
| |
| static inline bool |
| anv_address_allows_aux_map(const struct anv_device *device, |
| struct anv_address addr) |
| { |
| if (device->aux_map_ctx == NULL) |
| return false; |
| |
| /* Technically, we only care about the offset at which the image is bound |
| * within the BO, but we don't have that information here. As a heuristic, |
| * rely on the BO offset instead. |
| */ |
| if (anv_address_physical(addr) % |
| intel_aux_map_get_alignment(device->aux_map_ctx) != 0) |
| return false; |
| |
| return true; |
| } |
| |
| void |
| anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect, |
| enum isl_aux_usage aux_usage, |
| uint32_t level, |
| uint32_t base_layer, |
| uint32_t layer_count); |
| |
| void |
| anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| const enum isl_format format, |
| const struct isl_swizzle swizzle, |
| union isl_color_value clear_color); |
| |
| void |
| anv_cmd_buffer_load_clear_color(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_state state, |
| const struct anv_image_view *iview); |
| |
| enum anv_image_memory_binding |
| anv_image_aspect_to_binding(struct anv_image *image, |
| VkImageAspectFlags aspect); |
| |
| void |
| anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect, |
| enum isl_aux_usage aux_usage, |
| enum isl_format format, struct isl_swizzle swizzle, |
| uint32_t level, uint32_t base_layer, uint32_t layer_count, |
| VkRect2D area, union isl_color_value clear_color); |
| void |
| anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| VkImageAspectFlags aspects, |
| enum isl_aux_usage depth_aux_usage, |
| uint32_t level, |
| uint32_t base_layer, uint32_t layer_count, |
| VkRect2D area, |
| const VkClearDepthStencilValue *clear_value); |
| void |
| anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_attachment *att, |
| VkImageLayout layout, |
| VkImageAspectFlagBits aspect); |
| |
| static inline union isl_color_value |
| anv_image_hiz_clear_value(const struct anv_image *image) |
| { |
| /* The benchmarks we're tracking tend to prefer clearing depth buffers to |
| * 0.0f when the depth buffers are part of images with multiple aspects. |
| * Otherwise, they tend to prefer clearing depth buffers to 1.0f. |
| */ |
| if (image->n_planes == 2) |
| return (union isl_color_value) { .f32 = { 0.0f, } }; |
| else |
| return (union isl_color_value) { .f32 = { 1.0f, } }; |
| } |
| |
| void |
| anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect, uint32_t level, |
| uint32_t base_layer, uint32_t layer_count, |
| enum isl_aux_op hiz_op); |
| void |
| anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| VkImageAspectFlags aspects, |
| uint32_t level, |
| uint32_t base_layer, uint32_t layer_count, |
| VkRect2D area, |
| const VkClearDepthStencilValue *clear_value); |
| void |
| anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| enum isl_format format, struct isl_swizzle swizzle, |
| VkImageAspectFlagBits aspect, |
| uint32_t base_layer, uint32_t layer_count, |
| enum isl_aux_op mcs_op, union isl_color_value *clear_value, |
| bool predicate); |
| void |
| anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| enum isl_format format, struct isl_swizzle swizzle, |
| VkImageAspectFlagBits aspect, uint32_t level, |
| uint32_t base_layer, uint32_t layer_count, |
| enum isl_aux_op ccs_op, union isl_color_value *clear_value, |
| bool predicate); |
| |
| isl_surf_usage_flags_t |
| anv_image_choose_isl_surf_usage(struct anv_physical_device *device, |
| VkImageCreateFlags vk_create_flags, |
| VkImageUsageFlags vk_usage, |
| isl_surf_usage_flags_t isl_extra_usage, |
| VkImageAspectFlagBits aspect, |
| VkImageCompressionFlagsEXT comp_flags); |
| |
| void |
| anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_address address, |
| VkDeviceSize size, |
| uint32_t data, |
| bool protected); |
| |
| VkResult |
| anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer); |
| |
| bool |
| anv_can_hiz_clear_ds_view(struct anv_device *device, |
| const struct anv_image_view *iview, |
| VkImageLayout layout, |
| VkImageAspectFlags clear_aspects, |
| float depth_clear_value, |
| VkRect2D render_area, |
| const VkQueueFlagBits queue_flags); |
| |
| bool |
| anv_can_fast_clear_color(const struct anv_cmd_buffer *cmd_buffer, |
| const struct anv_image *image, |
| unsigned level, |
| const struct VkClearRect *clear_rect, |
| VkImageLayout layout, |
| enum isl_format view_format, |
| union isl_color_value clear_color); |
| |
| enum isl_aux_state ATTRIBUTE_PURE |
| anv_layout_to_aux_state(const struct intel_device_info * const devinfo, |
| const struct anv_image *image, |
| const VkImageAspectFlagBits aspect, |
| const VkImageLayout layout, |
| const VkQueueFlagBits queue_flags); |
| |
| enum isl_aux_usage ATTRIBUTE_PURE |
| anv_layout_to_aux_usage(const struct intel_device_info * const devinfo, |
| const struct anv_image *image, |
| const VkImageAspectFlagBits aspect, |
| const VkImageUsageFlagBits usage, |
| const VkImageLayout layout, |
| const VkQueueFlagBits queue_flags); |
| |
| enum anv_fast_clear_type ATTRIBUTE_PURE |
| anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo, |
| const struct anv_image * const image, |
| const VkImageAspectFlagBits aspect, |
| const VkImageLayout layout, |
| const VkQueueFlagBits queue_flags); |
| |
| bool ATTRIBUTE_PURE |
| anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo, |
| const struct anv_image * const image, |
| const VkImageAspectFlagBits aspect, |
| const VkImageLayout layout, |
| const VkQueueFlagBits queue_flags); |
| |
| static inline bool |
| anv_image_aspects_compatible(VkImageAspectFlags aspects1, |
| VkImageAspectFlags aspects2) |
| { |
| if (aspects1 == aspects2) |
| return true; |
| |
| /* Color aspects are compatible when both masks have the same number of bits set. */ |
| if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 && |
| (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 && |
| util_bitcount(aspects1) == util_bitcount(aspects2)) |
| return true; |
| |
| return false; |
| } |
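| |
| /* Example (illustrative): VK_IMAGE_ASPECT_COLOR_BIT is compatible with |
| * VK_IMAGE_ASPECT_PLANE_0_BIT (both are single color-class bits), while |
| * VK_IMAGE_ASPECT_COLOR_BIT and (PLANE_0_BIT | PLANE_1_BIT) are not, |
| * because the bit counts differ. |
| */ |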
| |
| struct anv_image_view { |
| struct vk_image_view vk; |
| |
| const struct anv_image *image; /**< VkImageViewCreateInfo::image */ |
| |
| unsigned n_planes; |
| |
| /** |
| * True if the surface states (if any) are owned by some anv_state_stream |
| * from internal_surface_state_pool. |
| */ |
| bool use_surface_state_stream; |
| |
| struct { |
| struct isl_view isl; |
| |
| /** |
| * A version of the image view for storage usage (can apply 3D image |
| * slicing). |
| */ |
| struct isl_view isl_storage; |
| |
| /** |
| * RENDER_SURFACE_STATE when using image as a sampler surface with an |
| * image layout of SHADER_READ_ONLY_OPTIMAL or |
| * DEPTH_STENCIL_READ_ONLY_OPTIMAL. |
| */ |
| struct anv_surface_state optimal_sampler; |
| |
| /** |
| * RENDER_SURFACE_STATE when using image as a sampler surface with an |
| * image layout of GENERAL. |
| */ |
| struct anv_surface_state general_sampler; |
| |
| /** |
| * RENDER_SURFACE_STATE when using image as a storage image. |
| */ |
| struct anv_surface_state storage; |
| } planes[3]; |
| }; |
| |
| enum anv_image_view_state_flags { |
| ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 0), |
| }; |
| |
| void anv_image_fill_surface_state(struct anv_device *device, |
| const struct anv_image *image, |
| VkImageAspectFlagBits aspect, |
| const struct isl_view *view, |
| isl_surf_usage_flags_t view_usage, |
| enum isl_aux_usage aux_usage, |
| const union isl_color_value *clear_color, |
| enum anv_image_view_state_flags flags, |
| struct anv_surface_state *state_inout); |
| |
| |
| static inline const struct anv_surface_state * |
| anv_image_view_texture_surface_state(const struct anv_image_view *iview, |
| uint32_t plane, VkImageLayout layout) |
| { |
| return (layout == VK_IMAGE_LAYOUT_GENERAL || |
| layout == VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR) ? |
| &iview->planes[plane].general_sampler : |
| &iview->planes[plane].optimal_sampler; |
| } |
| |
| static inline const struct anv_surface_state * |
| anv_image_view_storage_surface_state(const struct anv_image_view *iview) |
| { |
| return &iview->planes[0].storage; |
| } |
| |
| static inline bool |
| anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state, |
| const struct anv_image *image) |
| { |
| for (unsigned a = 0; a < state->color_att_count; a++) { |
| if (state->color_att[a].iview && |
| state->color_att[a].iview->image == image) |
| return true; |
| } |
| |
| if (state->depth_att.iview && state->depth_att.iview->image == image) |
| return true; |
| if (state->stencil_att.iview && state->stencil_att.iview->image == image) |
| return true; |
| |
| return false; |
| } |
| |
| struct anv_image_create_info { |
| const VkImageCreateInfo *vk_info; |
| |
| /** An opt-in bitmask that restricts the ISL tilings derived from the Vulkan tiling. */ |
| isl_tiling_flags_t isl_tiling_flags; |
| |
| /** These flags will be added to any derived from VkImageCreateInfo. */ |
| isl_surf_usage_flags_t isl_extra_usage_flags; |
| |
| /** An opt-in stride in pixels, should be 0 for implicit layouts */ |
| uint32_t stride; |
| |
| /** Whether to skip allocating the private binding */ |
| bool no_private_binding_alloc; |
| }; |
| |
| VkResult anv_image_init(struct anv_device *device, struct anv_image *image, |
| const struct anv_image_create_info *create_info); |
| |
| void anv_image_finish(struct anv_image *image); |
| |
| void anv_image_get_memory_requirements(struct anv_device *device, |
| struct anv_image *image, |
| VkImageAspectFlags aspects, |
| VkMemoryRequirements2 *pMemoryRequirements); |
| |
| void anv_image_view_init(struct anv_device *device, |
| struct anv_image_view *iview, |
| const VkImageViewCreateInfo *pCreateInfo, |
| struct anv_state_stream *state_stream); |
| |
| void anv_image_view_finish(struct anv_image_view *iview); |
| |
| enum isl_format |
| anv_isl_format_for_descriptor_type(const struct anv_device *device, |
| VkDescriptorType type); |
| |
| static inline isl_surf_usage_flags_t |
| anv_isl_usage_for_descriptor_type(const VkDescriptorType type) |
| { |
| switch (type) { |
| case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: |
| case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: |
| return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT; |
| default: |
| return ISL_SURF_USAGE_STORAGE_BIT; |
| } |
| } |
| |
| VkFormatFeatureFlags2 |
| anv_get_image_format_features2(const struct anv_physical_device *physical_device, |
| VkFormat vk_format, |
| const struct anv_format *anv_format, |
| VkImageTiling vk_tiling, |
| const struct isl_drm_modifier_info *isl_mod_info); |
| |
| void anv_fill_buffer_surface_state(struct anv_device *device, |
| void *surface_state_ptr, |
| enum isl_format format, |
| struct isl_swizzle swizzle, |
| isl_surf_usage_flags_t usage, |
| struct anv_address address, |
| uint32_t range, uint32_t stride); |
| |
| |
| struct gfx8_border_color { |
| union { |
| float float32[4]; |
| uint32_t uint32[4]; |
| }; |
| /* Pad out to 64 bytes */ |
| uint32_t _pad[12]; |
| }; |
| |
| struct anv_sampler { |
| struct vk_sampler vk; |
| |
| /* Hash of the sampler state + border color, useful for embedded samplers |
| * and included in the descriptor layout hash. |
| */ |
| unsigned char sha1[20]; |
| |
| uint32_t state[3][4]; |
| /* Packed SAMPLER_STATE without the border color pointer. */ |
| uint32_t state_no_bc[3][4]; |
| uint32_t n_planes; |
| |
| /* Blob of sampler state data which is guaranteed to be 32-byte aligned |
| * and with a 32-byte stride for use as bindless samplers. |
| */ |
| struct anv_state bindless_state; |
| |
| struct anv_state custom_border_color; |
| }; |
| |
| |
| struct anv_query_pool { |
| struct vk_query_pool vk; |
| |
| /** Stride between queries, in bytes */ |
| uint32_t stride; |
| /** BO backing the query pool's storage */ |
| struct anv_bo *bo; |
| |
| /** Offset of the KHR_performance_query small batches that update |
| * ANV_PERF_QUERY_OFFSET_REG |
| */ |
| uint32_t khr_perf_preambles_offset; |
| |
| /** Size of each small batch */ |
| uint32_t khr_perf_preamble_stride; |
| |
| /* KHR perf queries : */ |
| /** Query pass size in bytes (availability + padding + query data) */ |
| uint32_t pass_size; |
| /** Offset of the query data within a pass */ |
| uint32_t data_offset; |
| /** Size of a single counter snapshot (query data / 2) */ |
| uint32_t snapshot_size; |
| uint32_t n_counters; |
| struct intel_perf_counter_pass *counter_pass; |
| uint32_t n_passes; |
| struct intel_perf_query_info **pass_query; |
| |
| /* Video encoding queries */ |
| VkVideoCodecOperationFlagsKHR codec; |
| }; |
| |
| static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool, |
| uint32_t pass) |
| { |
| return pool->khr_perf_preambles_offset + |
| pool->khr_perf_preamble_stride * pass; |
| } |
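| |
| /* Worked example (illustrative numbers): with khr_perf_preambles_offset |
| * == 4096 and khr_perf_preamble_stride == 64, the preamble batch for |
| * pass 3 starts at 4096 + 64 * 3 = 4288 bytes into the pool BO. |
| */ |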
| |
| struct anv_vid_mem { |
| struct anv_device_memory *mem; |
| VkDeviceSize offset; |
| VkDeviceSize size; |
| }; |
| |
| #define ANV_MB_WIDTH 16 |
| #define ANV_MB_HEIGHT 16 |
| #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16 |
| #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16 |
| #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8 |
| #define ANV_MAX_H265_CTB_SIZE 64 |
| |
| enum anv_vid_mem_h264_types { |
| ANV_VID_MEM_H264_INTRA_ROW_STORE, |
| ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE, |
| ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH, |
| ANV_VID_MEM_H264_MPR_ROW_SCRATCH, |
| ANV_VID_MEM_H264_MAX, |
| }; |
| |
| enum anv_vid_mem_h265_types { |
| ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE, |
| ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE, |
| ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN, |
| ANV_VID_MEM_H265_METADATA_LINE, |
| ANV_VID_MEM_H265_METADATA_TILE_LINE, |
| ANV_VID_MEM_H265_METADATA_TILE_COLUMN, |
| ANV_VID_MEM_H265_SAO_LINE, |
| ANV_VID_MEM_H265_SAO_TILE_LINE, |
| ANV_VID_MEM_H265_SAO_TILE_COLUMN, |
| ANV_VID_MEM_H265_DEC_MAX, |
| ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE = ANV_VID_MEM_H265_DEC_MAX, |
| ANV_VID_MEM_H265_ENC_MAX, |
| }; |
| |
| struct anv_video_session { |
| struct vk_video_session vk; |
| |
| /* the decoder needs some private memory allocations */ |
| struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_ENC_MAX]; |
| }; |
| |
| struct anv_video_session_params { |
| struct vk_video_session_parameters vk; |
| VkVideoEncodeRateControlModeFlagBitsKHR rc_mode; |
| }; |
| |
| void |
| anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f); |
| |
| void |
| anv_cmd_buffer_pending_pipe_debug(struct anv_cmd_buffer *cmd_buffer, |
| enum anv_pipe_bits bits, |
| const char* reason); |
| |
| static inline void |
| anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer, |
| enum anv_pipe_bits bits, |
| const char* reason) |
| { |
| cmd_buffer->state.pending_pipe_bits |= bits; |
| if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) { |
| anv_cmd_buffer_pending_pipe_debug(cmd_buffer, bits, reason); |
| } |
| } |
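| |
| /* Typical usage sketch (the bit name below is one of the enum |
| * anv_pipe_bits values defined earlier in this header; the reason string |
| * is illustrative): |
| * |
| * anv_add_pending_pipe_bits(cmd_buffer, |
| * ANV_PIPE_CS_STALL_BIT, |
| * "flush before query copy"); |
| * |
| * The reason string is only consumed when INTEL_DEBUG(DEBUG_PIPE_CONTROL) |
| * is enabled. |
| */ |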
| |
| struct anv_performance_configuration_intel { |
| struct vk_object_base base; |
| |
| struct intel_perf_registers *register_config; |
| |
| uint64_t config_id; |
| }; |
| |
| void anv_physical_device_init_va_ranges(struct anv_physical_device *device); |
| void anv_physical_device_init_perf(struct anv_physical_device *device, int fd); |
| void anv_device_perf_init(struct anv_device *device); |
| void anv_device_perf_close(struct anv_device *device); |
| void anv_perf_write_pass_results(struct intel_perf_config *perf, |
| struct anv_query_pool *pool, uint32_t pass, |
| const struct intel_perf_query_result *accumulated_results, |
| union VkPerformanceCounterResultKHR *results); |
| |
| void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir, |
| struct nir_shader *fs_nir, |
| struct anv_device *device, |
| const VkGraphicsPipelineCreateInfo *info); |
| |
| /* Use to emit a series of memcpy operations */ |
| struct anv_memcpy_state { |
| struct anv_device *device; |
| struct anv_cmd_buffer *cmd_buffer; |
| struct anv_batch *batch; |
| |
| /* Configuration programmed by the memcpy operation */ |
| struct intel_urb_config urb_cfg; |
| |
| struct anv_vb_cache_range vb_bound; |
| struct anv_vb_cache_range vb_dirty; |
| }; |
| |
| VkResult anv_device_init_internal_kernels(struct anv_device *device); |
| void anv_device_finish_internal_kernels(struct anv_device *device); |
| VkResult anv_device_get_internal_shader(struct anv_device *device, |
| enum anv_internal_kernel_name name, |
| struct anv_shader_bin **out_bin); |
| |
| VkResult anv_device_init_astc_emu(struct anv_device *device); |
| void anv_device_finish_astc_emu(struct anv_device *device); |
| void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer, |
| struct anv_image *image, |
| VkImageLayout layout, |
| const VkImageSubresourceLayers *subresource, |
| VkOffset3D block_offset, |
| VkExtent3D block_extent); |
| |
| /* This structure is used in 2 scenarios: |
| * |
| * - copy utrace timestamps from a command buffer so that the command |
| * buffer can be resubmitted multiple times without the recorded |
| * timestamps being overwritten before they're read back |
| * |
| * - emit trace points for queue debug tagging |
| * (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT) |
| */ |
| struct anv_utrace_submit { |
| struct anv_async_submit base; |
| |
| /* structure used by the perfetto glue */ |
| struct intel_ds_flush_data ds; |
| |
| /* Stream for temporary allocations */ |
| struct anv_state_stream dynamic_state_stream; |
| struct anv_state_stream general_state_stream; |
| |
| /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit |
| * timestamps) |
| */ |
| uint64_t last_full_timestamp; |
| |
| /* Memcpy state tracking (only used for timestamp copies on render engine) */ |
| struct anv_memcpy_state memcpy_state; |
| |
| /* Memcpy state tracking (only used for timestamp copies on compute engine) */ |
| struct anv_simple_shader simple_state; |
| }; |
| |
| void anv_device_utrace_init(struct anv_device *device); |
| void anv_device_utrace_finish(struct anv_device *device); |
| VkResult |
| anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, |
| uint32_t cmd_buffer_count, |
| struct anv_cmd_buffer **cmd_buffers, |
| struct anv_utrace_submit **out_submit); |
| |
| void |
| anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx, |
| void *cmdstream, |
| void *ts_from, uint64_t from_offset_B, |
| void *ts_to, uint64_t to_offset_B, |
| uint64_t size_B); |
| |
| static inline bool |
| anv_has_cooperative_matrix(const struct anv_physical_device *device) |
| { |
| return device->has_cooperative_matrix; |
| } |
| |
| #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ |
| VK_FROM_HANDLE(__anv_type, __name, __handle) |
| |
| VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer, |
| VK_OBJECT_TYPE_COMMAND_BUFFER) |
| VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) |
| VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) |
| VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice, |
| VK_OBJECT_TYPE_PHYSICAL_DEVICE) |
| VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE) |
| |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer, |
| VK_OBJECT_TYPE_BUFFER) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView, |
| VK_OBJECT_TYPE_BUFFER_VIEW) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool, |
| VK_OBJECT_TYPE_DESCRIPTOR_POOL) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base, |
| VkDescriptorSetLayout, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory, |
| VK_OBJECT_TYPE_DEVICE_MEMORY) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView, |
| VK_OBJECT_TYPE_IMAGE_VIEW) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline, |
| VK_OBJECT_TYPE_PIPELINE) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout, |
| VK_OBJECT_TYPE_PIPELINE_LAYOUT) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool, |
| VK_OBJECT_TYPE_QUERY_POOL) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler, |
| VK_OBJECT_TYPE_SAMPLER) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base, |
| VkPerformanceConfigurationINTEL, |
| VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base, |
| VkVideoSessionKHR, |
| VK_OBJECT_TYPE_VIDEO_SESSION_KHR) |
| VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base, |
| VkVideoSessionParametersKHR, |
| VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR) |
| |
| #define anv_genX(devinfo, thing) ({ \ |
| __typeof(&gfx9_##thing) genX_thing; \ |
| switch ((devinfo)->verx10) { \ |
| case 90: \ |
| genX_thing = &gfx9_##thing; \ |
| break; \ |
| case 110: \ |
| genX_thing = &gfx11_##thing; \ |
| break; \ |
| case 120: \ |
| genX_thing = &gfx12_##thing; \ |
| break; \ |
| case 125: \ |
| genX_thing = &gfx125_##thing; \ |
| break; \ |
| case 200: \ |
| genX_thing = &gfx20_##thing; \ |
| break; \ |
| case 300: \ |
| genX_thing = &gfx30_##thing; \ |
| break; \ |
| default: \ |
| unreachable("Unknown hardware generation"); \ |
| } \ |
| genX_thing; \ |
| }) |
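| |
| /* Usage sketch (assuming an entrypoint named init_device_state exists for |
| * each generation via anv_genX.h below): |
| * |
| * anv_genX(device->info, init_device_state)(device); |
| * |
| * This dispatches to gfx9_init_device_state, gfx11_init_device_state, |
| * etc. based on devinfo->verx10. |
| */ |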
| |
| /* Gen-specific function declarations */ |
| #ifdef genX |
| # include "anv_genX.h" |
| #else |
| # define genX(x) gfx9_##x |
| # include "anv_genX.h" |
| # undef genX |
| # define genX(x) gfx11_##x |
| # include "anv_genX.h" |
| # undef genX |
| # define genX(x) gfx12_##x |
| # include "anv_genX.h" |
| # undef genX |
| # define genX(x) gfx125_##x |
| # include "anv_genX.h" |
| # undef genX |
| # define genX(x) gfx20_##x |
| # include "anv_genX.h" |
| # undef genX |
| # define genX(x) gfx30_##x |
| # include "anv_genX.h" |
| # undef genX |
| #endif |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif /* ANV_PRIVATE_H */ |