blob: 520f6f7075f981730aa924528395d781728820b6 [file] [log] [blame]
/*
* Copyright © 2019 Raspberry Pi
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* based in part on radv driver which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H
#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>
#include <xf86drm.h>
#ifdef HAVE_VALGRIND
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif
#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/v3d_compiler.h"
#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "v3dv_entrypoints.h"
#include "v3dv_extensions.h"
#include "v3dv_bo.h"
#include "drm-uapi/v3d_drm.h"
/* FIXME: hooks for the packet definition functions. */
static inline void
pack_emit_reloc(void *cl, const void *reloc)
{
   /* Deliberate no-op: both arguments are ignored. */
}
/* Hook macros for the packet definition functions: the user-data type, the
* address (relocation) type, how a relocation is turned into an offset, and
* the relocation-emit and address-unpack callbacks.
*
* __gen_address_offset adds the relocation's own offset to its BO's offset
* when a BO is set, and uses the relocation's offset alone otherwise.
*
* NOTE(review): __gen_emit_reloc maps to cl_pack_emit_reloc, not to the
* pack_emit_reloc stub defined just above — confirm the stub is still needed.
*/
#define __gen_user_data struct v3dv_cl
#define __gen_address_type struct v3dv_cl_reloc
#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
(reloc)->offset)
#define __gen_emit_reloc cl_pack_emit_reloc
#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e)
#include "v3dv_cl.h"
#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"
/* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions.
* In the future we might want to drop that dependency, but for now it is
* good enough.
*/
#include "util/u_box.h"
#include "wsi_common.h"
#include "broadcom/cle/v3dx_pack.h"
/* A non-fatal assert. Useful for debugging. */
/* v3dv_assert(x): in DEBUG builds, evaluates x and, when it is false, prints
 * "<file>:<line> ASSERT: <expression>" followed by a newline to stderr.
 * Execution continues afterwards. In non-DEBUG builds the macro expands to
 * nothing, so x is not evaluated and must not carry required side effects.
 *
 * The trailing newline keeps consecutive reports from running together on
 * one line.
 */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif
/* Prints a printf-style message to stderr, but only when the V3D_DEBUG_PERF
* bit is set in V3D_DEBUG. Wrapped in do/while(0) so it behaves as a single
* statement.
*/
#define perf_debug(...) do { \
if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
fprintf(stderr, __VA_ARGS__); \
} while (0)
/* Iterates over the set bits of a 32-bit value, lowest bit first. On every
 * iteration `b` is assigned the zero-based index of the current set bit.
 * `dword` is evaluated once; `b` is evaluated multiple times and must be an
 * assignable integer lvalue. When the loop ends `b` has been assigned
 * __builtin_ffs(0) - 1, i.e. -1 converted to b's type.
 *
 * The mask is built from an unsigned 1 so that clearing bit 31 does not
 * left-shift into the sign bit of an int, which is undefined behaviour.
 */
#define for_each_bit(b, dword) \
   for (uint32_t __dword = (dword); \
        (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1u << (b)))
/* memcpy() of `count` elements from `src` to `dest`, with a compile-time
 * check that both pointers refer to elements of the same size. The byte
 * count is count * sizeof(*(src)). As with memcpy, the result of the
 * statement expression is the destination pointer.
 *
 * Every macro argument is parenthesized, including inside the sizeof
 * operands, so expression arguments such as `base + 1` are dereferenced as
 * a whole.
 */
#define typed_memcpy(dest, src, count) ({ \
   STATIC_ASSERT(sizeof(*(src)) == sizeof(*(dest))); \
   memcpy((dest), (src), (count) * sizeof(*(src))); \
})
/* Number of nanoseconds in one second. */
#define NSEC_PER_SEC 1000000000ull
/* From vulkan spec "If the multiple viewports feature is not enabled,
* scissorCount must be 1", ditto for viewportCount. For now we don't support
* that feature.
*/
#define MAX_VIEWPORTS 1
#define MAX_SCISSORS 1
/* Driver limits; several of them size fixed arrays declared later in this
* header (vertex bindings, descriptor sets, push constants, dynamic offsets).
*/
#define MAX_VBS 16
#define MAX_VERTEX_ATTRIBS 16
#define MAX_SETS 16
/* Push constant storage is declared as MAX_PUSH_CONSTANTS_SIZE / 4 uint32_t
* words, so this value is a byte count.
*/
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
/* Total dynamic buffers: uniform plus storage. */
#define MAX_DYNAMIC_BUFFERS \
(MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
/* Forward declaration; the full definition follows v3dv_physical_device. */
struct v3dv_instance;
/* Compile-time boolean telling whether the build defines USE_V3D_SIMULATOR,
* usable in ordinary C conditions.
*/
#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif
/* Opaque here; only used through a pointer in v3dv_physical_device. */
struct v3d_simulator_file;
/* Per-physical-device state. struct v3dv_instance embeds exactly one of
* these by value (its physicalDevice member).
*/
struct v3dv_physical_device {
VK_LOADER_DATA _loader_data;
struct v3dv_instance *instance;
struct v3dv_device_extension_table supported_extensions;
struct v3dv_physical_device_dispatch_table dispatch;
char *name;
/* File descriptors; presumably the DRM render and display nodes — confirm. */
int32_t render_fd;
int32_t display_fd;
uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
struct wsi_device wsi_device;
VkPhysicalDeviceMemoryProperties memory;
struct v3d_device_info devinfo;
/* NOTE(review): presumably only used when using_v3d_simulator is true —
* confirm.
*/
struct v3d_simulator_file *sim_file;
const struct v3d_compiler *compiler;
/* Presumably the source of v3dv_pipeline_stage::program_id values —
* confirm against the pipeline code.
*/
uint32_t next_program_id;
struct {
bool merge_jobs;
} options;
};
/* WSI set-up and tear-down for a physical device. v3dv_wsi_init reports its
* outcome as a VkResult; v3dv_wsi_finish returns nothing. Presumably both
* operate on physical_device->wsi_device — confirm in the implementation.
*/
VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
/* Application identification: names and versions of the application and
* engine, plus an API version. Presumably copied from VkApplicationInfo at
* instance creation — confirm. String ownership is not visible here.
*/
struct v3dv_app_info {
const char *app_name;
uint32_t app_version;
const char *engine_name;
uint32_t engine_version;
uint32_t api_version;
};
/* Instance-level state: allocator callbacks, application info, enabled
* extensions, dispatch tables, the embedded physical device and the debug
* report callbacks.
*/
struct v3dv_instance {
VK_LOADER_DATA _loader_data;
VkAllocationCallbacks alloc;
struct v3dv_app_info app_info;
struct v3dv_instance_extension_table enabled_extensions;
struct v3dv_instance_dispatch_table dispatch;
struct v3dv_device_dispatch_table device_dispatch;
/* Only a single physical device is stored (by value, below), so this
* count presumably never exceeds 1 — confirm.
*/
int physicalDeviceCount;
struct v3dv_physical_device physicalDevice;
struct vk_debug_report_instance debug_report_callbacks;
};
/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
/* Link for a list of these objects; see v3dv_queue::submit_wait_list. */
struct list_head list_link;
struct v3dv_device *device;
/* List of wait threads spawned for any command buffers in a particular
* call to vkQueueSubmit.
*
* NOTE(review): the array has a fixed capacity of 16 entries; nothing
* visible in this header bounds wait_thread_count against it — confirm
* the submit code checks it.
*/
uint32_t wait_thread_count;
struct {
pthread_t thread;
bool finished;
} wait_threads[16];
/* The master wait thread for the entire submit. This will wait for all
* other threads in this submit to complete before processing signal
* semaphores and fences.
*/
pthread_t master_wait_thread;
/* List of semaphores (and fence) to signal after all wait threads completed
* and all command buffer jobs in the submission have been sent to the GPU.
*/
uint32_t signal_semaphore_count;
VkSemaphore *signal_semaphores;
VkFence fence;
};
/* Queue state: owning device, creation flags and the mutex-protected list of
* in-flight submit wait infos.
*/
struct v3dv_queue {
VK_LOADER_DATA _loader_data;
struct v3dv_device *device;
VkDeviceQueueCreateFlags flags;
/* A list of active v3dv_queue_submit_wait_info */
struct list_head submit_wait_list;
/* A mutex to prevent concurrent access to the list of wait threads */
mtx_t mutex;
};
/* Entry type of the color clear pipeline cache (see
* v3dv_device::meta.color_clear.cache): a pipeline and its render pass.
*/
struct v3dv_meta_color_clear_pipeline {
VkPipeline pipeline;
VkRenderPass pass;
/* Presumably set when `pass` is owned by this entry and must be destroyed
* with it — confirm.
*/
bool free_render_pass;
};
/* Entry type of the depth clear pipeline cache (see
* v3dv_device::meta.depth_clear.cache); holds only the pipeline.
*/
struct v3dv_meta_depth_clear_pipeline {
VkPipeline pipeline;
};
/* Entry type of the blit pipeline caches (see v3dv_device::meta.blit.cache):
* a pipeline and its render pass.
*/
struct v3dv_meta_blit_pipeline {
VkPipeline pipeline;
VkRenderPass pass;
};
/* Size in bytes of a blit cache key: three 32-bit words. What the words
* encode is not visible in this header.
*/
#define V3DV_META_BLIT_CACHE_KEY_SIZE (3 * sizeof(uint32_t))
/* Pipeline cache object. As declared here it only carries the owning device
* and a mutex; no cached data is stored in this struct.
*/
struct v3dv_pipeline_cache {
VK_LOADER_DATA _loader_data;
struct v3dv_device *device;
mtx_t mutex;
};
/* Logical device state: allocator, dispatch tables, file descriptors, the
* single embedded queue, meta-operation resources and the BO cache.
*/
struct v3dv_device {
VK_LOADER_DATA _loader_data;
VkAllocationCallbacks alloc;
struct v3dv_instance *instance;
struct v3dv_device_extension_table enabled_extensions;
struct v3dv_device_dispatch_table dispatch;
/* Same field names as in v3dv_physical_device; presumably copies of its
* descriptors — confirm.
*/
int32_t render_fd;
int32_t display_fd;
struct v3d_device_info devinfo;
/* The device embeds exactly one queue. */
struct v3dv_queue queue;
/* A sync object to track the last job submitted to the GPU. */
uint32_t last_job_sync;
/* A mutex to prevent concurrent access to last_job_sync from the queue */
mtx_t mutex;
/* Resources used for meta operations */
struct {
/* Presumably guards the caches below — confirm. */
mtx_t mtx;
struct {
VkPipelineLayout playout;
struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
} color_clear;
struct {
VkPipelineLayout playout;
struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
} depth_clear;
struct {
VkDescriptorSetLayout dslayout;
VkPipelineLayout playout;
struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
} blit;
} meta;
struct v3dv_bo_cache {
/** List of struct v3d_bo freed, by age. */
struct list_head time_list;
/** List of struct v3d_bo freed, per size, by age. */
struct list_head *size_list;
/* Presumably the number of entries in size_list — confirm. */
uint32_t size_list_size;
mtx_t lock;
uint32_t cache_size;
uint32_t cache_count;
uint32_t max_cache_size;
} bo_cache;
/* Presumably running totals for allocated BOs — confirm. */
uint32_t bo_size;
uint32_t bo_count;
};
/* A device memory allocation: the backing BO and its memory type. */
struct v3dv_device_memory {
struct v3dv_bo *bo;
const VkMemoryType *type;
/* Presumably false when the BO is owned elsewhere and must not be freed
* with this object — confirm.
*/
bool has_bo_ownership;
};
/* Sentinel values (255) for the uint8_t format fields below; presumably
* they mean "no such format" — confirm against the format table.
*/
#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO 255
/* Describes a format: whether it is supported, its render target and
* texture encodings, swizzle, return size and filtering support.
*/
struct v3dv_format {
bool supported;
/* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
uint8_t rt_type;
/* One of V3D33_TEXTURE_DATA_FORMAT_*. Presumably TEXTURE_DATA_FORMAT_NO
* when the format cannot be sampled — confirm.
*/
uint8_t tex_type;
/* Swizzle to apply to the RGBA shader output for storing to the tile
* buffer, to the RGBA tile buffer to produce shader input (for
* blending), and for turning the rgba8888 texture sampler return
* value into shader rgba values.
*/
uint8_t swizzle[4];
/* Whether the return value is 16F/I/UI or 32F/I/UI. */
uint8_t return_size;
/* If the format supports (linear) filtering when texturing. */
bool supports_filtering;
};
/**
* Tiling mode enum used for v3d_resource.c, which maps directly to the Memory
* Format field of render target and Z/Stencil config.
*
* The enumerators have no explicit values, so they number from 0 in
* declaration order; do not reorder them.
*/
enum v3d_tiling_mode {
/* Untiled resources. Not valid as texture inputs. */
VC5_TILING_RASTER,
/* Single line of u-tiles. */
VC5_TILING_LINEARTILE,
/* Departure from standard 4-UIF block column format. */
VC5_TILING_UBLINEAR_1_COLUMN,
/* Departure from standard 4-UIF block column format. */
VC5_TILING_UBLINEAR_2_COLUMN,
/* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
* split 2x2 into utiles.
*/
VC5_TILING_UIF_NO_XOR,
/* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
* split 2x2 into utiles.
*
* NOTE(review): same description as VC5_TILING_UIF_NO_XOR; the difference
* between the two modes is not documented here — confirm and describe.
*/
VC5_TILING_UIF_XOR,
};
/* Layout of one slice of a resource; v3dv_image keeps one of these per mip
* level (its slices[V3D_MAX_MIP_LEVELS] array).
*/
struct v3d_resource_slice {
uint32_t offset;
uint32_t stride;
uint32_t padded_height;
/* Size of a single pane of the slice. For 3D textures, there will be
* a number of panes equal to the minified, power-of-two-aligned
* depth.
*/
uint32_t size;
uint8_t ub_pad;
enum v3d_tiling_mode tiling;
uint32_t padded_height_of_output_image_in_uif_blocks;
};
/* Image state: creation parameters, the format description, per-level slice
* layout, total size and the memory binding.
*/
struct v3dv_image {
VkImageType type;
VkImageAspectFlags aspects;
VkExtent3D extent;
uint32_t levels;
uint32_t array_size;
uint32_t samples;
VkImageUsageFlags usage;
VkImageCreateFlags create_flags;
VkImageTiling tiling;
VkFormat vk_format;
const struct v3dv_format *format;
/* Presumably bytes per pixel (cpp = "chars per pixel") — confirm. */
uint32_t cpp;
uint64_t drm_format_mod;
bool tiled;
/* One slice description per mip level. */
struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
uint64_t size; /* Total size in bytes */
uint32_t cube_map_stride;
uint32_t alignment;
/* Bound memory and the offset into it. Presumably `mem` is NULL until
* memory is bound — confirm.
*/
struct v3dv_device_memory *mem;
VkDeviceSize mem_offset;
};
/* Maps a VkImageType to a VkImageViewType. The exact mapping is defined in
* the implementation, which is not visible from this header.
*/
VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
/* A view of a v3dv_image: the viewed subresource range, format information,
* precomputed swizzle and prepacked texture shader state.
*/
struct v3dv_image_view {
const struct v3dv_image *image;
VkImageAspectFlags aspects;
VkExtent3D extent;
VkImageViewType type;
VkFormat vk_format;
const struct v3dv_format *format;
bool swap_rb;
uint32_t internal_bpp;
uint32_t internal_type;
uint32_t base_level;
uint32_t max_level;
uint32_t first_layer;
uint32_t last_layer;
uint32_t offset;
/* Precomputed (composed from createinfo->components and format swizzle)
* swizzles to pass in to the shader key.
*
* This could be also included on the descriptor bo, but the shader state
* packet doesn't need it on a bo, so we can just avoid a memory copy
*/
uint8_t swizzle[4];
/* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
* during UpdateDescriptorSets.
*
* Empirical tests show that cube arrays need a different shader state
* depending on whether they are used with a sampler or not, so for these
* we generate two states and select the one to use based on the descriptor
* type.
*/
uint8_t texture_shader_state[2][cl_packet_length(TEXTURE_SHADER_STATE)];
};
/* Returns a 32-bit offset for the given mip level and layer of an image.
* Presumably a byte offset relative to the start of the image's memory —
* confirm in the implementation.
*/
uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
/* Buffer state: size, usage flags, alignment and the memory binding
* (memory object plus offset into it).
*/
struct v3dv_buffer {
VkDeviceSize size;
VkBufferUsageFlags usage;
uint32_t alignment;
struct v3dv_device_memory *mem;
VkDeviceSize mem_offset;
};
/* A formatted view of a v3dv_buffer range. Note that offset and size are
* 32-bit here, while the underlying buffer uses VkDeviceSize.
*/
struct v3dv_buffer_view {
const struct v3dv_buffer *buffer;
VkFormat vk_format;
const struct v3dv_format *format;
uint32_t internal_bpp;
uint32_t internal_type;
uint32_t offset;
uint32_t size;
uint32_t num_elements;
};
/* An attachment reference used by a subpass: an attachment index and the
* image layout. Presumably mirrors VkAttachmentReference — confirm.
*/
struct v3dv_subpass_attachment {
uint32_t attachment;
VkImageLayout layout;
};
/* Attachment usage of one subpass: input, color and resolve attachment
* arrays plus a single depth/stencil attachment stored by value.
*/
struct v3dv_subpass {
uint32_t input_count;
struct v3dv_subpass_attachment *input_attachments;
uint32_t color_count;
struct v3dv_subpass_attachment *color_attachments;
/* No separate count is stored; presumably color_count entries when
* non-NULL — confirm.
*/
struct v3dv_subpass_attachment *resolve_attachments;
struct v3dv_subpass_attachment ds_attachment;
bool has_srgb_rt;
};
/* A render pass attachment: its Vulkan description plus a subpass index
* range. Presumably the first and last subpasses that use it — confirm.
*/
struct v3dv_render_pass_attachment {
VkAttachmentDescription desc;
uint32_t first_subpass;
uint32_t last_subpass;
};
/* Render pass: counted arrays of attachments and subpasses. */
struct v3dv_render_pass {
uint32_t attachment_count;
struct v3dv_render_pass_attachment *attachments;
uint32_t subpass_count;
struct v3dv_subpass *subpasses;
/* Presumably the shared backing storage that the per-subpass attachment
* pointers point into — confirm.
*/
struct v3dv_subpass_attachment *subpass_attachments;
};
/* Writes a granularity for subpass `subpass_idx` of `pass` into
* *granularity (output parameter). How it is computed is not visible here.
*/
void v3dv_subpass_get_granularity(struct v3dv_render_pass *pass,
uint32_t subpass_idx,
VkExtent2D *granularity);
/* Framebuffer: dimensions, layer count and a trailing variable-length array
* of attachment image views. Because of the zero-length trailing array, the
* object must be allocated with room for the attachment pointers;
* presumably attachment_count of them — confirm.
*/
struct v3dv_framebuffer {
uint32_t width;
uint32_t height;
uint32_t layers;
uint32_t attachment_count;
uint32_t color_attachment_count;
struct v3dv_image_view *attachments[0];
};
/* Tiling parameters for a frame: frame dimensions, render target count,
* internal bpp, layer count, tile size, tile counts, supertile size and the
* frame size expressed in supertiles. Units are not stated in this header.
*/
struct v3dv_frame_tiling {
uint32_t width;
uint32_t height;
uint32_t render_target_count;
uint32_t internal_bpp;
uint32_t layers;
uint32_t tile_width;
uint32_t tile_height;
uint32_t draw_tiles_x;
uint32_t draw_tiles_y;
uint32_t supertile_width;
uint32_t supertile_height;
uint32_t frame_width_in_supertiles;
uint32_t frame_height_in_supertiles;
};
/* Returns an internal bpp value for a framebuffer/subpass pair. Presumably
* the maximum over the subpass's attachments, matching the max_internal_bpp
* parameter of v3dv_job_start_frame — confirm in the implementation.
*/
uint8_t v3dv_framebuffer_compute_internal_bpp(const struct v3dv_framebuffer *framebuffer,
const struct v3dv_subpass *subpass);
/* Command pool: allocator callbacks and a list of command buffers.
* v3dv_cmd_buffer has a pool_link member, presumably the link into this
* list — confirm.
*/
struct v3dv_cmd_pool {
VkAllocationCallbacks alloc;
struct list_head cmd_buffers;
};
/* Command buffer status values, numbered explicitly from 0 to 3. */
enum v3dv_cmd_buffer_status {
V3DV_CMD_BUFFER_STATUS_NEW = 0,
V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
};
/* A clear value: either four 32-bit color words, or a depth (z) / stencil
* (s) pair. Being a union, both views share the same storage.
*/
union v3dv_clear_value {
uint32_t color[4];
struct {
float z;
uint8_t s;
};
};
/* Per-attachment clear state kept by a command buffer: the API-level value
* and its hardware encoding.
*/
struct v3dv_cmd_buffer_attachment_state {
/* The original clear value as provided by the Vulkan API */
VkClearValue vk_clear_value;
/* The hardware clear value */
union v3dv_clear_value clear_value;
};
/* Converts a Vulkan clear color into hardware form for the given internal
* type and size, writing the result through hw_color (output parameter).
* Presumably writes up to four words, matching v3dv_clear_value::color —
* confirm.
*/
void v3dv_get_hw_clear_color(const VkClearColorValue *color,
uint32_t internal_type,
uint32_t internal_size,
uint32_t *hw_color);
/* Viewport state: up to MAX_VIEWPORTS viewports, each with a 3-component
* translate and scale vector (see v3dv_viewport_compute_xform, which takes
* arrays of the same shape).
*/
struct v3dv_viewport_state {
uint32_t count;
VkViewport viewports[MAX_VIEWPORTS];
float translate[MAX_VIEWPORTS][3];
float scale[MAX_VIEWPORTS][3];
};
/* Scissor state: a count and up to MAX_SCISSORS scissor rectangles. */
struct v3dv_scissor_state {
uint32_t count;
VkRect2D scissors[MAX_SCISSORS];
};
/* Mostly a v3dv mapping of VkDynamicState, used to track which data is
* defined as dynamic.
*
* Each state is a distinct bit so values can be OR-ed into a mask.
* V3DV_DYNAMIC_ALL covers exactly the eight bits declared above it and must
* be updated when a new bit is added.
*/
enum v3dv_dynamic_state_bits {
V3DV_DYNAMIC_VIEWPORT = 1 << 0,
V3DV_DYNAMIC_SCISSOR = 1 << 1,
V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
V3DV_DYNAMIC_ALL = (1 << 8) - 1,
};
/* Flags for dirty pipeline state.
*
* Each flag is a distinct bit; v3dv_cmd_buffer_state::dirty is a uint32_t,
* presumably holding an OR of these — confirm. Note the bit positions do
* not match those of enum v3dv_dynamic_state_bits.
*/
enum v3dv_cmd_dirty_bits {
V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 6,
V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 7,
V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 8,
V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 9,
V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 10,
V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 11,
V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 12,
V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 13,
V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 14,
};
/* Values of the state that can be set dynamically: viewport, scissor,
* stencil masks and reference (front/back), blend constants, depth bias and
* line width.
*/
struct v3dv_dynamic_state {
/**
* Bitmask of (1 << VK_DYNAMIC_STATE_*).
* Defines the set of saved dynamic state.
*
* NOTE(review): enum v3dv_dynamic_state_bits in this header assigns bits
* in a different order than VkDynamicState (e.g. bit 2 is the stencil
* compare mask there). Confirm which encoding this mask really holds and
* update this comment accordingly.
*/
uint32_t mask;
struct v3dv_viewport_state viewport;
struct v3dv_scissor_state scissor;
struct {
uint32_t front;
uint32_t back;
} stencil_compare_mask;
struct {
uint32_t front;
uint32_t back;
} stencil_write_mask;
struct {
uint32_t front;
uint32_t back;
} stencil_reference;
float blend_constants[4];
struct {
float constant_factor;
float slope_factor;
} depth_bias;
float line_width;
};
/* Default dynamic state values; defined in a source file, only declared
* here.
*/
extern const struct v3dv_dynamic_state default_dynamic_state;
/* Computes the scale and translate vectors for a viewport, writing three
* floats to each output array.
*/
void v3dv_viewport_compute_xform(const VkViewport *viewport,
float scale[3],
float translate[3]);
/* EZ state tracked per job (see v3dv_job::ez_state and first_ez_state).
* VC5_EZ_UNDECIDED is 0, so zero-initialized jobs start undecided.
* Presumably "EZ" is early-Z and GT_GE / LT_LE name the depth compare
* direction — confirm.
*/
enum v3dv_ez_state {
VC5_EZ_UNDECIDED = 0,
VC5_EZ_GT_GE,
VC5_EZ_LT_LE,
VC5_EZ_DISABLED,
};
/* Kinds of job. Going by the names there are GPU jobs (CL, secondary CL,
* TFU, CSD) and CPU jobs; the CPU kinds correspond one-to-one with the
* members of the v3dv_job::cpu union.
*/
enum v3dv_job_type {
V3DV_JOB_TYPE_GPU_CL = 0,
V3DV_JOB_TYPE_GPU_CL_SECONDARY,
V3DV_JOB_TYPE_GPU_TFU,
V3DV_JOB_TYPE_GPU_CSD,
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
V3DV_JOB_TYPE_CPU_END_QUERY,
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
V3DV_JOB_TYPE_CPU_SET_EVENT,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
};
/* Parameters of a reset-queries CPU job: a query pool and a range of
* `count` queries starting at `first`.
*/
struct v3dv_reset_query_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t first;
uint32_t count;
};
/* Parameters of an end-query CPU job: a query pool and a query index. Also
* used as the element type of v3dv_cmd_buffer_state::query.end.states.
*/
struct v3dv_end_query_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t query;
};
/* Parameters of a copy-query-results CPU job: the source pool and query
* range, the destination buffer with offset and stride, and the result
* flags. The fields match the parameters of
* v3dv_cmd_buffer_copy_query_results.
*/
struct v3dv_copy_query_results_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t first;
uint32_t count;
struct v3dv_buffer *dst;
uint32_t offset;
uint32_t stride;
VkQueryResultFlags flags;
};
/* Parameters of a set-event CPU job: the event and the state value to
* store. `state` has the same type as v3dv_event::state.
*/
struct v3dv_event_set_cpu_job_info {
struct v3dv_event *event;
int state;
};
/* Parameters of a wait-events CPU job. */
struct v3dv_event_wait_cpu_job_info {
/* List of events to wait on */
uint32_t event_count;
struct v3dv_event **events;
/* Whether any postponed jobs after the wait should wait on semaphores */
bool sem_wait;
};
/* Parameters of a clear-attachments CPU job: a fixed-size array of
* attachments to clear (attachment_count of them are presumably valid —
* confirm) and a counted array of rectangles.
*/
struct v3dv_clear_attachments_cpu_job_info {
uint32_t attachment_count;
VkClearAttachment attachments[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
uint32_t rect_count;
VkClearRect *rects;
};
/* Parameters of a buffer-to-image copy CPU job: the image and buffer, the
* buffer offset and strides, the image region and the mip level / layer
* range. Stride units are not stated here; presumably bytes — confirm.
*/
struct v3dv_copy_buffer_to_image_cpu_job_info {
struct v3dv_image *image;
struct v3dv_buffer *buffer;
uint32_t buffer_offset;
uint32_t buffer_stride;
uint32_t buffer_layer_stride;
VkOffset3D image_offset;
VkExtent3D image_extent;
uint32_t mip_level;
uint32_t base_layer;
uint32_t layer_count;
};
/* Parameters of an indirect CSD CPU job: the buffer and offset (presumably
* where the indirect workgroup counts live — confirm), the CSD job to
* update and per-dimension uniform locations. See
* v3dv_cmd_buffer_rewrite_indirect_csd_job, which takes this struct.
*/
struct v3dv_csd_indirect_cpu_job_info {
struct v3dv_buffer *buffer;
uint32_t offset;
struct v3dv_job *csd_job;
uint32_t wg_size;
/* One pointer per dimension; presumably only meaningful when
* needs_wg_uniform_rewrite is set — confirm.
*/
uint32_t *wg_uniform_offsets[3];
bool needs_wg_uniform_rewrite;
};
/* A unit of work recorded into a command buffer. `type` selects the kind of
* job; depending on it, the command lists, the `cpu` union, `tfu` or `csd`
* carry the job-specific data.
*/
struct v3dv_job {
/* Link for a list of jobs; v3dv_cmd_buffer keeps such a list in `jobs`. */
struct list_head list_link;
/* We only create job clones when executing secondary command buffers into
* primaries. These clones don't make deep copies of the original object
* so we want to flag them to avoid freeing resources they don't own.
*/
bool is_clone;
enum v3dv_job_type type;
struct v3dv_device *device;
struct v3dv_cmd_buffer *cmd_buffer;
/* Command lists; presumably binning (bcl), render (rcl) and indirect
* state — confirm.
*/
struct v3dv_cl bcl;
struct v3dv_cl rcl;
struct v3dv_cl indirect;
/* Set of all BOs referenced by the job. This will be used for making
* the list of BOs that the kernel will need to have paged in to
* execute our job.
*/
struct set *bos;
uint32_t bo_count;
struct v3dv_bo *tile_alloc;
struct v3dv_bo *tile_state;
bool tmu_dirty_rcl;
uint32_t first_subpass;
/* When the current subpass is split into multiple jobs, this flag is set
* to true for any jobs after the first in the same subpass.
*/
bool is_subpass_continue;
/* If this job is the last job emitted for a subpass. */
bool is_subpass_finish;
struct v3dv_frame_tiling frame_tiling;
enum v3dv_ez_state ez_state;
enum v3dv_ez_state first_ez_state;
/* Number of draw calls recorded into the job */
uint32_t draw_count;
/* A flag indicating whether we want to flush every draw separately. This
* can be used for debugging, or for cases where special circumstances
* require this behavior.
*/
bool always_flush;
/* Whether we need to serialize this job in our command stream */
bool serialize;
/* If this is a CL job, whether we should sync before binning */
bool needs_bcl_sync;
/* Job specs for CPU jobs */
union {
struct v3dv_reset_query_cpu_job_info query_reset;
struct v3dv_end_query_cpu_job_info query_end;
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
struct v3dv_event_set_cpu_job_info event_set;
struct v3dv_event_wait_cpu_job_info event_wait;
struct v3dv_clear_attachments_cpu_job_info clear_attachments;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
} cpu;
/* Job specs for TFU jobs */
struct drm_v3d_submit_tfu tfu;
/* Job specs for CSD jobs */
struct {
struct v3dv_bo *shared_memory;
uint32_t wg_count[3];
struct drm_v3d_submit_csd submit;
} csd;
};
/* Job life-cycle and set-up entry points. Only the signatures are visible
* here; behaviour is defined in the implementation.
*
* NOTE(review): subpass_idx is int32_t in v3dv_job_init but uint32_t in
* v3dv_cmd_buffer_create_cpu_job. Presumably a negative index means "no
* subpass" — confirm, and check that callers do not pass such a value
* through the unsigned variant.
*/
void v3dv_job_init(struct v3dv_job *job,
enum v3dv_job_type type,
struct v3dv_device *device,
struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);
/* Registers a BO with the job; see v3dv_job::bos. */
void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_emit_binning_flush(struct v3dv_job *job);
/* The parameters correspond to fields of v3dv_frame_tiling. */
void v3dv_job_start_frame(struct v3dv_job *job,
uint32_t width,
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
uint8_t max_internal_bpp);
/* Returns a job pointer; presumably NULL on allocation failure — confirm. */
struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
enum v3dv_job_type type,
struct v3dv_cmd_buffer *cmd_buffer,
uint32_t subpass_idx);
/* A bound vertex buffer: the buffer and an offset into it. */
struct v3dv_vertex_binding {
struct v3dv_buffer *buffer;
VkDeviceSize offset;
};
/* Bound descriptor sets (up to MAX_SETS) and dynamic offsets (up to
* MAX_DYNAMIC_BUFFERS).
*/
struct v3dv_descriptor_state {
struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
/* Presumably a bitmask with one bit per valid entry of descriptor_sets —
* confirm.
*/
uint32_t valid;
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};
/* Recording state of a command buffer: current render pass, framebuffer,
* job, pipeline, descriptor and dynamic state, vertex/index bindings, state
* saved across meta operations and query bookkeeping.
*/
struct v3dv_cmd_buffer_state {
struct v3dv_render_pass *pass;
struct v3dv_framebuffer *framebuffer;
VkRect2D render_area;
/* Current job being recorded */
struct v3dv_job *job;
uint32_t subpass_idx;
struct v3dv_pipeline *pipeline;
/* Two descriptor states; presumably one per pipeline bind point
* (graphics and compute) — confirm the indexing.
*/
struct v3dv_descriptor_state descriptor_state[2];
struct v3dv_dynamic_state dynamic;
/* Presumably an OR of enum v3dv_cmd_dirty_bits — confirm. */
uint32_t dirty;
/* Current clip window. We use this to check whether we have an active
* scissor, since in that case we can't use TLB clears and need to fallback
* to drawing rects.
*/
VkRect2D clip_window;
/* Whether our render area is aligned to tile boundaries. If this is false
* then we have tiles that are only partially covered by the render area,
* and therefore, we need to be careful with our loads and stores so we don't
* modify pixels for the tile area that is not covered by the render area.
* This means, for example, that we can't use the TLB to clear, since that
* always clears full tiles.
*/
bool tile_aligned_render_area;
/* Presumably the allocated capacity of `attachments` — confirm. */
uint32_t attachment_alloc_count;
struct v3dv_cmd_buffer_attachment_state *attachments;
struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
struct {
VkBuffer buffer;
VkDeviceSize offset;
uint8_t index_size;
} index_buffer;
/* Used to flag OOM conditions during command buffer recording */
bool oom;
/* Whether we have recorded a pipeline barrier that we still need to
* process.
*/
bool has_barrier;
bool has_bcl_barrier;
/* Command buffer state saved during a meta operation */
struct {
uint32_t subpass_idx;
VkRenderPass pass;
VkPipeline pipeline;
VkFramebuffer framebuffer;
uint32_t attachment_alloc_count;
uint32_t attachment_count;
struct v3dv_cmd_buffer_attachment_state *attachments;
bool tile_aligned_render_area;
VkRect2D render_area;
struct v3dv_dynamic_state dynamic;
/* A single descriptor state is saved here, unlike the two-entry array
* above; has_descriptor_state presumably tells whether it is valid —
* confirm.
*/
struct v3dv_descriptor_state descriptor_state;
bool has_descriptor_state;
/* Same size as v3dv_cmd_buffer::push_constants_data. */
uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
} meta;
/* Command buffer state for queries */
struct {
/* A list of vkCmdQueryEnd commands recorded in the command buffer during
* a render pass. We queue these here and then schedule the corresponding
* CPU jobs for them at the time we finish the GPU job in which they have
* been recorded.
*/
struct {
uint32_t used_count;
uint32_t alloc_count;
struct v3dv_end_query_cpu_job_info *states;
} end;
/* This is not NULL if we have an active query, that is, we have called
* vkCmdBeginQuery but not vkCmdEndQuery.
*/
struct v3dv_bo *active_query;
} query;
};
/* The following struct represents the info from a descriptor that we store on
* the host memory. They are mostly links to other existing vulkan objects,
* like the image_view in order to access to swizzle info, or the buffer used
* for a UBO/SSBO, for example.
*
* The image/sampler pair and the buffer/offset/range triple live in an
* anonymous union, so only one of the two groups is meaningful at a time;
* presumably selected by `type` — confirm.
*
* FIXME: revisit if makes sense to just move everything that would be needed
* from a descriptor to the bo.
*/
struct v3dv_descriptor {
VkDescriptorType type;
union {
struct {
struct v3dv_image_view *image_view;
struct v3dv_sampler *sampler;
};
struct {
struct v3dv_buffer *buffer;
uint32_t offset;
uint32_t range;
};
};
};
/* The following v3dv_xxx_descriptor structs represent descriptor info that we
* upload to a bo, specifically a subregion of the descriptor pool bo.
*
* The general rule that we apply right now to decide which info goes to such
* bo is that we upload those that are referenced by an address when emitting
* a packet, so needed to be uploaded to a bo in any case.
*
* Note that these structs are mostly helpers that improve the semantics when
* doing all that, but we could do as other mesa vulkan drivers and just
* upload the info we know it is expected based on the context.
*
* Also note that the sizes are aligned, as there is an alignment requirement
* for addresses.
*/
/* Texture shader state only, with the packet length aligned to 32. */
struct v3dv_sampled_image_descriptor {
uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
};
/* Uploaded form of a sampler descriptor: sampler state only, with the
* packet length aligned to 32.
*/
struct v3dv_sampler_descriptor {
uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
};
/* Uploaded form of a combined image/sampler descriptor: texture shader
* state followed by sampler state, each with its packet length aligned
* to 32.
*/
struct v3dv_combined_image_sampler_descriptor {
uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
};
/* Aux struct as it is really common to have a pair bo/address. Called
* resource because it is really likely that we would need something like that
* if we work on reusing the same bo at different points (like the shader
* assembly).
*/
struct v3dv_resource {
struct v3dv_bo *bo;
/* Offset within `bo`. */
uint32_t offset;
};
/* A single query: an availability hint and the BO associated with it. The
* name suggests maybe_available is only a hint, not a definitive result —
* confirm.
*/
struct v3dv_query {
bool maybe_available;
struct v3dv_bo *bo;
};
/* A query pool: a counted array of queries. */
struct v3dv_query_pool {
uint32_t query_count;
struct v3dv_query *queries;
};
/* Fetches results for `count` queries starting at `first` from `pool` into
* `data`, using `stride` and `flags`; the outcome is reported as a VkResult.
* Presumably follows vkGetQueryPoolResults semantics — confirm.
*/
VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t first,
uint32_t count,
void *data,
VkDeviceSize stride,
VkQueryResultFlags flags);
/* Callback type used to destroy a private object of a command buffer. It
* receives the device, the object as a 64-bit value and the allocation
* callbacks.
*/
typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
uint64_t pobj,
VkAllocationCallbacks *alloc);
/* Entry of v3dv_cmd_buffer::private_objs: a 64-bit object value and the
* callback used to destroy it.
*/
struct v3dv_cmd_buffer_private_obj {
struct list_head list_link;
uint64_t obj;
v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};
/* Command buffer object: owning device and pool, usage/level/status, the
* recording state, push constant storage, private objects, per-command
* buffer meta resources and the list of recorded jobs.
*/
struct v3dv_cmd_buffer {
VK_LOADER_DATA _loader_data;
struct v3dv_device *device;
struct v3dv_cmd_pool *pool;
/* Presumably the link into pool->cmd_buffers — confirm. */
struct list_head pool_link;
/* Used at submit time to link command buffers in the submission that have
* spawned wait threads, so we can then wait on all of them to complete
* before we process any signal semaphores or fences.
*/
struct list_head list_link;
VkCommandBufferUsageFlags usage_flags;
VkCommandBufferLevel level;
enum v3dv_cmd_buffer_status status;
struct v3dv_cmd_buffer_state state;
/* MAX_PUSH_CONSTANTS_SIZE bytes stored as 32-bit words. */
uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
struct v3dv_resource push_constants_resource;
/* Collection of Vulkan objects created internally by the driver (typically
* during recording of meta operations) that are part of the command buffer
* and should be destroyed with it.
*/
struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
/* Per-command buffer resources for meta operations. */
struct {
struct {
VkDescriptorPool dspool;
} blit;
} meta;
/* List of jobs in the command buffer. For primary command buffers it
* represents the jobs we want to submit to the GPU. For secondary command
* buffers it represents jobs that will be merged into a primary command
* buffer via vkCmdExecuteCommands.
*/
struct list_head jobs;
};
/* Job and subpass entry points for command buffer recording. Only the
* signatures are visible here.
*
* NOTE(review): v3dv_cmd_buffer_start_job takes a signed subpass index
* while the subpass_start/resume functions take an unsigned one; presumably
* a negative value means "outside a subpass" — confirm.
*/
struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx,
enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t subpass_idx);
void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
/* Save / restore pair for command buffer state around a meta operation;
* presumably they fill and consume v3dv_cmd_buffer_state::meta — confirm.
* push_descriptor_state presumably controls whether descriptor state is
* saved as well (see has_descriptor_state in that struct).
*/
void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t dirty_dynamic_state,
bool needs_subpass_resume);
/* For render target index `rt`, reports bpp, type and clamp through the
* three pointer parameters, which from their names and types look like
* outputs — confirm.
*/
void v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
int rt,
uint32_t *rt_bpp,
uint32_t *rt_type,
uint32_t *rt_clamp);
/* Query commands recorded into a command buffer. Their parameters match
* the fields of the corresponding *_cpu_job_info structs declared earlier
* in this header (reset, end, copy results).
*/
void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
uint32_t first,
uint32_t count);
void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
uint32_t query,
VkQueryControlFlags flags);
void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
uint32_t query);
void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
uint32_t first,
uint32_t count,
struct v3dv_buffer *dst,
uint32_t offset,
uint32_t stride,
VkQueryResultFlags flags);
/* Adds a TFU job described by `tfu` to the command buffer. */
void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
struct drm_v3d_submit_tfu *tfu);
/* Rewrites an indirect CSD job from the given workgroup counts. Presumably
* wg_counts points to three values, one per dimension, matching
* v3dv_job::csd.wg_count — confirm.
*/
void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
const uint32_t *wg_counts);
/* Registers a private object and its destroy callback with the command
* buffer; see v3dv_cmd_buffer::private_objs.
*/
void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
uint64_t obj,
v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
/* Semaphore state; same layout as struct v3dv_fence. */
struct v3dv_semaphore {
/* A syncobject handle associated with this semaphore */
uint32_t sync;
/* The file handle of a fence that we imported into our syncobject */
int32_t fd;
};
/* Fence state; same layout as struct v3dv_semaphore. */
struct v3dv_fence {
/* A syncobject handle associated with this fence */
uint32_t sync;
/* The file handle of a fence that we imported into our syncobject */
int32_t fd;
};
/* Event state: a single integer. The values it takes are not visible here;
* v3dv_event_set_cpu_job_info carries the value to store.
*/
struct v3dv_event {
int state;
};
/* A shader module, holding either an internally generated NIR shader or a
* SPIR-V shader stored inline in the trailing `data` array.
*/
struct v3dv_shader_module {
/* A NIR shader. We create NIR modules for shaders that are generated
* internally by the driver.
*/
struct nir_shader *nir;
/* A SPIR-V shader */
unsigned char sha1[20];
/* Presumably the size in bytes of `data` — confirm. */
uint32_t size;
/* Zero-length trailing array: the object must be allocated with extra
* room for the shader data.
*/
char data[0];
};
/* FIXME: the same function at anv, radv and tu, perhaps create common
* place?
*/
static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
{
   /* Exactly one stage bit is expected (checked in assert-enabled builds). */
   assert(__builtin_popcount(vk_stage) == 1);

   /* ffs() is 1-based, so subtracting one yields the 0-based position of
    * the set bit, which is returned as the stage value.
    */
   const int stage_bit = ffs(vk_stage) - 1;
   return stage_bit;
}
/* A compiled shader variant: its prog_data and the BO holding the assembly.
* The prog_data union gives typed views (base/vs/fs/cs) of one pointer;
* which view is valid presumably depends on the shader stage — confirm.
*/
struct v3dv_shader_variant {
union {
struct v3d_prog_data *base;
struct v3d_vs_prog_data *vs;
struct v3d_fs_prog_data *fs;
struct v3d_compute_prog_data *cs;
} prog_data;
/* FIXME: using one bo per shader. Eventually we would be interested on
* reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
* shaders.
*/
struct v3dv_bo *assembly_bo;
};
/*
* Per-stage info for each stage, useful so shader_module_compile_to_nir and
* other methods doesn't have so many parameters.
*
* FIXME: for the case of the coordinate shader and the vertex shader, module,
* entrypoint, spec_info and nir are the same. There are also info only
* relevant to some stages. But seemed too much a hassle to create a new
* struct only to handle that. Revisit if such kind of info starts to grow.
*/
struct v3dv_pipeline_stage {
struct v3dv_pipeline *pipeline;
gl_shader_stage stage;
/* FIXME: is_coord only make sense if stage == MESA_SHADER_VERTEX. Perhaps
* a stage base/vs/fs as keys and prog_data?
*/
bool is_coord;
const struct v3dv_shader_module *module;
const char *entrypoint;
const VkSpecializationInfo *spec_info;
nir_shader *nir;
/** A name for this program, so you can track it in shader-db output. */
uint32_t program_id;
/** How many variants of this program were compiled, for shader-db. */
uint32_t compiled_variant_count;
/* The following are the default v3d_key populated using
* VkCreateGraphicsPipelineCreateInfo. Variants will be created tweaking
* them, so we don't need to maintain a copy of that create info struct
* around
*
* The union has no compute member, unlike v3dv_shader_variant::prog_data.
*/
union {
struct v3d_key base;
struct v3d_vs_key vs;
struct v3d_fs_key fs;
} key;
/* Cache with all the shader variant.
*/
struct hash_table *cache;
/* Presumably the variant currently selected from `cache` — confirm. */
struct v3dv_shader_variant *current_variant;
/* FIXME: only make sense on vs, so perhaps a v3dv key like radv? or a kind
* of pipe_draw_info
*/
enum pipe_prim_type topology;
};
/* FIXME: although the full vpm_config is not required at this point, as we
* don't plan to initially support GS, it is more readable and serves as a
* placeholder, to have the struct and fill it with default values.
*
* The meaning of the individual fields (As, Vc, Gs, Gd, Gv, Ve) is not
* documented in this header; see the hardware/compiler documentation.
*/
struct vpm_config {
uint32_t As;
uint32_t Vc;
uint32_t Gs;
uint32_t Gd;
uint32_t Gv;
uint32_t Ve;
uint32_t gs_width;
};
/* We are using the descriptor pool entry for two things:
* * Track the allocated sets, so we can properly free them if needed
* * Track the suballocated pool bo regions, so if some descriptor set is
* freed, the gap can be reallocated later.
*
* Those only make sense if the pool was not created with the flag
* VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
*
* NOTE(review): the sentence above looks inverted with respect to the comment
* on the host_memory_* fields of struct v3dv_descriptor_pool, which says that
* sets are individually allocated/freed when the flag IS set. Confirm which
* one is right against the descriptor pool implementation.
*/
struct v3dv_descriptor_pool_entry
{
struct v3dv_descriptor_set *set;
/* Offset and size of the subregion allocated for this entry from the
* pool->bo
*/
uint32_t offset;
uint32_t size;
};
/* Descriptor pool object. The struct ends with a zero-length array of
* entries, so whoever allocates a pool has to reserve the storage for the
* entries right after the struct itself.
*/
struct v3dv_descriptor_pool {
struct v3dv_bo *bo;
/* Current offset at the descriptor bo. 0 means that we didn't use it for
* any descriptor. If the descriptor bo is NULL, current offset is
* meaningless
*/
uint32_t current_offset;
/* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the
* descriptor sets are handled as a whole as pool memory and handled by the
* following pointers. If set, they are not used, and descriptor sets are
* allocated/freed individually.
*/
uint8_t *host_memory_base;
uint8_t *host_memory_ptr;
uint8_t *host_memory_end;
/* NOTE(review): presumably the number of entries currently in use and the
* capacity of the trailing entries array, respectively; confirm against the
* pool creation/allocation code.
*/
uint32_t entry_count;
uint32_t max_entry_count;
/* Trailing variable-length storage; must remain the last member. */
struct v3dv_descriptor_pool_entry entries[0];
};
/* A descriptor set: a reference to its pool and layout, followed by a
* trailing variable-length array with the descriptors themselves.
*/
struct v3dv_descriptor_set {
struct v3dv_descriptor_pool *pool;
const struct v3dv_descriptor_set_layout *layout;
/* Offset relative to the descriptor pool bo for this set */
uint32_t base_offset;
/* The descriptors below can be indexed (set/binding) using the set_layout.
* Trailing variable-length storage; must remain the last member.
*/
struct v3dv_descriptor descriptors[0];
};
/* Layout information for a single binding of a descriptor set layout. */
struct v3dv_descriptor_set_binding_layout {
VkDescriptorType type;
/* Number of array elements in this binding */
uint32_t array_size;
/* Index into the flattened descriptor set */
uint32_t descriptor_index;
uint32_t dynamic_offset_count;
uint32_t dynamic_offset_index;
/* Offset into the descriptor set where this descriptor lives (the final
* offset on the descriptor bo needs to take into account set->base_offset)
*/
uint32_t descriptor_offset;
/* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
* if there are no immutable samplers.
*/
uint32_t immutable_samplers_offset;
};
/* Descriptor set layout. The struct ends with a zero-length array with one
* element per binding, so the storage for the bindings has to be reserved
* right after the struct when it is allocated.
*/
struct v3dv_descriptor_set_layout {
VkDescriptorSetLayoutCreateFlags flags;
/* Number of bindings in this descriptor set */
uint32_t binding_count;
/* Total bo size needed for this descriptor set
*/
uint32_t bo_size;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
/* Number of descriptors in this descriptor set */
uint32_t descriptor_count;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
/* Bindings in this descriptor set. Trailing variable-length storage; must
* remain the last member.
*/
struct v3dv_descriptor_set_binding_layout binding[0];
};
/* Pipeline layout: a fixed-size table with up to MAX_SETS descriptor set
* layouts plus the push constant size.
*/
struct v3dv_pipeline_layout {
struct {
struct v3dv_descriptor_set_layout *layout;
uint32_t dynamic_offset_start;
} set[MAX_SETS];
/* NOTE(review): presumably the number of valid entries in set[]; confirm
* against the pipeline layout creation code.
*/
uint32_t num_sets;
uint32_t dynamic_offset_count;
uint32_t push_constant_size;
};
/* Fixed-capacity descriptor map made of four arrays with 64 slots each.
*
* NOTE(review): the arrays look like parallel arrays indexed by descriptor
* index, in which case num_desc must never exceed 64; nothing in this header
* enforces that limit.
*/
struct v3dv_descriptor_map {
/* TODO: avoid fixed size array/justify the size */
unsigned num_desc; /* Number of descriptors */
int set[64];
int binding[64];
int array_index[64];
int array_size[64];
};
/* Sampler object: a few flags plus the prepacked hardware sampler state. */
struct v3dv_sampler {
bool compare_enable;
bool unnormalized_coordinates;
bool clamp_to_transparent_black_border;
/* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
* configuration. If needed it will be copied to the descriptor info during
* UpdateDescriptorSets
*/
uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)];
};
/* NOTE(review): presumably a sentinel sampler index meaning "no sampler";
* confirm against its users. The value fits in the 24 bits that the combined
* index key reserves for the sampler index.
*/
#define V3DV_NO_SAMPLER_IDX 666
/*
* The following two functions are used with the combined to/from
* texture/sampler index maps in v3dv_pipeline.
*/
/* Packs a texture index and a sampler index into a single 32-bit key: the
 * texture index is shifted into the top 8 bits and OR'ed with the sampler
 * index. The sampler index is not masked by this function.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   const uint32_t texture_bits = texture_index << 24;

   return texture_bits | sampler_index;
}
/* Splits a combined index key into its two halves: the top 8 bits are the
 * texture index and the low 24 bits are the sampler index. Either output
 * pointer may be NULL, in which case that half is not written.
 */
static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   if (texture_index != NULL)
      *texture_index = combined_index_key >> 24;

   if (sampler_index != NULL)
      *sampler_index = combined_index_key & 0x00ffffffu;
}
/* Pipeline object. It holds the per-stage info, the descriptor maps, the
* vertex input description and the packets that are prepacked at pipeline
* creation time.
*/
struct v3dv_pipeline {
struct v3dv_device *device;
VkShaderStageFlags active_stages;
struct v3dv_render_pass *pass;
struct v3dv_subpass *subpass;
/* Note: We can't use just a MESA_SHADER_STAGES array as we also need to
* track the coordinate shader
*/
struct v3dv_pipeline_stage *vs;
struct v3dv_pipeline_stage *vs_bin;
struct v3dv_pipeline_stage *fs;
struct v3dv_pipeline_stage *cs;
/* Spilling memory requirements */
struct {
struct v3dv_bo *bo;
uint32_t size_per_thread;
} spill;
struct v3dv_dynamic_state dynamic_state;
struct v3dv_pipeline_layout *layout;
enum v3dv_ez_state ez_state;
bool primitive_restart;
/* Accessed by binding. So vb[binding]->stride is the stride of the vertex
* array with such binding
*/
struct v3dv_pipeline_vertex_binding {
uint32_t stride;
uint32_t instance_divisor;
} vb[MAX_VBS];
uint32_t vb_count;
/* Note that a lot of info from VkVertexInputAttributeDescription is
* already prepacked, so here we are only storing those that need recheck
* later. The array must be indexed by driver location, since that is the
* order in which we need to emit the attributes.
*/
struct v3dv_pipeline_vertex_attrib {
uint32_t binding;
uint32_t offset;
VkFormat vk_format;
} va[MAX_VERTEX_ATTRIBS];
uint32_t va_count;
struct v3dv_descriptor_map ubo_map;
struct v3dv_descriptor_map ssbo_map;
struct v3dv_descriptor_map sampler_map;
struct v3dv_descriptor_map texture_map;
/*
* Vulkan has separate texture and sampler objects. The sampler and texture
* maps above use a sampler and a texture index respectively, and those can
* be different. But OpenGL combines both (or in other words, they are the
* same). The v3d compiler and all the nir lowerings that it uses were
* written under that assumption. In order to not update all those, we
* combine the indexes, and we use the following maps to get one or the
* other. In general the driver side uses the tex/sampler indexes to gather
* resources, and the compiler side uses the combined index (so the v3d key
* texture info will be indexed using the combined index).
*/
struct hash_table *combined_index_map;
/* Fixed capacity of 32 combined indices. */
uint32_t combined_index_to_key_map[32];
uint32_t next_combined_index;
/* FIXME: this bo is another candidate for data to be uploaded using a
* resource manager, instead of an individual bo
*/
struct v3dv_bo *default_attribute_values;
struct vpm_config vpm_cfg;
struct vpm_config vpm_cfg_bin;
/* If the pipeline should emit any of the stencil configuration packets */
bool emit_stencil_cfg[2];
/* If the pipeline is using push constants */
bool use_push_constants;
/* Blend state */
struct {
/* Per-RT bit mask with blend enables */
uint8_t enables;
/* Per-RT prepacked blend config packets */
uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)];
/* Flag indicating whether the blend factors in use require
* color constants.
*/
bool needs_color_constants;
/* Mask with enabled color channels for each RT (4 bits per RT) */
uint32_t color_write_masks;
} blend;
/* Depth bias */
struct {
bool enabled;
bool is_z16;
} depth_bias;
/* Packets prepacked during pipeline creation
*/
uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)];
uint8_t vcm_cache_size[cl_packet_length(VCM_CACHE_SIZE)];
uint8_t vertex_attrs[cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD) *
MAX_VERTEX_ATTRIBS];
uint8_t stencil_cfg[2][cl_packet_length(STENCIL_CFG)];
};
static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
!(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}
/* Returns a pointer to a constant nir_shader_compiler_options. Declaration
* only; the definition is not part of this header.
*/
const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
/* Maps the depth/stencil bits of an aspect mask to ZSTENCIL, Z, STENCIL or
 * NONE. Any other aspect bit present in the mask is ignored.
 */
static inline uint32_t
v3dv_zs_buffer_from_aspect_bits(VkImageAspectFlags aspects)
{
   switch (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return ZSTENCIL;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return Z;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return STENCIL;
   default:
      return NONE;
   }
}
/* Maps a Vulkan format to ZSTENCIL (combined depth/stencil formats), Z
 * (depth-only formats), STENCIL (VK_FORMAT_S8_UINT) or NONE (anything else).
 */
static inline uint32_t
v3dv_zs_buffer_from_vk_format(VkFormat format)
{
   uint32_t zs_buffer = NONE;

   switch (format) {
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      zs_buffer = Z;
      break;
   case VK_FORMAT_S8_UINT:
      zs_buffer = STENCIL;
      break;
   case VK_FORMAT_D16_UNORM_S8_UINT:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      zs_buffer = ZSTENCIL;
      break;
   default:
      break;
   }

   return zs_buffer;
}
/* Maps a pair of "has depth" / "has stencil" flags to ZSTENCIL, Z, STENCIL
 * or NONE.
 */
static inline uint32_t
v3dv_zs_buffer(bool depth, bool stencil)
{
   if (!depth)
      return stencil ? STENCIL : NONE;

   return stencil ? ZSTENCIL : Z;
}
/* Physical device queries. Declarations only; defined outside this header. */
uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
/* Entrypoint tables: translation between entrypoint names and indices, one
* pair of functions per dispatch level (instance, physical device, device).
*/
int v3dv_get_instance_entrypoint_index(const char *name);
int v3dv_get_device_entrypoint_index(const char *name);
int v3dv_get_physical_device_entrypoint_index(const char *name);
const char *v3dv_get_instance_entry_name(int index);
const char *v3dv_get_physical_device_entry_name(int index);
const char *v3dv_get_device_entry_name(int index);
/* Entrypoint enablement checks, taking the core version and the extension
* tables as input.
*/
bool
v3dv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
const struct v3dv_instance_extension_table *instance);
bool
v3dv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct v3dv_instance_extension_table *instance);
bool
v3dv_device_entrypoint_is_enabled(int index, uint32_t core_version,
const struct v3dv_instance_extension_table *instance,
const struct v3dv_device_extension_table *device);
void *v3dv_lookup_entrypoint(const struct v3d_device_info *devinfo,
const char *name);
#define v3dv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,
const char *file, int line,
const char *format, ...);
#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__);
/* Logging helpers taking a printf-style format. v3dv_loge is annotated as
* printf-like so its arguments are checked against the format string;
* v3dv_loge_v is the va_list flavour. Declarations only.
*/
void v3dv_loge(const char *format, ...) v3dv_printflike(1, 2);
void v3dv_loge_v(const char *format, va_list va);
/* Logs, through v3dv_loge, the name of the calling function together with the
* numeric value and the string name of a VkStructureType. Note that sType is
* evaluated twice.
*/
#define v3dv_debug_ignored_stype(sType) \
v3dv_loge("%s: ignored VkStructureType %u:%s\n", __func__, (sType), vk_StructureType_to_str(sType))
/* Format queries. Declarations only; defined outside this header. */
const struct v3dv_format *v3dv_get_format(VkFormat);
const uint8_t *v3dv_get_format_swizzle(VkFormat f);
void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *type, uint32_t *bpp);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
uint32_t tex_format);
/* Tiling helpers: utile dimensions for a given cpp, and load/store of a box
* of a tiled image. Declarations only.
*/
uint32_t v3d_utile_width(int cpp);
uint32_t v3d_utile_height(int cpp);
void v3d_load_tiled_image(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
enum v3d_tiling_mode tiling_format,
int cpp, uint32_t image_h,
const struct pipe_box *box);
void v3d_store_tiled_image(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
enum v3d_tiling_mode tiling_format,
int cpp, uint32_t image_h,
const struct pipe_box *box);
/* Uniform stream writers for a pipeline stage. The _wg_offsets variant has an
* extra wg_count_offsets output parameter. Declarations only.
*/
struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline_stage *p_stage);
struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline_stage *p_stage,
uint32_t **wg_count_offsets);
/* Shader variant lookup for a stage and key. The VkResult is reported through
* the out_vk_result output parameter. Declaration only.
*/
struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
struct v3d_key *key,
size_t key_size,
const VkAllocationCallbacks *pAllocator,
VkResult *out_vk_result);
/* Descriptor map accessors: all of them take the descriptor state, a
* descriptor map, the pipeline layout and an index into the map.
* Declarations only.
*/
struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index,
uint32_t *dynamic_offset);
const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
struct v3dv_image_view *
v3dv_descriptor_map_get_image_view(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
/* Returns the immutable samplers of a binding. They are located
 * binding->immutable_samplers_offset bytes after the start of the set
 * layout; the binding must have immutable samplers (non-zero offset).
 */
static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   const char *layout_base = (const char *) set;

   assert(binding->immutable_samplers_offset);

   return (const struct v3dv_sampler *)
      (layout_base + binding->immutable_samplers_offset);
}
/* Defines <type>_from_handle() and <type>_to_handle() for a handle type that
* is converted to/from the driver struct with a plain pointer cast.
*/
#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \
\
static inline struct __v3dv_type * \
__v3dv_type ## _from_handle(__VkType _handle) \
{ \
return (struct __v3dv_type *) _handle; \
} \
\
static inline __VkType \
__v3dv_type ## _to_handle(struct __v3dv_type *_obj) \
{ \
return (__VkType) _obj; \
}
/* Same as V3DV_DEFINE_HANDLE_CASTS, but the conversion goes through
* uintptr_t, so it also works when the handle type is an integer rather than
* a pointer.
*/
#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType) \
\
static inline struct __v3dv_type * \
__v3dv_type ## _from_handle(__VkType _handle) \
{ \
return (struct __v3dv_type *)(uintptr_t) _handle; \
} \
\
static inline __VkType \
__v3dv_type ## _to_handle(struct __v3dv_type *_obj) \
{ \
return (__VkType)(uintptr_t) _obj; \
}
/* Declares a local variable __name of type 'struct __v3dv_type *' initialized
* from __handle with the matching <type>_from_handle() function. The expansion
* has no trailing semicolon; the caller provides it.
*/
#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle)
/* Handle types converted with a plain pointer cast. */
V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer)
V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance)
V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice)
V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue)
/* Handle types converted through uintptr_t. */
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule)
/* These are defined as macros so that they work for both
* VkImageSubresourceRange and VkImageSubresourceLayers.
*
* They resolve the VK_REMAINING_ARRAY_LAYERS / VK_REMAINING_MIP_LEVELS special
* values to the count from the base layer/level up to the end of the image,
* and return the explicit count otherwise.
*
* Note that the arguments are evaluated more than once, so they must not have
* side effects.
*/
#define v3dv_layer_count(_image, _range) \
((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
(_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
#define v3dv_level_count(_image, _range) \
((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
(_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
/* Issues an ioctl, routing it to the v3d simulator when using_v3d_simulator
 * is set and to drmIoctl otherwise. Returns whatever the selected backend
 * returns.
 */
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   return using_v3d_simulator ? v3d_simulator_ioctl(fd, request, arg)
                              : drmIoctl(fd, request, arg);
}
/* Records an out-of-memory condition in command buffer state.
 *
 * If cmd_buffer is provided it is flagged directly. Otherwise job must be
 * non-NULL and the command buffer is taken from the job; jobs without a
 * command buffer reference (such as no-op jobs) are left alone.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (!cmd_buffer) {
      assert(job);
      cmd_buffer = job->cmd_buffer;
   }

   if (cmd_buffer)
      cmd_buffer->state.oom = true;
}
/* Returns from the enclosing function (which must return void) if an OOM
 * condition has been flagged either in _cmd_buffer or in the command buffer
 * referenced by _job. Both arguments may be NULL, and each one is evaluated
 * exactly once.
 *
 * The last line of the macro must not end with a line continuation:
 * otherwise whatever line follows the definition becomes part of the macro
 * body.
 */
#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)
#endif /* V3DV_PRIVATE_H */