radv: Add winsys support for submitting timeline syncobj.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5600>
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 3757632..2741870 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -169,8 +169,10 @@
 struct radv_winsys_sem_counts {
 	uint32_t syncobj_count;
 	uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
+	uint32_t timeline_syncobj_count; /* timeline entries are stored in syncobj[] after the binary ones */
 	uint32_t sem_count;
 	uint32_t *syncobj;
+	uint64_t *points; /* one wait/signal point per timeline syncobj, indexed 0..timeline_syncobj_count-1 */
 	struct radeon_winsys_sem **sem;
 };
 
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 2cce43c..4265ca5 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -1450,9 +1450,9 @@
 	return 0;
 }
 
-static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
-									  const uint32_t *syncobj_override,
-									  struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
+static void *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
+						const uint32_t *syncobj_override,
+						struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
 {
 	const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
 	struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
@@ -1470,6 +1470,38 @@
 	return syncobj;
 }
 
+static void *
+radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *counts,
+                                            const uint32_t *syncobj_override,
+                                            struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
+{	/* Fill *chunk with a drm_amdgpu_cs_chunk_syncobj array (binary syncobjs first, then timeline entries); returns the heap-allocated array, or NULL on OOM. */
+	const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj; /* caller may substitute the binary handles (see in_syncobjs at the call site) */
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
+	                                                     (counts->syncobj_count + counts->timeline_syncobj_count));
+	if (!syncobj)
+		return NULL;
+
+	for (unsigned i = 0; i < counts->syncobj_count; i++) {
+		struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i];
+		sem->handle = src[i];
+		sem->flags = 0;
+		sem->point = 0; /* point is meaningless for a binary syncobj */
+	}
+
+	for (unsigned i = 0; i < counts->timeline_syncobj_count; i++) {
+		struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i + counts->syncobj_count];
+		sem->handle = counts->syncobj[i + counts->syncobj_count]; /* NOTE(review): syncobj_override is NOT applied to timeline entries — confirm in_syncobjs only covers the binary handles */
+		sem->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; /* allow waiting on points not yet submitted; presumably ignored by the kernel for signal chunks — verify */
+		sem->point = counts->points[i]; /* points[] holds timeline entries only, hence index i not i + syncobj_count */
+	}
+
+	chunk->chunk_id = chunk_id;
+	chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
+		(counts->syncobj_count + counts->timeline_syncobj_count);
+	chunk->chunk_data = (uint64_t)(uintptr_t)syncobj; /* kernel reads the array via this pointer; ownership stays with the caller */
+	return syncobj;
+}
+
 static int radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
 {
 	pthread_mutex_lock(&ws->syncobj_lock);
@@ -1577,9 +1609,9 @@
 	struct drm_amdgpu_cs_chunk *chunks;
 	struct drm_amdgpu_cs_chunk_data *chunk_data;
 	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
-	struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj = NULL;
 	bool use_bo_list_create = ctx->ws->info.drm_minor < 27;
 	struct drm_amdgpu_bo_list_in bo_list_in;
+	void *wait_syncobj = NULL, *signal_syncobj = NULL;
 	uint32_t *in_syncobjs = NULL;
 	int i;
 	struct amdgpu_cs_fence *sem;
@@ -1630,15 +1662,22 @@
 						   &chunk_data[i]);
 	}
 
-	if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) {
+	if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) && sem_info->cs_emit_wait) {
 		r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
 		if (r)
 			goto error_out;
 
-		wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
-								  in_syncobjs,
-								  &chunks[num_chunks],
-								  AMDGPU_CHUNK_ID_SYNCOBJ_IN);
+		if (ctx->ws->info.has_timeline_syncobj) {
+			wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->wait,
+										   in_syncobjs,
+										   &chunks[num_chunks],
+										   AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
+		} else {
+			wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
+									  in_syncobjs,
+									  &chunks[num_chunks],
+									  AMDGPU_CHUNK_ID_SYNCOBJ_IN);
+		}
 		if (!wait_syncobj) {
 			result = VK_ERROR_OUT_OF_HOST_MEMORY;
 			goto error_out;
@@ -1679,11 +1718,18 @@
 		sem_info->cs_emit_wait = false;
 	}
 
-	if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) {
-		signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
-								    NULL,
-								    &chunks[num_chunks],
-								    AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
+	if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) && sem_info->cs_emit_signal) {
+		if (ctx->ws->info.has_timeline_syncobj) {
+			signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->signal,
+										     NULL,
+										     &chunks[num_chunks],
+										     AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
+		} else {
+			signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
+									    NULL,
+									    &chunks[num_chunks],
+									    AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
+		}
 		if (!signal_syncobj) {
 			result = VK_ERROR_OUT_OF_HOST_MEMORY;
 			goto error_out;