v3dv: add support for timestamp queries
V3D doesn't provide any means to acquire timestamps from the GPU,
so we have to implement these on the CPU.
v2: enable timestampComputeAndGraphics and set timestampPeriod (Piñeiro)
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7373>
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 61a3b5c..2081207 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -5021,7 +5021,30 @@
VkQueryPool queryPool,
uint32_t query)
{
- unreachable("Timestamp queries are not supported.");
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
+
+ /* If this is called inside a render pass we need to finish the current
+ * job here...
+ */
+ if (cmd_buffer->state.pass)
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
+ V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
+ cmd_buffer, -1);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ job->cpu.query_timestamp.pool = query_pool;
+ job->cpu.query_timestamp.query = query;
+
+ list_addtail(&job->list_link, &cmd_buffer->jobs);
+ cmd_buffer->state.job = NULL;
+
+ /* ...and resume the subpass after the timestamp */
+ if (cmd_buffer->state.pass)
+ v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
}
static void
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 55755f5..a5f002b 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -814,6 +814,11 @@
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+ struct timespec clock_res;
+ clock_getres(CLOCK_MONOTONIC, &clock_res);
+ const float timestamp_period =
+ clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
+
/* FIXME: this will probably require an in-depth review */
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = 4096,
@@ -923,8 +928,8 @@
.sampledImageStencilSampleCounts = supported_sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = false,
- .timestampPeriod = 0.0f,
+ .timestampComputeAndGraphics = true,
+ .timestampPeriod = timestamp_period,
.maxClipDistances = 8,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 8,
@@ -990,7 +995,7 @@
VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
- .timestampValidBits = 0, /* FIXME */
+ .timestampValidBits = 64,
.minImageTransferGranularity = { 1, 1, 1 },
};
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 0a916cb..2017941 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -744,6 +744,7 @@
V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
+ V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};
struct v3dv_reset_query_cpu_job_info {
@@ -810,6 +811,11 @@
bool needs_wg_uniform_rewrite;
};
+/* Payload of a V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY job: identifies the query
+ * slot that receives the timestamp when the job is processed.
+ */
+struct v3dv_timestamp_query_cpu_job_info {
+ struct v3dv_query_pool *pool; /* Pool that owns the query */
+ uint32_t query; /* Index into pool->queries */
+};
+
struct v3dv_job {
struct list_head list_link;
@@ -881,6 +887,7 @@
struct v3dv_clear_attachments_cpu_job_info clear_attachments;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
+ struct v3dv_timestamp_query_cpu_job_info query_timestamp;
} cpu;
/* Job specs for TFU jobs */
@@ -1084,10 +1091,14 @@
+/* A single query slot. Which union member is meaningful depends on the
+ * query type of the pool that owns the slot. maybe_available is cleared by
+ * the reset job and set by the timestamp job once a value has been stored.
+ */
struct v3dv_query {
bool maybe_available;
- struct v3dv_bo *bo;
+ union {
+ struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
+ uint64_t value; /* Used by CPU queries (timestamp) */
+ };
};
struct v3dv_query_pool {
+ /* Copied from VkQueryPoolCreateInfo::queryType at pool creation; selects
+ * which member of the v3dv_query union is in use for every query.
+ */
+ VkQueryType query_type;
uint32_t query_count;
struct v3dv_query *queries;
};
diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c
index 7c2ce10..7224de4 100644
--- a/src/broadcom/vulkan/v3dv_query.c
+++ b/src/broadcom/vulkan/v3dv_query.c
@@ -31,12 +31,12 @@
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
- assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
+ assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
+ pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
assert(pCreateInfo->queryCount > 0);
-
/* FIXME: the hw allows us to allocate up to 16 queries in a single block
- * so we should try to use that.
+ * for occlusion queries so we should try to use that.
*/
struct v3dv_query_pool *pool =
vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
@@ -44,6 +44,7 @@
if (pool == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ pool->query_type = pCreateInfo->queryType;
pool->query_count = pCreateInfo->queryCount;
VkResult result;
@@ -59,16 +60,24 @@
uint32_t i;
for (i = 0; i < pool->query_count; i++) {
pool->queries[i].maybe_available = false;
- pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
- if (!pool->queries[i].bo) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- goto fail_alloc_bo;
- }
-
- /* For occlusion queries we only need a 4-byte counter */
- if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- goto fail_alloc_bo;
+ switch (pool->query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
+ if (!pool->queries[i].bo) {
+ result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ goto fail_alloc_bo;
+ }
+ /* For occlusion queries we only need a 4-byte counter */
+ if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
+ result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ goto fail_alloc_bo;
+ }
+ break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ pool->queries[i].value = 0;
+ break;
+ default:
+ unreachable("Unsupported query type");
}
}
@@ -98,21 +107,105 @@
if (!pool)
return;
- for (uint32_t i = 0; i < pool->query_count; i++)
- v3dv_bo_free(device, pool->queries[i].bo);
+ if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+ for (uint32_t i = 0; i < pool->query_count; i++)
+ v3dv_bo_free(device, pool->queries[i].bo);
+ }
+
vk_free2(&device->alloc, pAllocator, pool->queries);
vk_free2(&device->alloc, pAllocator, pool);
}
+/* Stores value in element idx of dst. dst is indexed as an array of
+ * uint64_t when do_64bit is set; otherwise it is indexed as an array of
+ * uint32_t and value is truncated to its low 32 bits.
+ */
static void
-write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
+write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
{
if (do_64bit) {
uint64_t *dst64 = (uint64_t *) dst;
dst64[idx] = value;
} else {
uint32_t *dst32 = (uint32_t *) dst;
- dst32[idx] = value;
+ dst32[idx] = (uint32_t) value;
+ }
+}
+
+/* Reads the 32-bit counter of an occlusion query from its mapped BO.
+ *
+ * device    - device used for the BO wait and for error reporting.
+ * pool      - occlusion query pool that owns the query.
+ * query     - index of the query inside pool.
+ * do_wait   - when true, block until the query BO is idle.
+ * available - out: always written; true only if the result is known to be
+ *             available.
+ *
+ * Returns the counter value widened to 64 bits. When do_wait is set and the
+ * wait cannot succeed, vk_error() is invoked with VK_ERROR_DEVICE_LOST,
+ * *available is false and 0 is returned, so the caller never sees an error
+ * code disguised as a query value.
+ *
+ * NOTE(review): the uint64_t return type cannot carry the VkResult back to
+ * the caller; propagating VK_ERROR_DEVICE_LOST would need a signature change
+ * in this helper and its caller.
+ */
+static uint64_t
+get_occlusion_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
+   assert(query < pool->query_count);
+
+   struct v3dv_query *q = &pool->queries[query];
+   assert(q->bo && q->bo->map);
+
+   /* The caller reads *available unconditionally, so give it a defined
+    * value before any early return.
+    */
+   *available = false;
+
+   if (do_wait) {
+      /* From the Vulkan 1.0 spec:
+       *
+       *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+       *     become available in a finite amount of time (e.g. due to not
+       *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+       *     error may occur."
+       */
+      if (!q->maybe_available ||
+          !v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull)) {
+         (void) vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+         return 0;
+      }
+
+      *available = true;
+   } else {
+      *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
+   }
+
+   return (uint64_t) *((uint32_t *) q->bo->map);
+}
+
+/* Fetches the value stored for a timestamp query.
+ *
+ * Timestamp values are stored in the query slot itself, so this helper
+ * performs no BO wait: availability is the slot's maybe_available flag.
+ *
+ * device    - device used for error reporting.
+ * pool      - timestamp query pool that owns the query.
+ * query     - index of the query inside pool.
+ * do_wait   - when true, a query with no recorded timestamp is an error.
+ * available - out: always written; true only if a timestamp is recorded.
+ *
+ * Returns the stored timestamp. When do_wait is set and no timestamp has
+ * been recorded, vk_error() is invoked with VK_ERROR_DEVICE_LOST,
+ * *available is false and 0 is returned.
+ *
+ * NOTE(review): the uint64_t return type cannot carry the VkResult back to
+ * the caller; propagating VK_ERROR_DEVICE_LOST would need a signature change
+ * in this helper and its caller.
+ */
+static uint64_t
+get_timestamp_query_result(struct v3dv_device *device,
+                           struct v3dv_query_pool *pool,
+                           uint32_t query,
+                           bool do_wait,
+                           bool *available)
+{
+   assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
+   assert(query < pool->query_count);
+
+   struct v3dv_query *q = &pool->queries[query];
+
+   /* Written on every path: the caller reads it unconditionally. */
+   *available = q->maybe_available;
+
+   /* From the Vulkan 1.0 spec:
+    *
+    *    "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
+    *     become available in a finite amount of time (e.g. due to not
+    *     issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
+    *     error may occur."
+    */
+   if (do_wait && !q->maybe_available) {
+      (void) vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+      return 0;
+   }
+
+   return q->value;
+}
+
+/* Dispatches to the result getter that matches the pool's query type and
+ * returns whatever that getter returns. device, pool, query, do_wait and
+ * available are forwarded unchanged. Any query type other than occlusion or
+ * timestamp is treated as unreachable.
+ */
+static uint64_t
+get_query_result(struct v3dv_device *device,
+ struct v3dv_query_pool *pool,
+ uint32_t query,
+ bool do_wait,
+ bool *available)
+{
+ switch (pool->query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ return get_occlusion_query_result(device, pool, query, do_wait, available);
+ case VK_QUERY_TYPE_TIMESTAMP:
+ return get_timestamp_query_result(device, pool, query, do_wait, available);
+ default:
+ unreachable("Unsupported query type");
}
}
@@ -135,30 +228,8 @@
VkResult result = VK_SUCCESS;
for (uint32_t i = first; i < first + count; i++) {
- assert(pool->queries[i].bo && pool->queries[i].bo->map);
- struct v3dv_bo *bo = pool->queries[i].bo;
- const uint32_t *counter = (const uint32_t *) bo->map;
-
bool available;
- if (do_wait) {
- /* From the Vulkan 1.0 spec:
- *
- * "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
- * become available in a finite amount of time (e.g. due to not
- * issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
- * error may occur."
- */
- if (!pool->queries[i].maybe_available)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
- if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
-
- available = true;
- } else {
- available = pool->queries[i].maybe_available &&
- v3dv_bo_wait(device, bo, 0);
- }
+ uint64_t value = get_query_result(device, pool, i, do_wait, &available);
/**
* From the Vulkan 1.0 spec:
@@ -174,7 +245,7 @@
const bool write_result = available || do_partial;
if (write_result)
- write_query_result(data, slot, do_64bit, *counter);
+ write_query_result(data, slot, do_64bit, value);
slot++;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 0899e74..722e6b4 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -154,22 +154,37 @@
handle_reset_query_cpu_job(struct v3dv_job *job)
{
/* We are about to reset query counters so we need to make sure that
- * The GPU is not using them.
+ * the GPU is not using them. The exception is timestamp queries, since
+ * we handle those in the CPU.
*
* FIXME: we could avoid blocking the main thread for this if we use
* submission thread.
*/
- VkResult result = gpu_queue_wait_idle(&job->device->queue);
- if (result != VK_SUCCESS)
- return result;
-
struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
+ assert(info->pool);
+
+ if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+ VkResult result = gpu_queue_wait_idle(&job->device->queue);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
for (uint32_t i = info->first; i < info->first + info->count; i++) {
assert(i < info->pool->query_count);
struct v3dv_query *query = &info->pool->queries[i];
query->maybe_available = false;
- uint32_t *counter = (uint32_t *) query->bo->map;
- *counter = 0;
+ switch (info->pool->query_type) {
+ case VK_QUERY_TYPE_OCCLUSION: {
+ uint32_t *counter = (uint32_t *) query->bo->map;
+ *counter = 0;
+ break;
+ }
+ case VK_QUERY_TYPE_TIMESTAMP:
+ query->value = 0;
+ break;
+ default:
+ unreachable("Unsupported query type");
+ }
}
return VK_SUCCESS;
@@ -420,6 +435,26 @@
}
static VkResult
+handle_timestamp_query_cpu_job(struct v3dv_job *job)
+{
+   /* Processes a CPU timestamp job: waits for the queue to go idle, samples
+    * CLOCK_MONOTONIC and stores the time in nanoseconds in the target query.
+    *
+    * Returns VK_SUCCESS once the timestamp is stored, or the error reported
+    * by the queue wait, in which case the query is left untouched.
+    */
+   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
+   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
+   assert(info->pool);
+   assert(info->query < info->pool->query_count);
+
+   /* Wait for completion of all work queued before the timestamp query. If
+    * the wait fails, do not mark the query available with a timestamp that
+    * is not ordered after the preceding work.
+    */
+   VkResult result =
+      v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Compute timestamp */
+   struct timespec t;
+   clock_gettime(CLOCK_MONOTONIC, &t);
+
+   struct v3dv_query *query = &info->pool->queries[info->query];
+   query->value = (uint64_t) t.tv_sec * 1000000000ull + (uint64_t) t.tv_nsec;
+   query->maybe_available = true;
+
+   return VK_SUCCESS;
+}
+
+static VkResult
handle_csd_job(struct v3dv_queue *queue,
struct v3dv_job *job,
bool do_sem_wait);
@@ -705,6 +740,8 @@
return handle_copy_buffer_to_image_cpu_job(job);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
+ case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
+ return handle_timestamp_query_cpu_job(job);
default:
unreachable("Unhandled job type");
}