/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "tu_knl_drm.h"
#include "tu_device.h"
#include "tu_rmv.h"
VkResult
tu_allocate_userspace_iova(struct tu_device *dev,
                           uint64_t size,
                           uint64_t client_iova,
                           enum tu_bo_alloc_flags flags,
                           uint64_t *iova)
{
   *iova = 0;

   if (flags & TU_BO_ALLOC_REPLAYABLE) {
      if (client_iova) {
         if (util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
            *iova = client_iova;
         } else {
            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
         }
      } else {
         /* We have to separate replayable IOVAs from ordinary ones so that
          * they don't clash. The easiest way to do this is to allocate them
          * from the other end of the address space.
          */
         dev->vma.alloc_high = true;
         *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
      }
   } else {
      dev->vma.alloc_high = false;
      *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
   }

   if (!*iova)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}
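
/* Export a BO as a dma-buf file descriptor via PRIME. Returns the new fd on
 * success, or -1 on failure.
 */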
int
tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}
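
/* Drop a reference to a BO and, once the refcount reaches zero, unmap it,
 * remove it from the device's BO list, and either queue its VMA as a zombie
 * to be reaped later (when the kernel supports userspace-managed IOVAs) or
 * close the GEM handle immediately.
 */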
void
tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map) {
      TU_RMV(bo_unmap, dev, bo);
      munmap(bo->map, bo->size);
   }

   TU_RMV(bo_destroy, dev, bo);
   tu_debug_bos_del(dev, bo);

   mtx_lock(&dev->bo_mutex);
   dev->bo_count--;
   dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];

   struct tu_bo *exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
   exchanging_bo->bo_list_idx = bo->bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
         u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
#ifdef TU_HAS_VIRTIO
      vma->res_id = bo->res_id;
#endif
      vma->iova = bo->iova;
      vma->size = bo->size;
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so rather than freeing the BO pointer we reset it to 0 to mark that
       * array entry as free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      /* Note that the virtgpu GEM_CLOSE path is a bit different, but it does
       * not use the !has_set_iova path, so we can ignore that here.
       */
      struct drm_gem_close req = {
         .handle = gem_handle,
      };

      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}
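
/* Return the DRM syncobj handle backing a vk_sync, whether it is a turnip
 * timeline sync or a plain drm-syncobj based sync.
 */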
uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);
   return syncobj;
}
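
/* vk_sync implementation backed by a DRM syncobj plus a CPU-side state
 * machine (RESET -> SUBMITTED -> SIGNALED) tracked in tu_timeline_sync.
 */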
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);
   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}
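
/* Wait on a set of DRM syncobjs, either for all of them or for any one.
 * WAIT_FOR_SUBMIT is always set so syncobjs that don't have a fence attached
 * yet are waited on instead of failing the ioctl.
 */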
static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all)
      syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* syncobj absolute timeouts are signed. Clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}
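
/* CPU wait across a mix of reset, submitted, and signaled syncs. Submitted
 * syncs are waited on via drm_syncobj_wait(); syncs still in the RESET state
 * (i.e. not yet submitted to any queue) block on the device's submit_mutex /
 * timeline_cond until a submit advances them or the timeout expires.
 */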
/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;

   struct tu_timeline_sync *submitted_syncs[wait_count];

   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* Return the error; this also covers the timeout case. */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}
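
/* vk_sync type table exposing the syncobj-backed sync above to the common
 * runtime. Only VK_SYNC_FEATURE_BINARY is advertised, so despite the struct
 * name this behaves as a binary sync with CPU/GPU waits, reset, wait-any,
 * and wait-pending support.
 */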
const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};