src/panfrost/encoder/pan_bo.c - platform/external/mesa3d - Git at Google

 /*
  * Copyright 2019 Collabora, Ltd.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  * Authors (Collabora):
  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  */
 #include <errno.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <xf86drm.h>
 #include <pthread.h>
 #include "drm-uapi/panfrost_drm.h"

 #include "pan_bo.h"

 #include "os/os_mman.h"

 #include "util/u_inlines.h"
 #include "util/u_math.h"

 /* This file implements a userspace BO cache. Allocating and freeing
  * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
  * add more work than we would like at this point. So caching BOs in userspace
  * solves both of these problems and does not require kernel updates.
  *
  * Cached BOs are sorted into a bucket based on rounding their size down to the
  * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
  * objects. Putting a BO into the cache is accomplished by adding it to the
  * corresponding bucket. Getting a BO from the cache consists of finding the
  * appropriate bucket and searching it for a suitable BO. A cache eviction is
  * a kernel-level free of a BO and removing it from the bucket. We special
  * case evicting all BOs from the cache, since that's what's helpful in
  * practice and avoids extra logic around the linked list.
  */

 static struct panfrost_bo *
 panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
                   uint32_t flags)
 {
         struct drm_panfrost_create_bo create_bo = { .size = size };
         struct panfrost_bo *bo;
         int ret;

         if (dev->kernel_version->version_major > 1 ||
             dev->kernel_version->version_minor >= 1) {
                 if (flags & PAN_BO_GROWABLE)
                         create_bo.flags |= PANFROST_BO_HEAP;
                 if (!(flags & PAN_BO_EXECUTE))
                         create_bo.flags |= PANFROST_BO_NOEXEC;
         }

         ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
         if (ret) {
                 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
                 return NULL;
         }

         bo = rzalloc(dev->memctx, struct panfrost_bo);
         assert(bo);
         bo->size = create_bo.size;
         bo->gpu = create_bo.offset;
         bo->gem_handle = create_bo.handle;
         bo->flags = flags;
         bo->dev = dev;
         return bo;
 }

 static void
 panfrost_bo_free(struct panfrost_bo *bo)
 {
         struct drm_gem_close gem_close = { .handle = bo->gem_handle };
         int ret;

         ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
         if (ret) {
                 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
                 assert(0);
         }

         ralloc_free(bo);
 }

 /* Returns true if the BO is ready, false otherwise.
  * access_type is encoding the type of access one wants to ensure is done.
  * Say you want to make sure all writers are done writing, you should pass
  * PAN_BO_ACCESS_WRITE.
  * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
  * PAN_BO_ACCESS_READ would work too as waiting for readers implies
  * waiting for writers as well, but we want to make things explicit and waiting
  * only for readers is impossible.
  */
 bool
 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
                  uint32_t access_type)
 {
         struct drm_panfrost_wait_bo req = {
                 .handle = bo->gem_handle,
 		.timeout_ns = timeout_ns,
         };
         int ret;

         assert(access_type == PAN_BO_ACCESS_WRITE ||
                access_type == PAN_BO_ACCESS_RW);

         /* If the BO has been exported or imported we can't rely on the cached
          * state, we need to call the WAIT_BO ioctl.
          */
         if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
                 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
                 if (!bo->gpu_access)
                         return true;

                 /* If the caller only wants to wait for writers and no
                  * writes are pending, we don't have to wait.
                  */
                 if (access_type == PAN_BO_ACCESS_WRITE &&
                     !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
                         return true;
         }

         /* The ioctl returns >= 0 value when the BO we are waiting for is ready
          * -1 otherwise.
          */
         ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
         if (ret != -1) {
                 /* Set gpu_access to 0 so that the next call to bo_wait()
                  * doesn't have to call the WAIT_BO ioctl.
                  */
                 bo->gpu_access = 0;
                 return true;
         }

         /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
          * is invalid, which shouldn't happen here.
          */
         assert(errno == ETIMEDOUT || errno == EBUSY);
         return false;
 }

 /* Maps a BO size to the index of the cache bucket holding BOs of that
  * size class. The size is rounded down to a power of two via its base-2
  * logarithm, clamped to MAX_BO_CACHE_BUCKET (so every huge allocation
  * shares the last bucket) and rebased so the smallest bucket is index 0.
  * Sizes below the minimum bucket trip an assertion. */

 static unsigned
 pan_bucket_index(unsigned size)
 {
         const unsigned log2_size = util_logbase2(size);
         const unsigned clamped = MIN2(log2_size, MAX_BO_CACHE_BUCKET);

         /* The lower bound relies on callers never going below the
          * minimum allocation size; the upper bound was just enforced. */
         assert(clamped >= MIN_BO_CACHE_BUCKET);
         assert(clamped <= MAX_BO_CACHE_BUCKET);

         return clamped - MIN_BO_CACHE_BUCKET;
 }

 /* Returns the list head of the cache bucket matching the given size. */
 static struct list_head *
 pan_bucket(struct panfrost_device *dev, unsigned size)
 {
         const unsigned idx = pan_bucket_index(size);

         return dev->bo_cache.buckets + idx;
 }

 /* Looks in the BO cache for a BO that is at least `size` bytes large and
  * was created with exactly `flags`.
  *
  * When `dontwait` is set, BOs that are still busy are skipped; otherwise
  * we block on each candidate until it is idle. A hit is unlinked from the
  * cache and returned. NULL tells the caller to allocate a new BO. The
  * cache lock is held for the whole search. */

 static struct panfrost_bo *
 panfrost_bo_cache_fetch(struct panfrost_device *dev,
                         size_t size, uint32_t flags, bool dontwait)
 {
         const int64_t timeout_ns = dontwait ? 0 : INT64_MAX;
         struct panfrost_bo *found = NULL;

         pthread_mutex_lock(&dev->bo_cache.lock);
         struct list_head *bucket = pan_bucket(dev, size);

         list_for_each_entry_safe(struct panfrost_bo, candidate, bucket,
                                  bucket_link) {
                 /* Too small, or not the same kind of BO */
                 if (candidate->size < size)
                         continue;
                 if (candidate->flags != flags)
                         continue;

                 /* Still in use by the GPU */
                 if (!panfrost_bo_wait(candidate, timeout_ns,
                                       PAN_BO_ACCESS_RW))
                         continue;

                 /* Suitable: take it out of the bucket and the LRU list */
                 list_del(&candidate->bucket_link);
                 list_del(&candidate->lru_link);

                 struct drm_panfrost_madvise madv = {
                         .handle = candidate->gem_handle,
                         .madv = PANFROST_MADV_WILLNEED,
                 };
                 int ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE,
                                    &madv);

                 /* The kernel did not retain the backing storage, so this
                  * BO is useless: drop it and keep looking. */
                 if (ret == 0 && !madv.retained) {
                         panfrost_bo_free(candidate);
                         continue;
                 }

                 found = candidate;
                 break;
         }
         pthread_mutex_unlock(&dev->bo_cache.lock);

         return found;
 }

 /* Frees cached BOs that have been sitting unused for too long. Walks the
  * LRU list from its head and stops at the first entry that is recent
  * enough. The caller in this file invokes it with the cache lock held. */
 static void
 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
 {
         struct timespec now;

         clock_gettime(CLOCK_MONOTONIC, &now);

         list_for_each_entry_safe(struct panfrost_bo, stale,
                                  &dev->bo_cache.lru, lru_link) {
                 /* The goal is to drop everything last used more than a
                  * second ago. Only ->tv_sec is compared (->tv_nsec is
                  * ignored), hence the threshold of 2 rather than 1: some
                  * entries between 1 and 2 seconds old survive, which is
                  * fine as long as unused BOs go away eventually. */
                 const bool recent = now.tv_sec - stale->last_used <= 2;

                 if (recent)
                         break;

                 list_del(&stale->bucket_link);
                 list_del(&stale->lru_link);
                 panfrost_bo_free(stale);
         }
 }

 /* Tries to add a BO to the cache. Returns if it was
  * successful */

 static bool
 panfrost_bo_cache_put(struct panfrost_bo *bo)
 {
         struct panfrost_device *dev = bo->dev;

         if (bo->flags & PAN_BO_DONT_REUSE)
                 return false;

         pthread_mutex_lock(&dev->bo_cache.lock);
         struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
         struct drm_panfrost_madvise madv;
         struct timespec time;

         madv.handle = bo->gem_handle;
         madv.madv = PANFROST_MADV_DONTNEED;
 	madv.retained = 0;

         drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

         /* Add us to the bucket */
         list_addtail(&bo->bucket_link, bucket);

         /* Add us to the LRU list and update the last_used field. */
         list_addtail(&bo->lru_link, &dev->bo_cache.lru);
         clock_gettime(CLOCK_MONOTONIC, &time);
         bo->last_used = time.tv_sec;

         /* Let's do some cleanup in the BO cache while we hold the
          * lock.
          */
         panfrost_bo_cache_evict_stale_bos(dev);
         pthread_mutex_unlock(&dev->bo_cache.lock);

         return true;
 }

 /* Empties the BO cache: every cached BO in every bucket is unlinked and
  * freed, under the cache lock. Meant for context destruction and
  * low-memory situations, where memory parked in our cache is unused by
  * us but still counts as reserved from the OS point of view. */

 void
 panfrost_bo_cache_evict_all(
                 struct panfrost_device *dev)
 {
         const unsigned bucket_count = ARRAY_SIZE(dev->bo_cache.buckets);

         pthread_mutex_lock(&dev->bo_cache.lock);
         for (unsigned b = 0; b < bucket_count; b++) {
                 list_for_each_entry_safe(struct panfrost_bo, victim,
                                          &dev->bo_cache.buckets[b],
                                          bucket_link) {
                         list_del(&victim->bucket_link);
                         list_del(&victim->lru_link);
                         panfrost_bo_free(victim);
                 }
         }
         pthread_mutex_unlock(&dev->bo_cache.lock);
 }

 /* Maps a BO into the CPU address space, unless it is mapped already.
  *
  * On success bo->cpu points at the mapping. On failure an error is
  * printed, assertions (when enabled) fire, and bo->cpu is left NULL so
  * that later "is it mapped?" checks and panfrost_bo_munmap() never
  * mistake MAP_FAILED for a valid pointer. */
 void
 panfrost_bo_mmap(struct panfrost_bo *bo)
 {
         struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };

         if (bo->cpu)
                 return;

         /* Ask the kernel for the fake offset to mmap the BO with */
         if (drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo)) {
                 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                 assert(0);
                 /* With assertions compiled out there is still no valid
                  * offset to map, so give up. */
                 return;
         }

         bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                           bo->dev->fd, mmap_bo.offset);
         if (bo->cpu == MAP_FAILED) {
                 fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                 /* MAP_FAILED is non-NULL; don't leave it behind */
                 bo->cpu = NULL;
                 assert(0);
         }
 }

 /* Removes the CPU mapping of a BO, if there is one, and clears bo->cpu.
  * A failing munmap is fatal: it is reported with perror() and the
  * process aborts. */
 static void
 panfrost_bo_munmap(struct panfrost_bo *bo)
 {
         if (bo->cpu) {
                 void *mapping = (void *) (uintptr_t)bo->cpu;

                 if (os_munmap(mapping, bo->size)) {
                         perror("munmap");
                         abort();
                 }

                 bo->cpu = NULL;
         }
 }

 /* Creates (or recycles) a BO of at least `size` bytes with the given
  * PAN_BO_* flags.
  *
  * The BO is returned with a reference count of 1 and is registered in
  * the device's set of active BOs. Unless PAN_BO_INVISIBLE or
  * PAN_BO_DELAY_MMAP is set, it is also mapped for CPU access. */
 struct panfrost_bo *
 panfrost_bo_create(struct panfrost_device *dev, size_t size,
                    uint32_t flags)
 {
         /* Kernel will fail (confusingly) with EPERM otherwise */
         assert(size > 0);

         /* Tiny BOs are rounded up to 4096 bytes to maximize cache reuse */
         if (size < 4096)
                 size = 4096;

         /* GROWABLE BOs cannot be mmapped, so they must be INVISIBLE */
         assert(!(flags & PAN_BO_GROWABLE) || (flags & PAN_BO_INVISIBLE));

         /* Allocation strategy, in order:
          *  1. the cache, without waiting for busy BOs (cached BOs may
          *     still be referenced by unfinished jobs);
          *  2. a fresh kernel allocation;
          *  3. the cache again, this time accepting to wait.
          */
         struct panfrost_bo *bo =
                 panfrost_bo_cache_fetch(dev, size, flags, true);

         if (!bo)
                 bo = panfrost_bo_alloc(dev, size, flags);
         if (!bo)
                 bo = panfrost_bo_cache_fetch(dev, size, flags, false);

         if (!bo)
                 fprintf(stderr, "BO creation failed\n");

         assert(bo);

         /* Map right away only when it is known to be needed. CPU-invisible
          * buffers are purely for GPU-internal use and are never mapped;
          * DELAY_MMAP buffers are not mapped here either. */
         const uint32_t no_mmap_mask = PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP;

         if ((flags & no_mmap_mask) == 0)
                 panfrost_bo_mmap(bo);

         p_atomic_set(&bo->refcnt, 1);

         pthread_mutex_lock(&dev->active_bos_lock);
         _mesa_set_add(bo->dev->active_bos, bo);
         pthread_mutex_unlock(&dev->active_bos_lock);

         return bo;
 }

 void
 panfrost_bo_reference(struct panfrost_bo *bo)
 {
         if (bo) {
                 ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
                 assert(count != 1);
         }
 }

 /* Drops one reference on a BO. NULL is accepted and ignored.
  *
  * When the last reference goes away the BO is removed from the device's
  * active set, unmapped, and then either parked in the BO cache or, if the
  * cache refuses it, freed. */
 void
 panfrost_bo_unreference(struct panfrost_bo *bo)
 {
         if (!bo)
                 return;

         /* Still referenced by someone else: nothing more to do */
         if (p_atomic_dec_return(&bo->refcnt) != 0)
                 return;

         /* Keep our own device pointer; `bo` may be freed below */
         struct panfrost_device *dev = bo->dev;

         pthread_mutex_lock(&dev->active_bos_lock);

         /* Someone might have imported this BO while we were waiting for
          * the lock; if so it is alive again and must be left alone. */
         if (p_atomic_read(&bo->refcnt) != 0) {
                 pthread_mutex_unlock(&dev->active_bos_lock);
                 return;
         }

         _mesa_set_remove_key(bo->dev->active_bos, bo);

         /* No users left, so the CPU mapping can go */
         panfrost_bo_munmap(bo);

         /* Prefer caching the BO for later allocations over freeing it */
         if (!panfrost_bo_cache_put(bo))
                 panfrost_bo_free(bo);

         pthread_mutex_unlock(&dev->active_bos_lock);
 }

 /* Imports a BO from a PRIME file descriptor.
  *
  * dev: device to import the BO into
  * fd:  PRIME fd referring to the buffer
  *
  * If the underlying GEM handle is already tracked in the device's active
  * BO set, the existing panfrost_bo is returned with one more reference.
  * Otherwise a new panfrost_bo is set up (flagged DONT_REUSE | IMPORTED,
  * refcount 1) and mapped for CPU access. */
 struct panfrost_bo *
 panfrost_bo_import(struct panfrost_device *dev, int fd)
 {
         struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo);
         struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
         struct set_entry *entry;
         ASSERTED int ret;
         unsigned gem_handle;

         newbo->dev = dev;

         ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
         assert(!ret);

         newbo->gem_handle = gem_handle;

         pthread_mutex_lock(&dev->active_bos_lock);
         /* Either finds the BO already tracking this handle or inserts
          * newbo; comparing the returned key with newbo tells which. */
         entry = _mesa_set_search_or_add(dev->active_bos, newbo);
         assert(entry);
         bo = (struct panfrost_bo *)entry->key;
         if (newbo == bo) {
                 get_bo_offset.handle = gem_handle;
                 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
                 assert(!ret);

                 newbo->gpu = (mali_ptr) get_bo_offset.offset;
                 /* Seeking to the end of the fd yields the buffer size */
                 newbo->size = lseek(fd, 0, SEEK_END);
                 newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
                 assert(newbo->size > 0);
                 p_atomic_set(&newbo->refcnt, 1);
                 // TODO map and unmap on demand?
                 panfrost_bo_mmap(newbo);
         } else {
                 /* The handle is already tracked; the scratch wrapper is
                  * not needed and must not be touched after this point. */
                 ralloc_free(newbo);
                 /* bo->refcnt == 0 can happen if the BO
                  * was being released but panfrost_bo_import() acquired the
                  * lock before panfrost_bo_unreference(). In that case, refcnt
                  * is 0 and we can't use panfrost_bo_reference() directly, we
                  * have to re-initialize the refcnt of the existing BO.
                  * Note that panfrost_bo_unreference() checks
                  * refcnt value just after acquiring the lock to
                  * make sure the object is not freed if panfrost_bo_import()
                  * acquired it in the meantime.
                  */
                 if (p_atomic_read(&bo->refcnt) == 0)
                         p_atomic_set(&bo->refcnt, 1);
                 else
                         panfrost_bo_reference(bo);
                 assert(bo->cpu);
         }
         pthread_mutex_unlock(&dev->active_bos_lock);

         return bo;
 }

 int
 panfrost_bo_export(struct panfrost_bo *bo)
 {
         struct drm_prime_handle args = {
                 .handle = bo->gem_handle,
                 .flags = DRM_CLOEXEC,
         };

         int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
         if (ret == -1)
                 return -1;

         bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
         return args.fd;
 }
	/*
	* Copyright 2019 Collabora, Ltd.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*
	* Authors (Collabora):
	* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
	*/
	#include <errno.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <xf86drm.h>
	#include <pthread.h>
	#include "drm-uapi/panfrost_drm.h"

	#include "pan_bo.h"

	#include "os/os_mman.h"

	#include "util/u_inlines.h"
	#include "util/u_math.h"

	/* This file implements a userspace BO cache. Allocating and freeing
	 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
	 * add more work than we would like at this point. So caching BOs in userspace
	 * solves both of these problems and does not require kernel updates.
	 *
	 * Cached BOs are sorted into a bucket based on rounding their size down to the
	 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
	 * objects. Putting a BO into the cache is accomplished by adding it to the
	 * corresponding bucket. Getting a BO from the cache consists of finding the
	 * appropriate bucket and searching it for a suitable BO. A cache eviction is
	 * a kernel-level free of a BO and removing it from the bucket. We special
	 * case evicting all BOs from the cache, since that's what's helpful in
	 * practice and avoids extra logic around the linked list.
	 */

	static struct panfrost_bo *
	panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
	uint32_t flags)
	{
	struct drm_panfrost_create_bo create_bo = { .size = size };
	struct panfrost_bo *bo;
	int ret;

	if (dev->kernel_version->version_major > 1 \|\|
	dev->kernel_version->version_minor >= 1) {
	if (flags & PAN_BO_GROWABLE)
	create_bo.flags \|= PANFROST_BO_HEAP;
	if (!(flags & PAN_BO_EXECUTE))
	create_bo.flags \|= PANFROST_BO_NOEXEC;
	}

	ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
	if (ret) {
	fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
	return NULL;
	}

	bo = rzalloc(dev->memctx, struct panfrost_bo);
	assert(bo);
	bo->size = create_bo.size;
	bo->gpu = create_bo.offset;
	bo->gem_handle = create_bo.handle;
	bo->flags = flags;
	bo->dev = dev;
	return bo;
	}

	static void
	panfrost_bo_free(struct panfrost_bo *bo)
	{
	struct drm_gem_close gem_close = { .handle = bo->gem_handle };
	int ret;

	ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
	if (ret) {
	fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
	assert(0);
	}

	ralloc_free(bo);
	}

	/* Returns true if the BO is ready, false otherwise.
	* access_type is encoding the type of access one wants to ensure is done.
	* Say you want to make sure all writers are done writing, you should pass
	* PAN_BO_ACCESS_WRITE.
	* If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
	* PAN_BO_ACCESS_READ would work too as waiting for readers implies
	* waiting for writers as well, but we want to make things explicit and waiting
	* only for readers is impossible.
	*/
	bool
	panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
	uint32_t access_type)
	{
	struct drm_panfrost_wait_bo req = {
	.handle = bo->gem_handle,
	.timeout_ns = timeout_ns,
	};
	int ret;

	assert(access_type == PAN_BO_ACCESS_WRITE \|\|
	access_type == PAN_BO_ACCESS_RW);

	/* If the BO has been exported or imported we can't rely on the cached
	* state, we need to call the WAIT_BO ioctl.
	*/
	if (!(bo->flags & (PAN_BO_IMPORTED \| PAN_BO_EXPORTED))) {
	/* If ->gpu_access is 0, the BO is idle, no need to wait. */
	if (!bo->gpu_access)
	return true;

	/* If the caller only wants to wait for writers and no
	* writes are pending, we don't have to wait.
	*/
	if (access_type == PAN_BO_ACCESS_WRITE &&
	!(bo->gpu_access & PAN_BO_ACCESS_WRITE))
	return true;
	}

	/* The ioctl returns >= 0 value when the BO we are waiting for is ready
	* -1 otherwise.
	*/
	ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
	if (ret != -1) {
	/* Set gpu_access to 0 so that the next call to bo_wait()
	* doesn't have to call the WAIT_BO ioctl.
	*/
	bo->gpu_access = 0;
	return true;
	}

	/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
	* is invalid, which shouldn't happen here.
	*/
	assert(errno == ETIMEDOUT \|\| errno == EBUSY);
	return false;
	}

	/* Maps a BO size to the index of the cache bucket holding BOs of that
	 * size class. The size is rounded down to a power of two via its base-2
	 * logarithm, clamped to MAX_BO_CACHE_BUCKET (so every huge allocation
	 * shares the last bucket) and rebased so the smallest bucket is index 0.
	 * Sizes below the minimum bucket trip an assertion. */

	static unsigned
	pan_bucket_index(unsigned size)
	{
	        const unsigned log2_size = util_logbase2(size);
	        const unsigned clamped = MIN2(log2_size, MAX_BO_CACHE_BUCKET);

	        /* The lower bound relies on callers never going below the
	         * minimum allocation size; the upper bound was just enforced. */
	        assert(clamped >= MIN_BO_CACHE_BUCKET);
	        assert(clamped <= MAX_BO_CACHE_BUCKET);

	        return clamped - MIN_BO_CACHE_BUCKET;
	}

	/* Returns the list head of the cache bucket matching the given size. */
	static struct list_head *
	pan_bucket(struct panfrost_device *dev, unsigned size)
	{
	        const unsigned idx = pan_bucket_index(size);

	        return dev->bo_cache.buckets + idx;
	}

	/* Looks in the BO cache for a BO that is at least `size` bytes large and
	 * was created with exactly `flags`.
	 *
	 * When `dontwait` is set, BOs that are still busy are skipped; otherwise
	 * we block on each candidate until it is idle. A hit is unlinked from the
	 * cache and returned. NULL tells the caller to allocate a new BO. The
	 * cache lock is held for the whole search. */

	static struct panfrost_bo *
	panfrost_bo_cache_fetch(struct panfrost_device *dev,
	                        size_t size, uint32_t flags, bool dontwait)
	{
	        const int64_t timeout_ns = dontwait ? 0 : INT64_MAX;
	        struct panfrost_bo *found = NULL;

	        pthread_mutex_lock(&dev->bo_cache.lock);
	        struct list_head *bucket = pan_bucket(dev, size);

	        list_for_each_entry_safe(struct panfrost_bo, candidate, bucket,
	                                 bucket_link) {
	                /* Too small, or not the same kind of BO */
	                if (candidate->size < size || candidate->flags != flags)
	                        continue;

	                /* Still in use by the GPU */
	                if (!panfrost_bo_wait(candidate, timeout_ns,
	                                      PAN_BO_ACCESS_RW))
	                        continue;

	                /* Suitable: take it out of the bucket and the LRU list */
	                list_del(&candidate->bucket_link);
	                list_del(&candidate->lru_link);

	                struct drm_panfrost_madvise madv = {
	                        .handle = candidate->gem_handle,
	                        .madv = PANFROST_MADV_WILLNEED,
	                };
	                int ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE,
	                                   &madv);

	                /* The kernel did not retain the backing storage, so this
	                 * BO is useless: drop it and keep looking. */
	                if (ret == 0 && !madv.retained) {
	                        panfrost_bo_free(candidate);
	                        continue;
	                }

	                found = candidate;
	                break;
	        }
	        pthread_mutex_unlock(&dev->bo_cache.lock);

	        return found;
	}

	/* Frees cached BOs that have been sitting unused for too long. Walks the
	 * LRU list from its head and stops at the first entry that is recent
	 * enough. The caller in this file invokes it with the cache lock held. */
	static void
	panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
	{
	        struct timespec now;

	        clock_gettime(CLOCK_MONOTONIC, &now);

	        list_for_each_entry_safe(struct panfrost_bo, stale,
	                                 &dev->bo_cache.lru, lru_link) {
	                /* The goal is to drop everything last used more than a
	                 * second ago. Only ->tv_sec is compared (->tv_nsec is
	                 * ignored), hence the threshold of 2 rather than 1: some
	                 * entries between 1 and 2 seconds old survive, which is
	                 * fine as long as unused BOs go away eventually. */
	                const bool recent = now.tv_sec - stale->last_used <= 2;

	                if (recent)
	                        break;

	                list_del(&stale->bucket_link);
	                list_del(&stale->lru_link);
	                panfrost_bo_free(stale);
	        }
	}

	/* Tries to add a BO to the cache. Returns if it was
	* successful */

	static bool
	panfrost_bo_cache_put(struct panfrost_bo *bo)
	{
	struct panfrost_device *dev = bo->dev;

	if (bo->flags & PAN_BO_DONT_REUSE)
	return false;

	pthread_mutex_lock(&dev->bo_cache.lock);
	struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
	struct drm_panfrost_madvise madv;
	struct timespec time;

	madv.handle = bo->gem_handle;
	madv.madv = PANFROST_MADV_DONTNEED;
	madv.retained = 0;

	drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

	/* Add us to the bucket */
	list_addtail(&bo->bucket_link, bucket);

	/* Add us to the LRU list and update the last_used field. */
	list_addtail(&bo->lru_link, &dev->bo_cache.lru);
	clock_gettime(CLOCK_MONOTONIC, &time);
	bo->last_used = time.tv_sec;

	/* Let's do some cleanup in the BO cache while we hold the
	* lock.
	*/
	panfrost_bo_cache_evict_stale_bos(dev);
	pthread_mutex_unlock(&dev->bo_cache.lock);

	return true;
	}

	/* Empties the BO cache: every cached BO in every bucket is unlinked and
	 * freed, under the cache lock. Meant for context destruction and
	 * low-memory situations, where memory parked in our cache is unused by
	 * us but still counts as reserved from the OS point of view. */

	void
	panfrost_bo_cache_evict_all(
	                struct panfrost_device *dev)
	{
	        const unsigned bucket_count = ARRAY_SIZE(dev->bo_cache.buckets);

	        pthread_mutex_lock(&dev->bo_cache.lock);
	        for (unsigned b = 0; b < bucket_count; b++) {
	                list_for_each_entry_safe(struct panfrost_bo, victim,
	                                         &dev->bo_cache.buckets[b],
	                                         bucket_link) {
	                        list_del(&victim->bucket_link);
	                        list_del(&victim->lru_link);
	                        panfrost_bo_free(victim);
	                }
	        }
	        pthread_mutex_unlock(&dev->bo_cache.lock);
	}

	/* Maps a BO into the CPU address space, unless it is mapped already.
	 *
	 * On success bo->cpu points at the mapping. On failure an error is
	 * printed, assertions (when enabled) fire, and bo->cpu is left NULL so
	 * that later "is it mapped?" checks and panfrost_bo_munmap() never
	 * mistake MAP_FAILED for a valid pointer. */
	void
	panfrost_bo_mmap(struct panfrost_bo *bo)
	{
	        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };

	        if (bo->cpu)
	                return;

	        /* Ask the kernel for the fake offset to mmap the BO with */
	        if (drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo)) {
	                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
	                assert(0);
	                /* With assertions compiled out there is still no valid
	                 * offset to map, so give up. */
	                return;
	        }

	        bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
	                          bo->dev->fd, mmap_bo.offset);
	        if (bo->cpu == MAP_FAILED) {
	                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
	                /* MAP_FAILED is non-NULL; don't leave it behind */
	                bo->cpu = NULL;
	                assert(0);
	        }
	}

	/* Removes the CPU mapping of a BO, if there is one, and clears bo->cpu.
	 * A failing munmap is fatal: it is reported with perror() and the
	 * process aborts. */
	static void
	panfrost_bo_munmap(struct panfrost_bo *bo)
	{
	        if (bo->cpu) {
	                void *mapping = (void *) (uintptr_t)bo->cpu;

	                if (os_munmap(mapping, bo->size)) {
	                        perror("munmap");
	                        abort();
	                }

	                bo->cpu = NULL;
	        }
	}

	/* Creates (or recycles) a BO of at least `size` bytes with the given
	 * PAN_BO_* flags.
	 *
	 * The BO is returned with a reference count of 1 and is registered in
	 * the device's set of active BOs. Unless PAN_BO_INVISIBLE or
	 * PAN_BO_DELAY_MMAP is set, it is also mapped for CPU access. */
	struct panfrost_bo *
	panfrost_bo_create(struct panfrost_device *dev, size_t size,
	                   uint32_t flags)
	{
	        /* Kernel will fail (confusingly) with EPERM otherwise */
	        assert(size > 0);

	        /* Tiny BOs are rounded up to 4096 bytes to maximize cache reuse */
	        if (size < 4096)
	                size = 4096;

	        /* GROWABLE BOs cannot be mmapped, so they must be INVISIBLE */
	        assert(!(flags & PAN_BO_GROWABLE) || (flags & PAN_BO_INVISIBLE));

	        /* Allocation strategy, in order:
	         *  1. the cache, without waiting for busy BOs (cached BOs may
	         *     still be referenced by unfinished jobs);
	         *  2. a fresh kernel allocation;
	         *  3. the cache again, this time accepting to wait.
	         */
	        struct panfrost_bo *bo =
	                panfrost_bo_cache_fetch(dev, size, flags, true);

	        if (!bo)
	                bo = panfrost_bo_alloc(dev, size, flags);
	        if (!bo)
	                bo = panfrost_bo_cache_fetch(dev, size, flags, false);

	        if (!bo)
	                fprintf(stderr, "BO creation failed\n");

	        assert(bo);

	        /* Map right away only when it is known to be needed. CPU-invisible
	         * buffers are purely for GPU-internal use and are never mapped;
	         * DELAY_MMAP buffers are not mapped here either. */
	        const uint32_t no_mmap_mask = PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP;

	        if ((flags & no_mmap_mask) == 0)
	                panfrost_bo_mmap(bo);

	        p_atomic_set(&bo->refcnt, 1);

	        pthread_mutex_lock(&dev->active_bos_lock);
	        _mesa_set_add(bo->dev->active_bos, bo);
	        pthread_mutex_unlock(&dev->active_bos_lock);

	        return bo;
	}

	void
	panfrost_bo_reference(struct panfrost_bo *bo)
	{
	if (bo) {
	ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
	assert(count != 1);
	}
	}

	/* Drops one reference on a BO. NULL is accepted and ignored.
	 *
	 * When the last reference goes away the BO is removed from the device's
	 * active set, unmapped, and then either parked in the BO cache or, if the
	 * cache refuses it, freed. */
	void
	panfrost_bo_unreference(struct panfrost_bo *bo)
	{
	        if (!bo)
	                return;

	        /* Still referenced by someone else: nothing more to do */
	        if (p_atomic_dec_return(&bo->refcnt) != 0)
	                return;

	        /* Keep our own device pointer; `bo` may be freed below */
	        struct panfrost_device *dev = bo->dev;

	        pthread_mutex_lock(&dev->active_bos_lock);

	        /* Someone might have imported this BO while we were waiting for
	         * the lock; if so it is alive again and must be left alone. */
	        if (p_atomic_read(&bo->refcnt) != 0) {
	                pthread_mutex_unlock(&dev->active_bos_lock);
	                return;
	        }

	        _mesa_set_remove_key(bo->dev->active_bos, bo);

	        /* No users left, so the CPU mapping can go */
	        panfrost_bo_munmap(bo);

	        /* Prefer caching the BO for later allocations over freeing it */
	        if (!panfrost_bo_cache_put(bo))
	                panfrost_bo_free(bo);

	        pthread_mutex_unlock(&dev->active_bos_lock);
	}

	struct panfrost_bo *
	panfrost_bo_import(struct panfrost_device *dev, int fd)
	{
	struct panfrost_bo bo, newbo = rzalloc(dev->memctx, struct panfrost_bo);
	struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
	struct set_entry *entry;
	ASSERTED int ret;
	unsigned gem_handle;

	newbo->dev = dev;

	ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
	assert(!ret);

	newbo->gem_handle = gem_handle;

	pthread_mutex_lock(&dev->active_bos_lock);
	entry = _mesa_set_search_or_add(dev->active_bos, newbo);
	assert(entry);
	bo = (struct panfrost_bo *)entry->key;
	if (newbo == bo) {
	get_bo_offset.handle = gem_handle;
	ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
	assert(!ret);

	newbo->gpu = (mali_ptr) get_bo_offset.offset;
	newbo->size = lseek(fd, 0, SEEK_END);
	newbo->flags \|= PAN_BO_DONT_REUSE \| PAN_BO_IMPORTED;
	assert(newbo->size > 0);
	p_atomic_set(&newbo->refcnt, 1);
	// TODO map and unmap on demand?
	panfrost_bo_mmap(newbo);
	} else {
	ralloc_free(newbo);
	/* bo->refcnt == 0 can happen if the BO
	* was being released but panfrost_bo_import() acquired the
	* lock before panfrost_bo_unreference(). In that case, refcnt
	* is 0 and we can't use panfrost_bo_reference() directly, we
	* have to re-initialize the refcnt().
	* Note that panfrost_bo_unreference() checks
	* refcnt value just after acquiring the lock to
	* make sure the object is not freed if panfrost_bo_import()
	* acquired it in the meantime.
	*/
	if (p_atomic_read(&bo->refcnt) == 0)
	p_atomic_set(&newbo->refcnt, 1);
	else
	panfrost_bo_reference(bo);
	assert(bo->cpu);
	}
	pthread_mutex_unlock(&dev->active_bos_lock);

	return bo;
	}

	int
	panfrost_bo_export(struct panfrost_bo *bo)
	{
	struct drm_prime_handle args = {
	.handle = bo->gem_handle,
	.flags = DRM_CLOEXEC,
	};

	int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
	if (ret == -1)
	return -1;

	bo->flags \|= PAN_BO_DONT_REUSE \| PAN_BO_EXPORTED;
	return args.fd;
	}