MIDCET-4324/GPUCORE-35490: Prevent incorrect clearing of no user free property
Two code paths (in mali_kbase_csf_tiler_heap.c and mali_kbase_csf.c)
did not check whether KBASE_REG_NO_USER_FREE was already set on a
region, so a malicious caller could abuse them to clear the
KBASE_REG_NO_USER_FREE flag on a region that still relied on it.
As a stopgap, we replace the KBASE_REG_NO_USER_FREE flag with a new
refcount for the no-user-free property (a rough sketch of the new
helpers is included below).
In addition to this:
- fix a possible race condition in JIT alloc and tiler
heap init where another thread could take a no-user-free
reference before the DONT_NEED flag had been set
- fix another issue in JIT alloc where reg->flags
was updated without taking the appropriate lock
- move the no user free decref to remove_queue
to clean up context termination code
- refactor memory helpers
Also includes:
- GPUCORE-35469: Fix list del corruption issue in shrinker callback
- GPUCORE-35221: Defer the freeing of VA regions in the chunked tiler heap shrinker callback
- GPUCORE-35499: Fix GROUP_SUSPEND kcpu suspend handling to prevent UAF
- GPUCORE-35268: Fix UAF due to use of MEM_FLAGS_CHANGE ioctl for JIT allocs
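For reference, the flag is replaced by a per-region no_user_free_refcnt
that is only touched while holding kctx->reg_lock, and
kbase_mem_free_region() now rejects any region whose count is non-zero.
The helpers themselves live in mali_kbase_mem.h and are not part of this
diff; the sketch below only illustrates their intended behaviour, and
the exact overflow/underflow handling shown is an assumption.

    /*
     * Minimal sketch of the new no-user-free helpers. The real
     * definitions are in mali_kbase_mem.h (not shown in this diff);
     * the saturation on overflow/underflow below is an assumption.
     * The refcount is protected by kctx->reg_lock, not made atomic.
     */
    static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
                                                       struct kbase_va_region *region)
    {
            lockdep_assert_held(&kctx->reg_lock);
            return region->no_user_free_refcnt > 0;
    }

    static inline struct kbase_va_region *
    kbase_va_region_no_user_free_get(struct kbase_context *kctx,
                                     struct kbase_va_region *region)
    {
            lockdep_assert_held(&kctx->reg_lock);
            /* Refuse to wrap the count (assumption of this sketch). */
            if (!WARN_ON(region->no_user_free_refcnt == INT_MAX))
                    region->no_user_free_refcnt++;
            return region;
    }

    static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
                                                        struct kbase_va_region *region)
    {
            lockdep_assert_held(&kctx->reg_lock);
            /* Dropping a reference that was never taken is a bug. */
            if (!WARN_ON(region->no_user_free_refcnt <= 0))
                    region->no_user_free_refcnt--;
    }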
(cherry picked from commit 7a1dc910a6a8c9c5aa06677c936c8ad6e9c369ab)
Bug: 260123539
Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/4801
Change-Id: I7e2349b135e61054f567bdf0577d27eb224d2b12
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 12ab66f..2349436 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -453,6 +453,17 @@
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
+
+ /* After this the Userspace would be able to free the
+ * memory for GPU queue. In case the Userspace missed
+ * terminating the queue, the cleanup will happen on
+ * context termination where tear down of region tracker
+ * would free up the GPU queue memory.
+ */
+ kbase_gpu_vm_lock(queue->kctx);
+ kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg);
+ kbase_gpu_vm_unlock(queue->kctx);
+
kfree(queue);
}
}
@@ -506,7 +517,8 @@
region = kbase_region_tracker_find_region_enclosing_address(kctx,
queue_addr);
- if (kbase_is_region_invalid_or_free(region)) {
+ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) ||
+ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
ret = -ENOENT;
goto out_unlock_vm;
}
@@ -555,7 +567,7 @@
queue->kctx = kctx;
queue->base_addr = queue_addr;
- queue->queue_reg = region;
+ queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region);
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
@@ -589,7 +601,6 @@
queue->extract_ofs = 0;
- region->flags |= KBASE_REG_NO_USER_FREE;
region->user_data = queue;
/* Initialize the cs_trace configuration parameters, When buffer_size
@@ -683,16 +694,8 @@
unbind_queue(kctx, queue);
kbase_gpu_vm_lock(kctx);
- if (!WARN_ON(!queue->queue_reg)) {
- /* After this the Userspace would be able to free the
- * memory for GPU queue. In case the Userspace missed
- * terminating the queue, the cleanup will happen on
- * context termination where tear down of region tracker
- * would free up the GPU queue memory.
- */
- queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ if (!WARN_ON(!queue->queue_reg))
queue->queue_reg->user_data = NULL;
- }
kbase_gpu_vm_unlock(kctx);
dev_dbg(kctx->kbdev->dev,
@@ -1941,9 +1944,7 @@
* only one reference left that was taken when queue was
* registered.
*/
- if (atomic_read(&queue->refcount) != 1)
- dev_warn(kctx->kbdev->dev,
- "Releasing queue with incorrect refcounting!\n");
+ WARN_ON(atomic_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 828e8a6..f94806e 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -144,7 +144,14 @@
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
- ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * We can't enforce (nor check) the no_user_free refcount
+ * to be 0 here as other code regions can take such a reference.
+ * Anyway, this isn't an issue as the region will eventually
+ * be freed by the region tracker even if its refcount doesn't
+ * drop to 0.
+ */
+ kbase_va_region_no_user_free_put(kctx, ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 2b5308f..4b05da8 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -672,9 +672,8 @@
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
- (reg->flags & KBASE_REG_DONT_NEED) ||
- (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ||
- (reg->flags & KBASE_REG_NO_USER_FREE)) {
+ (kbase_is_region_shrinkable(reg)) ||
+ (kbase_va_region_is_no_user_free(kctx, reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 905923a..d33fa03 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -7213,6 +7213,9 @@
/* Attempt freeing all the counted heap pages from the kctx */
u32 n = shrink_ctrl->scan_cb(kctx, info->nr_scan_pages);
+ if (n)
+ schedule_work(&kctx->jit_work);
+
/* The free is attempted on all the counted heap pages. If the kctx has
* all its counted heap pages freed, or, it can't offer anymore, drop
* it from the reclaim manger, otherwise leave it remaining in. If the
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index b0d3825..70ecd80 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -74,6 +74,33 @@
}
/**
+ * remove_external_chunk_mappings - Remove external mappings from a chunk that
+ * is being transitioned to the tiler heap
+ * memory system.
+ *
+ * @kctx: kbase context the chunk belongs to.
+ * @chunk: The chunk whose external mappings are going to be removed.
+ *
+ * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
+ * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
+ * parts of kbase outside of tiler heap management should not take references on its physical
+ * pages, and should not modify them.
+ */
+static void remove_external_chunk_mappings(struct kbase_context *const kctx,
+ struct kbase_csf_tiler_heap_chunk *chunk)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (chunk->region->cpu_alloc != NULL) {
+ kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
+ chunk->region->cpu_alloc->nents);
+ }
+ chunk->region->flags |= KBASE_REG_DONT_NEED;
+
+ dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
+}
+
+/**
* link_chunk - Link a chunk into a tiler heap
*
* @heap: Pointer to the tiler heap.
@@ -93,19 +120,12 @@
if (prev) {
struct kbase_context *const kctx = heap->kctx;
- struct kbase_vmap_struct map;
- u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va,
- sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map);
+ u64 *prev_hdr = prev->map.addr;
- if (unlikely(!prev_hdr)) {
- dev_err(kctx->kbdev->dev,
- "Failed to map tiler heap chunk 0x%llX\n",
- prev->gpu_va);
- return -ENOMEM;
- }
+ WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
+ "Cannot support CPU cached chunks without sync operations");
*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
- kbase_vunmap(kctx, &map);
dev_dbg(kctx->kbdev->dev,
"Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
@@ -132,160 +152,274 @@
static int init_chunk(struct kbase_csf_tiler_heap *const heap,
struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
{
- struct kbase_vmap_struct map;
- struct u64 *chunk_hdr = NULL;
+ int err = 0;
+ u64 *chunk_hdr;
struct kbase_context *const kctx = heap->kctx;
+ lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+
if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
dev_err(kctx->kbdev->dev,
"Tiler heap chunk address is unusable\n");
return -EINVAL;
}
- chunk_hdr = kbase_vmap_prot(kctx,
- chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map);
-
- if (unlikely(!chunk_hdr)) {
- dev_err(kctx->kbdev->dev,
- "Failed to map a tiler heap chunk header\n");
- return -ENOMEM;
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+ "Cannot support CPU cached chunks without sync operations");
+ chunk_hdr = chunk->map.addr;
+ if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
+ "Tiler chunk kernel mapping was not large enough for zero-init")) {
+ return -EINVAL;
}
memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
- kbase_vunmap(kctx, &map);
+ INIT_LIST_HEAD(&chunk->link);
if (link_with_prev)
- return link_chunk(heap, chunk);
- else
- return 0;
+ err = link_chunk(heap, chunk);
+
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
+ return -EINVAL;
+ }
+
+ list_add_tail(&chunk->link, &heap->chunks_list);
+ heap->chunk_count++;
+
+ return err;
}
/**
- * create_chunk - Create a tiler heap chunk
+ * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
+ * heap.
*
- * @heap: Pointer to the tiler heap for which to allocate memory.
- * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
- * linked with the previously allocated chunk.
- *
- * This function allocates a chunk of memory for a tiler heap and adds it to
- * the end of the list of chunks associated with that heap. The size of the
- * chunk is not a parameter because it is configured per-heap not per-chunk.
- *
- * Return: 0 if successful or a negative error code on failure.
+ * @kctx: Kbase context that was used to allocate the memory.
+ * @chunk: Chunk that has been allocated, but not linked into a heap.
*/
-static int create_chunk(struct kbase_csf_tiler_heap *const heap,
- bool link_with_prev)
+static void remove_unlinked_chunk(struct kbase_context *kctx,
+ struct kbase_csf_tiler_heap_chunk *chunk)
{
- int err = 0;
- struct kbase_context *const kctx = heap->kctx;
- u64 nr_pages = PFN_UP(heap->chunk_size);
+ if (WARN_ON(!list_empty(&chunk->link)))
+ return;
+
+ kbase_gpu_vm_lock(kctx);
+ kbase_vunmap(kctx, &chunk->map);
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
+ * regions), and so we must clear that flag too before freeing.
+ * For "no user free", we check that the refcount is 1 as it is a shrinkable region;
+ * no other code part within kbase can take a reference to it.
+ */
+ WARN_ON(chunk->region->no_user_free_refcnt > 1);
+ kbase_va_region_no_user_free_put(kctx, chunk->region);
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+ kbase_mem_free_region(kctx, chunk->region);
+ kbase_gpu_vm_unlock(kctx);
+
+ kfree(chunk);
+}
+
+/**
+ * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
+ * region for the chunk, and provide a kernel mapping.
+ * @kctx: kbase context with which the chunk will be linked
+ * @chunk_size: the size of the chunk from the corresponding heap
+ *
+ * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
+ * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
+ * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
+ * lock).
+ *
+ * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
+ * ensure that it is protected from being mapped by other parts of kbase.
+ *
+ * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
+ * shrinker path, as it may be otherwise shrunk at any time.
+ *
+ * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
+ * on failure
+ */
+static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
+ u64 chunk_size)
+{
+ u64 nr_pages = PFN_UP(chunk_size);
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+ /* The chunk kernel mapping needs to be large enough to:
+ * - initially zero the CHUNK_HDR_SIZE area
+ * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
+ */
+ const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
/* Calls to this function are inherently synchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
-
flags |= kbase_mem_group_id_set(kctx->jit_group_id);
chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
if (unlikely(!chunk)) {
dev_err(kctx->kbdev->dev,
"No kernel memory for a new tiler heap chunk\n");
- return -ENOMEM;
+ return NULL;
}
/* Allocate GPU memory for the new chunk. */
- INIT_LIST_HEAD(&chunk->link);
chunk->region =
kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
if (unlikely(!chunk->region)) {
- dev_err(kctx->kbdev->dev,
- "Failed to allocate a tiler heap chunk\n");
+ dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
+ goto unroll_chunk;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* Some checks are done here as NO_USER_FREE still allows such changes to be
+ * made whilst we had dropped the region lock
+ */
+ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
+ dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
+ goto unroll_region;
+ }
+
+ /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
+ * thread can have the "no user free" refcount increased between kbase_mem_alloc
+ * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
+ * remove_external_chunk_mappings (below).
+ *
+ * It should be fine and not a security risk if we let the region leak till
+ * region tracker termination in such a case.
+ */
+ if (unlikely(chunk->region->no_user_free_refcnt > 1)) {
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n");
+ goto unroll_region;
+ }
+
+ /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
+ * being requested, it's useful to document in code what those restrictions are, and ensure
+ * they remain in place in future.
+ */
+ if (WARN(!chunk->region->gpu_alloc,
+ "NO_USER_FREE chunks should not have had their alloc freed")) {
+ goto unroll_region;
+ }
+
+ if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
+ "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
+ goto unroll_region;
+ }
+
+ if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
+ "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
+ goto unroll_region;
+ }
+
+ if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
+ "NO_USER_FREE chunks should not have been made ephemeral")) {
+ goto unroll_region;
+ }
+
+ if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
+ "NO_USER_FREE chunks should not have been aliased")) {
+ goto unroll_region;
+ }
+
+ if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
+ dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
+ goto unroll_region;
+ }
+
+ remove_external_chunk_mappings(kctx, chunk);
+ kbase_gpu_vm_unlock(kctx);
+
+ return chunk;
+
+unroll_region:
+ /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
+ * regions), and so we must clear that flag too before freeing.
+ */
+ kbase_va_region_no_user_free_put(kctx, chunk->region);
+ chunk->region->flags &= ~KBASE_REG_DONT_NEED;
+ kbase_mem_free_region(kctx, chunk->region);
+ kbase_gpu_vm_unlock(kctx);
+unroll_chunk:
+ kfree(chunk);
+ return NULL;
+}
+
+/**
+ * create_chunk - Create a tiler heap chunk
+ *
+ * @heap: Pointer to the tiler heap for which to allocate memory.
+ *
+ * This function allocates a chunk of memory for a tiler heap, adds it to the
+ * list of chunks associated with that heap, both on the host side and in GPU
+ * memory.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int create_chunk(struct kbase_csf_tiler_heap *const heap)
+{
+ int err = 0;
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+ chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
+ if (unlikely(!chunk)) {
err = -ENOMEM;
- } else {
- err = init_chunk(heap, chunk, link_with_prev);
- if (unlikely(err)) {
- kbase_gpu_vm_lock(kctx);
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
- kbase_mem_free_region(kctx, chunk->region);
- kbase_gpu_vm_unlock(kctx);
- }
+ goto allocation_failure;
}
- if (unlikely(err)) {
- kfree(chunk);
- } else {
- list_add_tail(&chunk->link, &heap->chunks_list);
- heap->chunk_count++;
+ mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
+ err = init_chunk(heap, chunk, true);
+ mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
- dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n",
- chunk->gpu_va);
- }
+ if (unlikely(err))
+ goto initialization_failure;
+ dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
+
+ return 0;
+initialization_failure:
+ remove_unlinked_chunk(heap->kctx, chunk);
+allocation_failure:
return err;
}
-static void mark_free_mem_bypassing_pool(struct kbase_va_region *reg)
-{
- if (WARN_ON(reg->gpu_alloc == NULL))
- return;
-
- reg->gpu_alloc->evicted = reg->gpu_alloc->nents;
- kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
-}
-
/**
- * delete_chunk - Delete a tiler heap chunk
- *
- * @heap: Pointer to the tiler heap for which @chunk was allocated.
- * @chunk: Pointer to a chunk to be deleted.
- * @reclaim: Indicating the deletion is from shrinking reclaim or not.
- *
- * This function frees a tiler heap chunk previously allocated by @create_chunk
- * and removes it from the list of chunks associated with the heap.
- *
- * WARNING: The deleted chunk is not unlinked from the list of chunks used by
- * the GPU, therefore it is only safe to use this function when
- * deleting a heap, or under reclaim operations when the relevant CSGS
- * are off-slots for the given kctx.
- */
-static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
- struct kbase_csf_tiler_heap_chunk *const chunk, bool reclaim)
-{
- struct kbase_context *const kctx = heap->kctx;
-
- kbase_gpu_vm_lock(kctx);
- chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
- if (reclaim)
- mark_free_mem_bypassing_pool(chunk->region);
- kbase_mem_free_region(kctx, chunk->region);
- kbase_gpu_vm_unlock(kctx);
- list_del(&chunk->link);
- heap->chunk_count--;
- kfree(chunk);
-}
-
-/**
- * delete_all_chunks - Delete all chunks belonging to a tiler heap
+ * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
*
* @heap: Pointer to a tiler heap.
*
- * This function empties the list of chunks associated with a tiler heap by
- * freeing all chunks previously allocated by @create_chunk.
+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks
+ * previously allocated by @create_chunk.
+ *
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
+ * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
+ * kbase_context.region_lock.
+ *
+ * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
+ * list of chunks used by the GPU, therefore it is only safe to use this function when
+ * deleting a heap.
*/
static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
{
+ struct kbase_context *const kctx = heap->kctx;
struct list_head *entry = NULL, *tmp = NULL;
+ WARN(!list_empty(&heap->link),
+ "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
+
list_for_each_safe(entry, tmp, &heap->chunks_list) {
struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
entry, struct kbase_csf_tiler_heap_chunk, link);
- delete_chunk(heap, chunk, false);
+ list_del_init(&chunk->link);
+ heap->chunk_count--;
+
+ remove_unlinked_chunk(kctx, chunk);
}
}
@@ -307,7 +441,7 @@
u32 i;
for (i = 0; (i < nchunks) && likely(!err); i++)
- err = create_chunk(heap, true);
+ err = create_chunk(heap);
if (unlikely(err))
delete_all_chunks(heap);
@@ -316,14 +450,17 @@
}
/**
- * delete_heap - Delete a tiler heap
+ * delete_heap - Delete an unlinked tiler heap
*
* @heap: Pointer to a tiler heap to be deleted.
*
* This function frees any chunks allocated for a tiler heap previously
- * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
- * heaps associated with the kbase context. The heap context structure used by
+ * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
* the firmware is also freed.
+ *
+ * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
+ * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
+ * kbase_context.region_lock.
*/
static void delete_heap(struct kbase_csf_tiler_heap *heap)
{
@@ -331,27 +468,38 @@
dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
- lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
+ WARN(!list_empty(&heap->link),
+ "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
+ /* Make sure that all of the VA regions corresponding to the chunks are
+ * freed at this time and that the work queue is not trying to access freed
+ * memory.
+ *
+ * Note: since the heap is unlinked and no references are made to chunks other
+ * than from their heap, there is no need to separately move the chunks out of the
+ * heap->chunks_list to delete them.
+ */
delete_all_chunks(heap);
+ kbase_vunmap(kctx, &heap->gpu_va_map);
/* We could optimize context destruction by not freeing leaked heap
- * contexts but it doesn't seem worth the extra complexity.
+ * contexts but it doesn't seem worth the extra complexity. After this
+ * point, the suballocation is returned to the heap context allocator and
+ * may be overwritten with new data, meaning heap->gpu_va should not
+ * be used past this point.
*/
kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
heap->gpu_va);
- list_del(&heap->link);
- atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
-
WARN_ON(heap->chunk_count);
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
heap->target_in_flight, 0);
- if (heap->buf_desc_va) {
+ if (heap->buf_desc_reg) {
+ kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
- heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -390,6 +538,23 @@
return NULL;
}
+static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
+ u64 const chunk_gpu_va)
+{
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
+ if (chunk->gpu_va == chunk_gpu_va)
+ return chunk;
+ }
+
+ dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
+
+ return NULL;
+}
+
int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
{
int err = kbase_csf_heap_context_allocator_init(
@@ -409,25 +574,76 @@
void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
{
+ LIST_HEAD(local_heaps_list);
struct list_head *entry = NULL, *tmp = NULL;
dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
mutex_lock(&kctx->csf.tiler_heaps.lock);
+ list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
- list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) {
+ list_for_each_safe(entry, tmp, &local_heaps_list) {
struct kbase_csf_tiler_heap *heap = list_entry(
entry, struct kbase_csf_tiler_heap, link);
+
+ list_del_init(&heap->link);
+ atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
+
delete_heap(heap);
}
WARN_ON(atomic_read(&kctx->csf.tiler_heaps.est_count_pages) != 0);
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
mutex_destroy(&kctx->csf.tiler_heaps.lock);
kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
}
+/**
+ * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
+ * the tiler heap buffer descriptor
+ * is suitable for the purpose.
+ * @kctx: kbase context of the tiler heap
+ * @reg: VA region being checked for suitability
+ *
+ * The tiler heap buffer descriptor memory does not admit page faults according
+ * to its design, so it must have the entirety of the backing upon allocation,
+ * and it has to remain alive as long as the tiler heap is alive, meaning it
+ * cannot be allocated from JIT/ephemeral or user-freeable memory.
+ *
+ * Return: true on suitability, false otherwise.
+ */
+static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg)
+{
+ if (kbase_is_region_invalid_or_free(reg)) {
+ dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
+ return false;
+ }
+
+ if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
+ (reg->flags & KBASE_REG_PF_GROW)) {
+ dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
+ return false;
+ }
+
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+ dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
+ return false;
+ }
+
+ if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
+ (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
+ dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
+ return false;
+ }
+
+ return true;
+}
+
+#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
+
int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
u32 const initial_chunks, u32 const max_chunks,
u16 const target_in_flight, u64 const buf_desc_va,
@@ -437,10 +653,12 @@
struct kbase_csf_tiler_heap *heap = NULL;
struct kbase_csf_heap_context_allocator *const ctx_alloc =
&kctx->csf.tiler_heaps.ctx_alloc;
- struct kbase_va_region *reg = NULL;
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
+ struct kbase_va_region *gpu_va_reg = NULL;
+ void *vmap_ptr = NULL;
dev_dbg(kctx->kbdev->dev,
- "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx",
+ "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
initial_chunks, max_chunks, chunk_size, buf_desc_va);
if (!kbase_mem_allow_alloc(kctx))
@@ -461,95 +679,138 @@
if (target_in_flight == 0)
return -EINVAL;
- /* Check on the buffer descriptor virtual Address */
- if (buf_desc_va) {
- kbase_gpu_vm_lock(kctx);
- reg = kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
- if (kbase_is_region_invalid_or_free(reg) || !(reg->flags & KBASE_REG_CPU_RD) ||
- (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)) {
- kbase_gpu_vm_unlock(kctx);
- return -EINVAL;
- }
-
- reg->flags |= KBASE_REG_NO_USER_FREE;
- kbase_gpu_vm_unlock(kctx);
- }
-
heap = kzalloc(sizeof(*heap), GFP_KERNEL);
if (unlikely(!heap)) {
dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
- err = -ENOMEM;
- goto err_out;
+ return -ENOMEM;
}
heap->kctx = kctx;
heap->chunk_size = chunk_size;
heap->max_chunks = max_chunks;
heap->target_in_flight = target_in_flight;
- heap->buf_desc_va = buf_desc_va;
- heap->buf_desc_reg = reg;
heap->desc_chk_flags = 0;
heap->desc_chk_cnt = 0;
INIT_LIST_HEAD(&heap->chunks_list);
+ INIT_LIST_HEAD(&heap->link);
+
+ /* Check on the buffer descriptor virtual Address */
+ if (buf_desc_va) {
+ struct kbase_va_region *buf_desc_reg;
+
+ kbase_gpu_vm_lock(kctx);
+ buf_desc_reg =
+ kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
+
+ if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(kctx->kbdev->dev,
+ "Could not find a suitable VA region for the tiler heap buf desc!\n");
+ err = -EINVAL;
+ goto buf_desc_not_suitable;
+ }
+
+ /* If we don't prevent userspace from unmapping this, we may run into
+ * use-after-free, as we don't check for the existence of the region throughout.
+ */
+
+ heap->buf_desc_va = buf_desc_va;
+ heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg);
+
+ vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
+ KBASE_REG_CPU_RD, &heap->buf_desc_map,
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
+ kbase_gpu_vm_unlock(kctx);
+
+ if (unlikely(!vmap_ptr)) {
+ dev_err(kctx->kbdev->dev,
+ "Could not vmap buffer descriptor into kernel memory (err %d)\n",
+ err);
+ err = -ENOMEM;
+ goto buf_desc_vmap_failed;
+ }
+ }
heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
-
if (unlikely(!heap->gpu_va)) {
- dev_dbg(kctx->kbdev->dev,
- "Failed to allocate a tiler heap context");
+ dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
err = -ENOMEM;
- } else {
- err = create_initial_chunks(heap, initial_chunks);
- if (unlikely(err))
- kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
+ goto heap_context_alloc_failed;
}
- if (likely(!err)) {
- struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
- &heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
+ gpu_va_reg = ctx_alloc->region;
- *heap_gpu_va = heap->gpu_va;
- *first_chunk_va = chunk->gpu_va;
+ kbase_gpu_vm_lock(kctx);
+ /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
+ * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
+ * assume that gpu_va_reg is still present.
+ */
+ vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
+ (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
+ kbase_gpu_vm_unlock(kctx);
+ if (unlikely(!vmap_ptr)) {
+ dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
+ err = -ENOMEM;
+ goto heap_context_vmap_failed;
+ }
- mutex_lock(&kctx->csf.tiler_heaps.lock);
- kctx->csf.tiler_heaps.nr_of_heaps++;
- heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
- list_add(&heap->link, &kctx->csf.tiler_heaps.list);
+ err = create_initial_chunks(heap, initial_chunks);
+ if (unlikely(err)) {
+ dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
+ goto create_chunks_failed;
+ }
+ chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
- kctx->kbdev, kctx->id, heap->heap_id,
- PFN_UP(heap->chunk_size * heap->max_chunks),
- PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
- heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0);
+ *heap_gpu_va = heap->gpu_va;
+ *first_chunk_va = chunk->gpu_va;
+
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
+ kctx->csf.tiler_heaps.nr_of_heaps++;
+ heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
+ list_add(&heap->link, &kctx->csf.tiler_heaps.list);
+
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
+ PFN_UP(heap->chunk_size * heap->max_chunks),
+ PFN_UP(heap->chunk_size * heap->chunk_count),
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
+ heap->target_in_flight, 0);
#if defined(CONFIG_MALI_VECTOR_DUMP)
- list_for_each_entry(chunk, &heap->chunks_list, link) {
- KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(
- kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
- }
-#endif
- kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
- kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
- if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
- kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
-
- /* Assuming at least one chunk reclaimable per heap on (estimated) count */
- atomic_add(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
- dev_dbg(kctx->kbdev->dev,
- "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d",
- heap->gpu_va, buf_desc_va, kctx->tgid, kctx->id);
- mutex_unlock(&kctx->csf.tiler_heaps.lock);
-
- return 0;
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
+ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
+ chunk->gpu_va);
}
+#endif
+ kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
+ kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
-err_out:
- kfree(heap);
- if (buf_desc_va) {
+ /* Assuming at least one chunk reclaimable per heap on (estimated) count */
+ atomic_add(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
+ dev_dbg(kctx->kbdev->dev,
+ "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
+ buf_desc_va, kctx->tgid, kctx->id);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
+ return 0;
+
+create_chunks_failed:
+ kbase_vunmap(kctx, &heap->gpu_va_map);
+heap_context_vmap_failed:
+ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
+heap_context_alloc_failed:
+ if (heap->buf_desc_reg)
+ kbase_vunmap(kctx, &heap->buf_desc_map);
+buf_desc_vmap_failed:
+ if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
+buf_desc_not_suitable:
+ kfree(heap);
return err;
}
@@ -562,15 +823,20 @@
u64 heap_size = 0;
mutex_lock(&kctx->csf.tiler_heaps.lock);
-
heap = find_tiler_heap(kctx, heap_gpu_va);
if (likely(heap)) {
chunk_count = heap->chunk_count;
heap_size = heap->chunk_size * chunk_count;
- delete_heap(heap);
- } else
- err = -EINVAL;
+ list_del_init(&heap->link);
+ atomic_sub(PFN_UP(heap->chunk_size), &kctx->csf.tiler_heaps.est_count_pages);
+ } else {
+ err = -EINVAL;
+ }
+
+ /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
+ * at all times
+ */
if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
kctx->running_total_tiler_heap_memory -= heap_size;
else
@@ -583,39 +849,25 @@
"Running total tiler chunk count lower than expected!");
if (!err)
dev_dbg(kctx->kbdev->dev,
- "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d",
+ "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
+
+ /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
+ * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
+ */
+ if (likely(heap))
+ delete_heap(heap);
+
return err;
}
-/**
- * alloc_new_chunk - Allocate a new chunk for the tiler heap.
- *
- * @heap: Pointer to the tiler heap.
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
- * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
- * The minimum value is zero but it must be less or equal to
- * the total number of render passes in flight
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
- * chunk allocated for the heap.
- *
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
- *
- * Return: 0 if a new chunk was allocated otherwise an appropriate negative
- * error code.
- */
-static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
- u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
+static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
+ u32 pending_frag_count)
{
- int err = -ENOMEM;
-
lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
- if (WARN_ON(!nr_in_flight) ||
- WARN_ON(pending_frag_count > nr_in_flight))
+ if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
return -EINVAL;
if (nr_in_flight <= heap->target_in_flight) {
@@ -623,109 +875,215 @@
/* Not exceeded the target number of render passes yet so be
* generous with memory.
*/
- err = create_chunk(heap, false);
-
- if (likely(!err)) {
- struct kbase_csf_tiler_heap_chunk *new_chunk =
- get_last_chunk(heap);
- if (!WARN_ON(!new_chunk)) {
- *new_chunk_ptr =
- encode_chunk_ptr(heap->chunk_size,
- new_chunk->gpu_va);
- return 0;
- }
- }
+ return 0;
} else if (pending_frag_count > 0) {
- err = -EBUSY;
+ return -EBUSY;
} else {
- err = -ENOMEM;
+ return -ENOMEM;
}
} else {
/* Reached target number of render passes in flight.
* Wait for some of them to finish
*/
- err = -EBUSY;
+ return -EBUSY;
}
-
- return err;
+ return -ENOMEM;
}
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
{
struct kbase_csf_tiler_heap *heap;
+ struct kbase_csf_tiler_heap_chunk *chunk;
int err = -EINVAL;
+ u64 chunk_size = 0;
+ u64 heap_id = 0;
+
+ /* To avoid potential locking issues during allocation, this is handled
+ * in three phases:
+ * 1. Take the lock, find the corresponding heap, and find its chunk size
+ * (this is always 2 MB, but may change down the line).
+ * 2. Allocate memory for the chunk and its region.
+ * 3. If the heap still exists, link the chunk to the end of its chunk list. If it
+ * doesn't, roll back the allocation.
+ */
mutex_lock(&kctx->csf.tiler_heaps.lock);
+ heap = find_tiler_heap(kctx, gpu_heap_va);
+ if (likely(heap)) {
+ chunk_size = heap->chunk_size;
+ heap_id = heap->heap_id;
+ } else {
+ dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto prelink_failure;
+ }
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev,
+ "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
+ gpu_heap_va, err);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto prelink_failure;
+ }
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ /* this heap must not be used whilst we have dropped the lock */
+ heap = NULL;
+
+ chunk = alloc_new_chunk(kctx, chunk_size);
+ if (unlikely(!chunk)) {
+ dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
+ chunk_size, kctx->tgid, kctx->id);
+ goto prelink_failure;
+ }
+
+ /* After this point, the heap that we were targeting could already have had the needed
+ * chunks allocated, if we were handling multiple OoM events on multiple threads, so
+ * we need to revalidate the need for the allocation.
+ */
+ mutex_lock(&kctx->csf.tiler_heaps.lock);
heap = find_tiler_heap(kctx, gpu_heap_va);
- if (likely(heap)) {
- err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
- new_chunk_ptr);
- if (likely(!err)) {
- /* update total and peak tiler heap memory record */
- kctx->running_total_tiler_heap_nr_chunks++;
- kctx->running_total_tiler_heap_memory += heap->chunk_size;
-
- if (kctx->running_total_tiler_heap_memory >
- kctx->peak_total_tiler_heap_memory)
- kctx->peak_total_tiler_heap_memory =
- kctx->running_total_tiler_heap_memory;
- }
-
- KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
- kctx->kbdev, kctx->id, heap->heap_id,
- PFN_UP(heap->chunk_size * heap->max_chunks),
- PFN_UP(heap->chunk_size * heap->chunk_count),
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
- heap->target_in_flight, nr_in_flight);
+ if (unlikely(!heap)) {
+ dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto unroll_chunk;
}
+ if (heap_id != heap->heap_id) {
+ dev_err(kctx->kbdev->dev,
+ "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
+ gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto unroll_chunk;
+ }
+
+ if (WARN_ON(chunk_size != heap->chunk_size)) {
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto unroll_chunk;
+ }
+
+ err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
+ if (unlikely(err)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
+ gpu_heap_va, err);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto unroll_chunk;
+ }
+
+ err = init_chunk(heap, chunk, false);
+
+ /* On error, the chunk would not be linked, so we can still treat it as an unlinked
+ * chunk for error handling.
+ */
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev,
+ "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d",
+ chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
+ mutex_unlock(&kctx->csf.tiler_heaps.lock);
+ goto unroll_chunk;
+ }
+
+ *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
+
+ /* update total and peak tiler heap memory record */
+ kctx->running_total_tiler_heap_nr_chunks++;
+ kctx->running_total_tiler_heap_memory += heap->chunk_size;
+
+ if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
+
+ KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
+ PFN_UP(heap->chunk_size * heap->max_chunks),
+ PFN_UP(heap->chunk_size * heap->chunk_count),
+ heap->max_chunks, heap->chunk_size, heap->chunk_count,
+ heap->target_in_flight, nr_in_flight);
+
mutex_unlock(&kctx->csf.tiler_heaps.lock);
return err;
+unroll_chunk:
+ remove_unlinked_chunk(kctx, chunk);
+prelink_failure:
+ return err;
}
-static bool delete_chunk_from_gpu_va(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
- u64 *hdr_val)
+static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
+ u64 *hdr_val)
{
+ int err;
+ u64 *chunk_hdr;
struct kbase_context *kctx = heap->kctx;
- struct kbase_csf_tiler_heap_chunk *chunk;
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
- list_for_each_entry(chunk, &heap->chunks_list, link) {
- struct kbase_vmap_struct map;
- u64 *chunk_hdr;
+ lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
- if (chunk->gpu_va != chunk_gpu_va)
- continue;
- /* Found it, extract next chunk header before delete it */
- chunk_hdr = kbase_vmap_prot(kctx, chunk_gpu_va, sizeof(*chunk_hdr),
- KBASE_REG_CPU_RD, &map);
-
- if (unlikely(!chunk_hdr)) {
- dev_warn(
- kctx->kbdev->dev,
- "Failed to map tiler heap(0x%llX) chunk(0x%llX) for reclaim extract next header",
- heap->gpu_va, chunk_gpu_va);
- return false;
- }
-
- *hdr_val = *chunk_hdr;
- kbase_vunmap(kctx, &map);
-
- dev_dbg(kctx->kbdev->dev,
- "Scan reclaim delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)",
- chunk_gpu_va, heap->gpu_va, *hdr_val);
- delete_chunk(heap, chunk, true);
-
- return true;
+ chunk = find_chunk(heap, chunk_gpu_va);
+ if (unlikely(!chunk)) {
+ dev_warn(kctx->kbdev->dev,
+ "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
+ heap->gpu_va, chunk_gpu_va);
+ return false;
}
- dev_warn(kctx->kbdev->dev,
- "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete", heap->gpu_va,
- chunk_gpu_va);
- return false;
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+ "Cannot support CPU cached chunks without sync operations");
+ chunk_hdr = chunk->map.addr;
+ *hdr_val = *chunk_hdr;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
+
+ err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
+ if (unlikely(err)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
+ chunk_gpu_va, heap->gpu_va, err);
+
+ /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
+ * the heap's chunk list and try a different heap.
+ */
+
+ return false;
+ }
+ /* Destroy the mapping before the physical pages which are mapped are destroyed. */
+ kbase_vunmap(kctx, &chunk->map);
+
+ err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
+ chunk->region->gpu_alloc->nents);
+ if (unlikely(err)) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
+ chunk_gpu_va, heap->gpu_va, err);
+
+ /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
+ * anyway, so continue instead of returning early.
+ *
+ * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
+ * its mapping removed, as that could lead to problems. It's safest to instead
+ * continue with deferred destruction of the chunk.
+ */
+ }
+
+ dev_dbg(kctx->kbdev->dev,
+ "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
+ chunk_gpu_va, heap->gpu_va, *hdr_val);
+
+ mutex_lock(&heap->kctx->jit_evict_lock);
+ list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
+ mutex_unlock(&heap->kctx->jit_evict_lock);
+
+ list_del(&chunk->link);
+ heap->chunk_count--;
+ kfree(chunk);
+
+ return true;
}
static bool heap_buffer_decsriptor_checked(struct kbase_csf_tiler_heap *const heap)
@@ -741,22 +1099,19 @@
lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
if (ptr_addr) {
- struct kbase_csf_tiler_heap_chunk *chunk;
+ struct kbase_csf_tiler_heap_chunk *chunk = find_chunk(heap, ptr_addr);
- /* desc->pointer must be a chunk in the given heap */
- list_for_each_entry(chunk, &heap->chunks_list, link) {
- if (chunk->gpu_va == ptr_addr) {
- dev_dbg(heap->kctx->kbdev->dev,
- "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed",
- heap->buf_desc_va);
+ if (likely(chunk)) {
+ dev_dbg(heap->kctx->kbdev->dev,
+ "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
+ heap->buf_desc_va);
- heap->desc_chk_flags = HEAP_BUF_DESCRIPTOR_CHECKED;
- return;
- }
+ heap->desc_chk_flags = HEAP_BUF_DESCRIPTOR_CHECKED;
+ return;
}
}
/* If there is no match, defer the check to next time */
- dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred",
+ dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
heap->buf_desc_va);
}
@@ -774,102 +1129,87 @@
return true;
/* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */
- if (heap->buf_desc_va) {
- struct kbase_vmap_struct map;
- struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot(
- kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map);
+ if (heap->buf_desc_reg) {
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
- if (unlikely(!desc)) {
- dev_warn_once(kctx->kbdev->dev,
- "Sanity check: buffer descriptor 0x%llX map failed",
- heap->buf_desc_va);
- goto out;
- }
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
sanity_check_gpu_buffer_heap(heap, desc);
checked = heap_buffer_decsriptor_checked(heap);
if (checked)
*ptr_u64 = desc->pointer & CHUNK_ADDR_MASK;
-
- kbase_vunmap(kctx, &map);
}
-out:
return checked;
}
static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
{
u32 freed = 0;
- u64 gpu_va = 0;
+ u64 chunk_gpu_va = 0;
struct kbase_context *kctx = heap->kctx;
+ struct kbase_csf_tiler_heap_chunk *chunk = NULL;
lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
- if (can_read_hw_gpu_buffer_heap(heap, &gpu_va)) {
+ if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
u64 chunk_hdr_val;
u64 *hw_hdr;
- struct kbase_vmap_struct map;
- if (!gpu_va) {
- struct kbase_csf_gpu_buffer_heap *desc = kbase_vmap_prot(
- kctx, heap->buf_desc_va, sizeof(*desc), KBASE_REG_CPU_RD, &map);
+ if (!chunk_gpu_va) {
+ struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
- if (unlikely(!desc)) {
- dev_warn(
- kctx->kbdev->dev,
- "Failed to map Buffer descriptor 0x%llX for HW reclaim scan",
- heap->buf_desc_va);
- goto out;
- }
+ /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
+ if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
+ kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
+ KBASE_SYNC_TO_CPU);
+ chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
- gpu_va = desc->pointer & CHUNK_ADDR_MASK;
- kbase_vunmap(kctx, &map);
-
- if (!gpu_va) {
+ if (!chunk_gpu_va) {
dev_dbg(kctx->kbdev->dev,
- "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan",
+ "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
heap->buf_desc_va);
goto out;
}
}
- /* Map the HW chunk header here with RD/WR for likely update */
- hw_hdr = kbase_vmap_prot(kctx, gpu_va, sizeof(*hw_hdr),
- KBASE_REG_CPU_RD | KBASE_REG_CPU_WR, &map);
- if (unlikely(!hw_hdr)) {
- dev_warn(kctx->kbdev->dev,
- "Failed to map HW chnker header 0x%llX for HW reclaim scan",
- gpu_va);
+ chunk = find_chunk(heap, chunk_gpu_va);
+ if (unlikely(!chunk))
goto out;
- }
+
+ WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
+ "Cannot support CPU cached chunks without sync operations");
+ hw_hdr = chunk->map.addr;
/* Move onto the next chunk relevant information */
chunk_hdr_val = *hw_hdr;
- gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
- while (gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
- bool success = delete_chunk_from_gpu_va(heap, gpu_va, &chunk_hdr_val);
+ while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
+ bool success =
+ delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
if (!success)
break;
freed++;
/* On success, chunk_hdr_val is updated, extract the next chunk address */
- gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
}
/* Update the existing hardware chunk header, after reclaim deletion of chunks */
*hw_hdr = chunk_hdr_val;
- kbase_vunmap(kctx, &map);
- dev_dbg(heap->kctx->kbdev->dev,
- "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX", freed,
- chunk_hdr_val);
- } else
- dev_dbg(kctx->kbdev->dev,
- "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)",
- heap->buf_desc_va);
+ dev_dbg(heap->kctx->kbdev->dev,
+ "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
+ chunk_hdr_val);
+ } else {
+ dev_dbg(kctx->kbdev->dev,
+ "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
+ heap->buf_desc_va);
+ }
out:
return freed;
}
@@ -878,29 +1218,25 @@
{
u32 freed_chunks = 0;
u64 freed_pages = 0;
- u64 gpu_va;
+ u64 chunk_gpu_va;
u64 chunk_hdr_val;
struct kbase_context *kctx = heap->kctx;
- unsigned long prot = KBASE_REG_CPU_RD | KBASE_REG_CPU_WR;
- struct kbase_vmap_struct map;
u64 *ctx_ptr;
lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
- ctx_ptr = kbase_vmap_prot(kctx, heap->gpu_va, sizeof(*ctx_ptr), prot, &map);
- if (unlikely(!ctx_ptr)) {
- dev_dbg(kctx->kbdev->dev,
- "Failed to map tiler heap context 0x%llX for reclaim_scan", heap->gpu_va);
- goto out;
- }
+ WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
+ "Cannot support CPU cached heap context without sync operations");
+
+ ctx_ptr = heap->gpu_va_map.addr;
/* Extract the first chunk address from the context's free_list_head */
chunk_hdr_val = *ctx_ptr;
- gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
- while (gpu_va) {
+ while (chunk_gpu_va) {
u64 hdr_val;
- bool success = delete_chunk_from_gpu_va(heap, gpu_va, &hdr_val);
+ bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
if (!success)
break;
@@ -908,24 +1244,22 @@
freed_chunks++;
chunk_hdr_val = hdr_val;
/* extract the next chunk address */
- gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
+ chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
}
/* Update the post-scan deletion to context header */
*ctx_ptr = chunk_hdr_val;
- kbase_vunmap(kctx, &map);
/* Try to scan the HW hoarded list of unused chunks */
freed_chunks += delete_hoarded_chunks(heap);
freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
dev_dbg(heap->kctx->kbdev->dev,
- "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX",
+ "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
freed_chunks, freed_pages, chunk_hdr_val);
/* Update context tiler heaps memory usage */
kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
-out:
return freed_pages;
}
@@ -938,6 +1272,7 @@
list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
freed += delete_unused_chunk_pages(heap);
+
/* If freed enough, then stop here */
if (freed >= to_free)
break;
@@ -968,7 +1303,7 @@
}
dev_dbg(heap->kctx->kbdev->dev,
- "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX", chunk_cnt,
+ "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
page_cnt, heap->gpu_va);
return page_cnt;
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
index 70dbb6c..9d447ce 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
@@ -65,12 +65,20 @@
/* Tiler heap shrinker batch value */
#define HEAP_SHRINKER_BATCH (512)
+/* The size of the area needed to be vmapped prior to handing the tiler heap
+ * over to the tiler, so that the shrinker could be invoked.
+ */
+#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64))
+
/**
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
*
* @link: Link to this chunk in a list of chunks belonging to a
* @kbase_csf_tiler_heap.
* @region: Pointer to the GPU memory region allocated for the chunk.
+ * @map: Kernel VA mapping so that we would not need to use vmap in the
+ * shrinker callback, which can allocate. This maps only the header
+ * of the chunk, so it could be traversed.
* @gpu_va: GPU virtual address of the start of the memory region.
* This points to the header of the chunk and not to the low address
* of free memory within it.
@@ -84,6 +92,7 @@
struct kbase_csf_tiler_heap_chunk {
struct list_head link;
struct kbase_va_region *region;
+ struct kbase_vmap_struct map;
u64 gpu_va;
};
@@ -102,10 +111,14 @@
* uniquely identify the heap.
* @heap_id: Unique id representing the heap, assigned during heap
* initialization.
- * @buf_desc_va: Buffer decsriptor GPU VA. Can be 0 for backward compatible
+ * @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatible
* to earlier version base interfaces.
* @buf_desc_reg: Pointer to the VA region that covers the provided buffer
* descriptor memory object pointed to by buf_desc_va.
+ * @gpu_va_map: Kernel VA mapping of the GPU VA region.
+ * @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from
+ * during the tiler heap shrinker. Sync operations may need
+ * to be done before each read.
* @chunk_size: Size of each chunk, in bytes. Must be page-aligned.
* @chunk_count: The number of chunks currently allocated. Must not be
* zero or greater than @max_chunks.
@@ -126,6 +139,8 @@
u64 heap_id;
u64 buf_desc_va;
struct kbase_va_region *buf_desc_reg;
+ struct kbase_vmap_struct buf_desc_map;
+ struct kbase_vmap_struct gpu_va_map;
u32 chunk_size;
u32 chunk_count;
u32 max_chunks;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 6c4e3e8..c0b8d6e 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -154,8 +154,7 @@
/* Maximum number of pages of memory that require a permanent mapping, per
* kbase_context
*/
-#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \
- PAGE_SHIFT)
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT)
/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
* clients, to reduce undesired system load.
* If a virtualizer client requests a dump within this threshold period after
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index fcbaf2b..1526225 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1550,6 +1550,7 @@
return NULL;
new_reg->va_refcnt = 1;
+ new_reg->no_user_free_refcnt = 0;
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -2184,11 +2185,30 @@
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (reg->flags & KBASE_REG_NO_USER_FREE) {
+ if (kbase_va_region_is_no_user_free(kctx, reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
+ /* If a region has been made evictable then we must unmake it
+ * before trying to free it.
+ * If the memory hasn't been reclaimed it will be unmapped and freed
+ * below; if it has been reclaimed, the operations below are no-ops.
+ */
+ if (reg->flags & KBASE_REG_DONT_NEED) {
+ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE);
+ mutex_lock(&kctx->jit_evict_lock);
+ /* Unlink the physical allocation before unmaking it evictable so
+ * that the allocation isn't grown back to its last backed size
+ * as we're going to unmap it anyway.
+ */
+ reg->cpu_alloc->reg = NULL;
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ reg->gpu_alloc->reg = NULL;
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_mem_evictable_unmake(reg->gpu_alloc);
+ }
+
err = kbase_gpu_munmap(kctx, reg);
if (err) {
dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
@@ -2384,8 +2404,11 @@
if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
- if (flags & BASEP_MEM_NO_USER_FREE)
- reg->flags |= KBASE_REG_NO_USER_FREE;
+ if (flags & BASEP_MEM_NO_USER_FREE) {
+ kbase_gpu_vm_lock(kctx);
+ kbase_va_region_no_user_free_get(kctx, reg);
+ kbase_gpu_vm_unlock(kctx);
+ }
if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
@@ -3746,7 +3769,15 @@
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_lock(kctx);
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(reg->no_user_free_refcnt > 1);
+ kbase_va_region_no_user_free_put(kctx, reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -4473,6 +4504,29 @@
}
}
+ /* As in tiler heap init, there is a short window of time in which the
+ * region (either recycled or newly allocated in this case) has its
+ * "no user free" refcount incremented but does not yet have the DONT_NEED
+ * or ACTIVE_JIT_ALLOC flags set. Temporarily leaking the allocation is the
+ * least bad option that doesn't lead to a security issue down the line (it
+ * will eventually be cleaned up during context termination).
+ *
+ * We also need to take kbase_gpu_vm_lock regardless, as we're updating the
+ * region flags.
+ */
+ kbase_gpu_vm_lock(kctx);
+ if (unlikely(reg->no_user_free_refcnt > 1)) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n");
+
+ mutex_lock(&kctx->jit_evict_lock);
+ list_move(&reg->jit_node, &kctx->jit_pool_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+
+ reg = NULL;
+ goto end;
+ }
+
trace_mali_jit_alloc(reg, info->id);
kctx->jit_current_allocations++;
@@ -4490,6 +4544,7 @@
kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+ kbase_gpu_vm_unlock(kctx);
end:
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
@@ -4595,11 +4650,19 @@
reg = list_entry(kctx->jit_pool_head.prev,
struct kbase_va_region, jit_node);
list_del(&reg->jit_node);
+ list_del_init(&reg->gpu_alloc->evict_node);
}
mutex_unlock(&kctx->jit_evict_lock);
if (reg) {
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(reg->no_user_free_refcnt > 1);
+ kbase_va_region_no_user_free_put(kctx, reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4619,12 +4682,17 @@
walker = list_first_entry(&kctx->jit_pool_head,
struct kbase_va_region, jit_node);
list_del(&walker->jit_node);
+ list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- /* As context is terminating, directly free the backing pages
- * without unmapping them from the GPU as done in
- * kbase_region_tracker_erase_rbtree().
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
*/
- kbase_free_alloced_region(walker);
+ WARN_ON(walker->no_user_free_refcnt > 1);
+ kbase_va_region_no_user_free_put(kctx, walker);
+ kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4633,8 +4701,17 @@
walker = list_first_entry(&kctx->jit_active_head,
struct kbase_va_region, jit_node);
list_del(&walker->jit_node);
+ list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- kbase_free_alloced_region(walker);
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(walker->no_user_free_refcnt > 1);
+ kbase_va_region_no_user_free_put(kctx, walker);
+ kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
#if MALI_JIT_PRESSURE_LIMIT_BASE
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 2013d38..97d6471 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -303,6 +303,8 @@
* @jit_usage_id: The last just-in-time memory usage ID for this region.
* @jit_bin_id: The just-in-time memory bin this region came from.
* @va_refcnt: Number of users of this region. Protected by reg_lock.
+ * @no_user_free_refcnt: Number of users that want to prevent the region from
+ * being freed by userspace.
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
* an allocated region
* The object can be one of:
@@ -387,6 +389,13 @@
#define KBASE_REG_PROTECTED (1ul << 19)
+/* Region belongs to a shrinker.
+ *
+ * This can mean that the region is on either the JIT/ephemeral or the tiler
+ * heap shrinker paths. The flag should be cleared only after making sure
+ * that no references to the region remain on those paths, as otherwise the
+ * physical backing of the region may disappear while it is still in use.
+ */
#define KBASE_REG_DONT_NEED (1ul << 20)
/* Imported buffer is padded? */
@@ -416,10 +425,7 @@
#define KBASE_REG_RESERVED_BIT_23 (1ul << 23)
#endif /* !MALI_USE_CSF */
-/* Whilst this flag is set the GPU allocation is not supposed to be freed by
- * user space. The flag will remain set for the lifetime of JIT allocations.
- */
-#define KBASE_REG_NO_USER_FREE (1ul << 24)
+/* Bit 24 is currently unused and is available for a new flag */
/* Memory has permanent kernel side mapping */
#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
@@ -560,6 +566,7 @@
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
int va_refcnt;
+ int no_user_free_refcnt;
};
/**
@@ -602,6 +609,23 @@
return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg));
}
+/**
+ * kbase_is_region_shrinkable - Check if a region is "shrinkable".
+ * A shrinkable region is one whose backing pages (reg->gpu_alloc->pages)
+ * can be freed at any point, even though the kbase_va_region structure itself
+ * may have been refcounted.
+ * Regions that aren't on a shrinker, but could be shrunk at any point in the
+ * future without warning, are still considered "shrinkable" (e.g. active JIT allocs).
+ *
+ * @reg: Pointer to region
+ *
+ * Return: true if the region is "shrinkable", false if not.
+ */
+static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg)
+{
+ return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC);
+}
+
void kbase_remove_va_region(struct kbase_device *kbdev,
struct kbase_va_region *reg);
static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
@@ -622,6 +646,7 @@
lockdep_assert_held(&kctx->reg_lock);
WARN_ON(!region->va_refcnt);
+ WARN_ON(region->va_refcnt == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
@@ -649,6 +674,69 @@
return NULL;
}
+/**
+ * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region.
+ * A region that must not be freed by userspace indicates that it is owned by some other
+ * kbase subsystem, for example tiler heaps, JIT memory or CSF queues.
+ * Such regions must not be shrunk (i.e. have their backing pages freed), except by the
+ * current owner.
+ * Hence, callers cannot rely on this check alone to determine if a region might be shrunk
+ * by any part of kbase. Instead they should use kbase_is_region_shrinkable().
+ *
+ * @kctx: Pointer to kbase context.
+ * @region: Pointer to region.
+ *
+ * Return: true if userspace cannot free the region, false if userspace can free the region.
+ */
+static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
+ struct kbase_va_region *region)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return region->no_user_free_refcnt > 0;
+}
+
+/**
+ * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * Calling this function will prevent the region from being shrunk by parts of kbase that
+ * don't own the region (as long as the refcount stays above zero). Refer to
+ * kbase_va_region_is_no_user_free() for more information.
+ *
+ * @kctx: Pointer to kbase context.
+ * @region: Pointer to region (not shrinkable).
+ *
+ * Return: the pointer to the region passed as argument.
+ */
+static inline struct kbase_va_region *
+kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ WARN_ON(kbase_is_region_shrinkable(region));
+ WARN_ON(region->no_user_free_refcnt == INT_MAX);
+
+ /* non-atomic as kctx->reg_lock is held */
+ region->no_user_free_refcnt++;
+
+ return region;
+}
+
+/**
+ * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region.
+ *
+ * @kctx: Pointer to kbase context.
+ * @region: Pointer to region (not shrinkable).
+ */
+static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
+ struct kbase_va_region *region)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ WARN_ON(!kbase_va_region_is_no_user_free(kctx, region));
+
+ /* non-atomic as kctx->reg_lock is held */
+ region->no_user_free_refcnt--;
+}
+
/* Common functions */
static inline struct tagged_addr *kbase_get_cpu_phy_pages(
struct kbase_va_region *reg)
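To illustrate how the new helpers are meant to be paired (a sketch only, not part of the patch; the "owner" structure and its reg field are hypothetical): the reference is taken and dropped under the region lock, here via kbase_gpu_vm_lock()/kbase_gpu_vm_unlock(), matching the lockdep assertions above.

	/* Sketch: a subsystem taking ownership of a region... */
	kbase_gpu_vm_lock(kctx);
	owner->reg = kbase_va_region_no_user_free_get(kctx, reg);
	kbase_gpu_vm_unlock(kctx);

	/* ... userspace cannot free the region while the reference is held ... */

	/* ... and releasing it on teardown. */
	kbase_gpu_vm_lock(kctx);
	kbase_va_region_no_user_free_put(kctx, owner->reg);
	owner->reg = NULL;
	kbase_gpu_vm_unlock(kctx);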
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index a596f7a..2c33df4 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -86,18 +86,14 @@
static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma);
#endif
-static int kbase_vmap_phy_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg, u64 offset_bytes, size_t size,
- struct kbase_vmap_struct *map);
+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags);
static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
struct kbase_vmap_struct *map);
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages);
-
static bool is_process_exiting(struct vm_area_struct *vma)
{
/* PF_EXITING flag can't be reliably used here for the detection
@@ -195,20 +191,12 @@
reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
return -EINVAL;
- if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
- atomic_read(&kctx->permanent_mapped_pages))) {
- dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages",
- (u64)size,
- KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
- atomic_read(&kctx->permanent_mapped_pages));
- return -ENOMEM;
- }
-
kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
if (!kern_mapping)
return -ENOMEM;
- err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+ err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping,
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
if (err < 0)
goto vmap_fail;
@@ -216,7 +204,6 @@
reg->flags &= ~KBASE_REG_GROWABLE;
reg->cpu_alloc->permanent_map = kern_mapping;
- atomic_add(size, &kctx->permanent_mapped_pages);
return 0;
vmap_fail:
@@ -232,13 +219,6 @@
kfree(alloc->permanent_map);
alloc->permanent_map = NULL;
-
- /* Mappings are only done on cpu_alloc, so don't need to worry about
- * this being reduced a second time if a separate gpu_alloc is
- * freed
- */
- WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages));
- atomic_sub(alloc->nents, &kctx->permanent_mapped_pages);
}
void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
@@ -970,7 +950,7 @@
* & GPU queue ringbuffer and none of them needs to be explicitly marked
* as evictable by Userspace.
*/
- if (reg->flags & KBASE_REG_NO_USER_FREE)
+ if (kbase_va_region_is_no_user_free(kctx, reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1884,9 +1864,9 @@
/* validate found region */
if (kbase_is_region_invalid_or_free(aliasing_reg))
goto bad_handle; /* Not found/already free */
- if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+ if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
- if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE)
+ if (kbase_va_region_is_no_user_free(kctx, aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -2184,22 +2164,9 @@
(old_pages - new_pages)<<PAGE_SHIFT, 1);
}
-/**
- * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
- * @kctx: Context the region belongs to
- * @reg: The GPU region or NULL if there isn't one
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Return: 0 on success, negative -errno on error
- *
- * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
- * itself is unmodified as we still need to reserve the VA, only the page tables
- * will be modified by this function.
- */
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
- struct kbase_va_region *const reg,
- u64 const new_pages, u64 const old_pages)
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg, u64 const new_pages,
+ u64 const old_pages)
{
u64 delta = old_pages - new_pages;
struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
@@ -2270,8 +2237,11 @@
if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0)
goto out_unlock;
- /* can't grow regions which are ephemeral */
- if (reg->flags & KBASE_REG_DONT_NEED)
+
+ if (kbase_is_region_shrinkable(reg))
+ goto out_unlock;
+
+ if (kbase_va_region_is_no_user_free(kctx, reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -3051,9 +3021,9 @@
}
}
-static int kbase_vmap_phy_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg, u64 offset_bytes, size_t size,
- struct kbase_vmap_struct *map)
+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags)
{
unsigned long page_index;
unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
@@ -3064,6 +3034,12 @@
pgprot_t prot;
size_t i;
+ if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS))
+ return -EINVAL;
+
+ if (WARN_ON(kbase_is_region_invalid_or_free(reg)))
+ return -EINVAL;
+
if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
return -EINVAL;
@@ -3080,6 +3056,17 @@
if (page_index + page_count > kbase_reg_current_backed_size(reg))
return -ENOMEM;
+ if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) &&
+ (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+ atomic_read(&kctx->permanent_mapped_pages)))) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages",
+ (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+ atomic_read(&kctx->permanent_mapped_pages));
+ return -ENOMEM;
+ }
+
if (reg->flags & KBASE_REG_DONT_NEED)
return -EINVAL;
@@ -3118,59 +3105,73 @@
map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
map->size = size;
- map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
- !kbase_mem_is_imported(map->gpu_alloc->type);
+ map->flags = vmap_flags;
+ if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type))
+ map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED;
- if (map->sync_needed)
+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+ if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING)
+ atomic_add(page_count, &kctx->permanent_mapped_pages);
+
kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
return 0;
}
+void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr,
+ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags)
+{
+ u64 offset_bytes;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+ int err;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (WARN_ON(kbase_is_region_invalid_or_free(reg)))
+ return NULL;
+
+ /* check access permissions can be satisfied
+ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+ */
+ if ((reg->flags & prot_request) != prot_request)
+ return NULL;
+
+ offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+ cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags);
+ if (err < 0)
+ goto fail_vmap_phy_pages;
+
+ return map->addr;
+
+fail_vmap_phy_pages:
+ kbase_mem_phy_alloc_put(cpu_alloc);
+ kbase_mem_phy_alloc_put(gpu_alloc);
+ return NULL;
+}
+
void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
unsigned long prot_request, struct kbase_vmap_struct *map)
{
struct kbase_va_region *reg;
void *addr = NULL;
- u64 offset_bytes;
- struct kbase_mem_phy_alloc *cpu_alloc;
- struct kbase_mem_phy_alloc *gpu_alloc;
- int err;
kbase_gpu_vm_lock(kctx);
- reg = kbase_region_tracker_find_region_enclosing_address(kctx,
- gpu_addr);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg))
goto out_unlock;
- /* check access permissions can be satisfied
- * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
- */
- if ((reg->flags & prot_request) != prot_request)
- goto out_unlock;
-
- offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
- cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
- gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
-
- err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
- if (err < 0)
- goto fail_vmap_phy_pages;
-
- addr = map->addr;
+ addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u);
out_unlock:
kbase_gpu_vm_unlock(kctx);
return addr;
-
-fail_vmap_phy_pages:
- kbase_gpu_vm_unlock(kctx);
- kbase_mem_phy_alloc_put(cpu_alloc);
- kbase_mem_phy_alloc_put(gpu_alloc);
-
- return NULL;
}
void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
@@ -3193,16 +3194,23 @@
vunmap(addr);
- if (map->sync_needed)
+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+ if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) {
+ size_t page_count = PFN_UP(map->offset_in_page + map->size);
+
+ WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages));
+ atomic_sub(page_count, &kctx->permanent_mapped_pages);
+ }
kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc);
+
map->offset_in_page = 0;
map->cpu_pages = NULL;
map->gpu_pages = NULL;
map->addr = NULL;
map->size = 0;
- map->sync_needed = false;
+ map->flags = 0;
}
void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
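As a usage illustration for the refactored mapping path (a sketch only, not part of the patch; the one-page size and read-only protection request are arbitrary choices for the example): kbase_vmap_reg() is called with the region lock held, and kbase_vunmap() later drops the KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING contribution recorded against kctx->permanent_mapped_pages.

	struct kbase_vmap_struct map;
	void *cpu_addr;

	kbase_gpu_vm_lock(kctx);
	cpu_addr = kbase_vmap_reg(kctx, reg, reg->start_pfn << PAGE_SHIFT, PAGE_SIZE,
				  KBASE_REG_CPU_RD, &map,
				  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
	kbase_gpu_vm_unlock(kctx);

	if (cpu_addr) {
		/* ... access the kernel mapping ... */
		kbase_vunmap(kctx, &map); /* syncs if needed and undoes the accounting */
	}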
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 5e5d991..6dda44b 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -217,6 +217,26 @@
*/
bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+typedef unsigned int kbase_vmap_flag;
+
+/* Sync operations are needed at the beginning and end of access to kernel-mapped GPU memory.
+ *
+ * This is internal to the struct kbase_vmap_struct and should not be passed in by callers of
+ * kbase_vmap-related functions.
+ */
+#define KBASE_VMAP_FLAG_SYNC_NEEDED (((kbase_vmap_flag)1) << 0)
+
+/* Permanently-mapped memory accounting (including limit enforcement) should be performed for
+ * the kernel-mapped GPU memory.
+ *
+ * This should be used if the kernel mapping is going to live for a potentially long time, for
+ * example if it will persist after the caller has returned.
+ */
+#define KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING (((kbase_vmap_flag)1) << 1)
+
+/* Set of flags that can be passed into kbase_vmap-related functions */
+#define KBASE_VMAP_INPUT_FLAGS (KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING)
+
struct kbase_vmap_struct {
off_t offset_in_page;
struct kbase_mem_phy_alloc *cpu_alloc;
@@ -225,9 +245,55 @@
struct tagged_addr *gpu_pages;
void *addr;
size_t size;
- bool sync_needed;
+ kbase_vmap_flag flags;
};
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx: Context the region belongs to
+ * @reg: The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_vmap_reg - Map part of an existing region into the kernel safely, only if the requested
+ * access permissions are supported
+ * @kctx: Context @reg belongs to
+ * @reg: The GPU region to map part of
+ * @gpu_addr: Start address of VA range to map, which must be within @reg
+ * @size: Size of VA range, which when added to @gpu_addr must be within @reg
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map: Structure to be given to kbase_vunmap() on freeing
+ * @vmap_flags: Flags of type kbase_vmap_flag
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Variant of kbase_vmap_prot() that can be used given an existing region.
+ *
+ * The caller must satisfy one of the following for @reg:
+ * * It must have been obtained by finding it on the region tracker, and the region lock must not
+ * have been released in the meantime.
+ * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region
+ * lock is now held again.
+ * * Or, @reg has had its "no user free" refcount incremented at creation time or under the
+ * region lock, and the region lock is now held again.
+ *
+ * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS.
+ *
+ * Refer to kbase_vmap_prot() for more information on the operation of this function.
+ */
+void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr,
+ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags);
/**
* kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the