| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: John Stultz <john.stultz@linaro.org> |
| Date: Thu, 7 May 2020 20:08:13 +0000 |
| Subject: ONHOLD: FROMLIST: dma-buf: system_heap: Add a system-uncached heap |
| re-using the system heap |
| |
| This adds a heap that allocates non-contiguous buffers that are |
| marked as writecombined, so they are not cached by the CPU. |
| |
| This is useful, as most graphics buffers are usually not touched |
| by the CPU or only written into once by the CPU. So when mapping |
| the buffer over and over between devices, we can skip the CPU |
| syncing, which saves a lot of cache management overhead, greatly |
| improving performance. |
| |
| For folk using ION, there was a ION_FLAG_CACHED flag, which |
| signaled if the returned buffer should be CPU cacheable or not. |
| With DMA-BUF heaps, we do not yet have such a flag, and by default |
| the current heaps (system and cma) produce CPU cachable buffers. |
| So for folks transitioning from ION to DMA-BUF Heaps, this fills |
| in some of that missing functionality. |
| |
| There has been a suggestion to make this functionality a flag |
| (DMAHEAP_FLAG_UNCACHED?) on the system heap, similar to how |
| ION used the ION_FLAG_CACHED. But I want to make sure an |
| _UNCACHED flag would truely be a generic attribute across all |
| heaps. So far that has been unclear, so having it as a separate |
| heap seemes better for now. (But I'm open to discussion on this |
| point!) |
| |
| This is a rework of earlier efforts to add a uncached system heap, |
| done utilizing the exisitng system heap, adding just a bit of |
| logic to handle the uncached case. |
| |
| Feedback would be very welcome! |
| |
| Many thanks to Liam Mark for his help to get this working. |
| |
| Pending opensource users of this code include: |
| * AOSP HiKey960 gralloc: |
| - https://android-review.googlesource.com/c/device/linaro/hikey/+/1399519 |
| - Visibly improves performance over the system heap |
| * AOSP Codec2 (possibly, needs more review): |
| - https://android-review.googlesource.com/c/platform/frameworks/av/+/1360640/17/media/codec2/vndk/C2DmaBufAllocator.cpp#325 |
| |
| [CPNOTE: 21/05/21] Lee: Requested status from John on the MLs |
| [CPNOTE: 26/05/21] Lee: Daniel is blocking - no upstream user - John is trying to find one |
| |
| Signed-off-by: John Stultz <john.stultz@linaro.org> |
| Link: https://lore.kernel.org/lkml/20201110034934.70898-8-john.stultz@linaro.org/ |
| Bug: 170887642 |
| Change-Id: I56cb3acf58546c7dfd423a2ce432e3d6d7fd7a69 |
| Signed-off-by: Hridya Valsaraju <hridya@google.com> |
| (cherry picked from commit 79793050fa3eaad4099e259a63622ffea17f0683) |
| Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> |
| --- |
| drivers/dma-buf/heaps/system_heap.c | 111 ++++++++++++++++++++++++---- |
| 1 file changed, 95 insertions(+), 16 deletions(-) |
| |
| diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c |
| --- a/drivers/dma-buf/heaps/system_heap.c |
| +++ b/drivers/dma-buf/heaps/system_heap.c |
| @@ -22,6 +22,7 @@ |
| #include <linux/vmalloc.h> |
| |
| static struct dma_heap *sys_heap; |
| +static struct dma_heap *sys_uncached_heap; |
| |
| struct system_heap_buffer { |
| struct dma_heap *heap; |
| @@ -31,6 +32,8 @@ struct system_heap_buffer { |
| struct sg_table sg_table; |
| int vmap_cnt; |
| void *vaddr; |
| + |
| + bool uncached; |
| }; |
| |
| struct dma_heap_attachment { |
| @@ -38,6 +41,8 @@ struct dma_heap_attachment { |
| struct sg_table *table; |
| struct list_head list; |
| bool mapped; |
| + |
| + bool uncached; |
| }; |
| |
| #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP) |
| @@ -101,7 +106,7 @@ static int system_heap_attach(struct dma_buf *dmabuf, |
| a->dev = attachment->dev; |
| INIT_LIST_HEAD(&a->list); |
| a->mapped = false; |
| - |
| + a->uncached = buffer->uncached; |
| attachment->priv = a; |
| |
| mutex_lock(&buffer->lock); |
| @@ -131,9 +136,13 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac |
| { |
| struct dma_heap_attachment *a = attachment->priv; |
| struct sg_table *table = a->table; |
| + int attr = 0; |
| int ret; |
| |
| - ret = dma_map_sgtable(attachment->dev, table, direction, 0); |
| + if (a->uncached) |
| + attr = DMA_ATTR_SKIP_CPU_SYNC; |
| + |
| + ret = dma_map_sgtable(attachment->dev, table, direction, attr); |
| if (ret) |
| return ERR_PTR(ret); |
| |
| @@ -146,9 +155,12 @@ static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, |
| enum dma_data_direction direction) |
| { |
| struct dma_heap_attachment *a = attachment->priv; |
| + int attr = 0; |
| |
| + if (a->uncached) |
| + attr = DMA_ATTR_SKIP_CPU_SYNC; |
| a->mapped = false; |
| - dma_unmap_sgtable(attachment->dev, table, direction, 0); |
| + dma_unmap_sgtable(attachment->dev, table, direction, attr); |
| } |
| |
| static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, |
| @@ -162,10 +174,12 @@ static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, |
| if (buffer->vmap_cnt) |
| invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); |
| |
| - list_for_each_entry(a, &buffer->attachments, list) { |
| - if (!a->mapped) |
| - continue; |
| - dma_sync_sgtable_for_cpu(a->dev, a->table, direction); |
| + if (!buffer->uncached) { |
| + list_for_each_entry(a, &buffer->attachments, list) { |
| + if (!a->mapped) |
| + continue; |
| + dma_sync_sgtable_for_cpu(a->dev, a->table, direction); |
| + } |
| } |
| mutex_unlock(&buffer->lock); |
| |
| @@ -183,10 +197,12 @@ static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, |
| if (buffer->vmap_cnt) |
| flush_kernel_vmap_range(buffer->vaddr, buffer->len); |
| |
| - list_for_each_entry(a, &buffer->attachments, list) { |
| - if (!a->mapped) |
| - continue; |
| - dma_sync_sgtable_for_device(a->dev, a->table, direction); |
| + if (!buffer->uncached) { |
| + list_for_each_entry(a, &buffer->attachments, list) { |
| + if (!a->mapped) |
| + continue; |
| + dma_sync_sgtable_for_device(a->dev, a->table, direction); |
| + } |
| } |
| mutex_unlock(&buffer->lock); |
| |
| @@ -201,6 +217,9 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) |
| struct sg_page_iter piter; |
| int ret; |
| |
| + if (buffer->uncached) |
| + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); |
| + |
| for_each_sgtable_page(table, &piter, vma->vm_pgoff) { |
| struct page *page = sg_page_iter_page(&piter); |
| |
| @@ -222,17 +241,21 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer) |
| struct page **pages = vmalloc(sizeof(struct page *) * npages); |
| struct page **tmp = pages; |
| struct sg_page_iter piter; |
| + pgprot_t pgprot = PAGE_KERNEL; |
| void *vaddr; |
| |
| if (!pages) |
| return ERR_PTR(-ENOMEM); |
| |
| + if (buffer->uncached) |
| + pgprot = pgprot_writecombine(PAGE_KERNEL); |
| + |
| for_each_sgtable_page(table, &piter, 0) { |
| WARN_ON(tmp - pages >= npages); |
| *tmp++ = sg_page_iter_page(&piter); |
| } |
| |
| - vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); |
| + vaddr = vmap(pages, npages, VM_MAP, pgprot); |
| vfree(pages); |
| |
| if (!vaddr) |
| @@ -332,10 +355,11 @@ static struct page *alloc_largest_available(unsigned long size, |
| return NULL; |
| } |
| |
| -static struct dma_buf *system_heap_allocate(struct dma_heap *heap, |
| - unsigned long len, |
| - unsigned long fd_flags, |
| - unsigned long heap_flags) |
| +static struct dma_buf *system_heap_do_allocate(struct dma_heap *heap, |
| + unsigned long len, |
| + unsigned long fd_flags, |
| + unsigned long heap_flags, |
| + bool uncached) |
| { |
| struct system_heap_buffer *buffer; |
| DEFINE_DMA_BUF_EXPORT_INFO(exp_info); |
| @@ -356,6 +380,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, |
| mutex_init(&buffer->lock); |
| buffer->heap = heap; |
| buffer->len = len; |
| + buffer->uncached = uncached; |
| |
| INIT_LIST_HEAD(&pages); |
| i = 0; |
| @@ -401,6 +426,18 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, |
| ret = PTR_ERR(dmabuf); |
| goto free_pages; |
| } |
| + |
| + /* |
| + * For uncached buffers, we need to initially flush cpu cache, since |
| + * the __GFP_ZERO on the allocation means the zeroing was done by the |
| + * cpu and thus it is likely cached. Map (and implicitly flush) and |
| + * unmap it now so we don't get corruption later on. |
| + */ |
| + if (buffer->uncached) { |
| + dma_map_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); |
| + dma_unmap_sgtable(dma_heap_get_dev(heap), table, DMA_BIDIRECTIONAL, 0); |
| + } |
| + |
| return dmabuf; |
| |
| free_pages: |
| @@ -418,10 +455,40 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap, |
| return ERR_PTR(ret); |
| } |
| |
| +static struct dma_buf *system_heap_allocate(struct dma_heap *heap, |
| + unsigned long len, |
| + unsigned long fd_flags, |
| + unsigned long heap_flags) |
| +{ |
| + return system_heap_do_allocate(heap, len, fd_flags, heap_flags, false); |
| +} |
| + |
| static const struct dma_heap_ops system_heap_ops = { |
| .allocate = system_heap_allocate, |
| }; |
| |
| +static struct dma_buf *system_uncached_heap_allocate(struct dma_heap *heap, |
| + unsigned long len, |
| + unsigned long fd_flags, |
| + unsigned long heap_flags) |
| +{ |
| + return system_heap_do_allocate(heap, len, fd_flags, heap_flags, true); |
| +} |
| + |
| +/* Dummy function to be used until we can call coerce_mask_and_coherent */ |
| +static struct dma_buf *system_uncached_heap_not_initialized(struct dma_heap *heap, |
| + unsigned long len, |
| + unsigned long fd_flags, |
| + unsigned long heap_flags) |
| +{ |
| + return ERR_PTR(-EBUSY); |
| +} |
| + |
| +static struct dma_heap_ops system_uncached_heap_ops = { |
| + /* After system_heap_create is complete, we will swap this */ |
| + .allocate = system_uncached_heap_not_initialized, |
| +}; |
| + |
| static int system_heap_create(void) |
| { |
| struct dma_heap_export_info exp_info; |
| @@ -434,6 +501,18 @@ static int system_heap_create(void) |
| if (IS_ERR(sys_heap)) |
| return PTR_ERR(sys_heap); |
| |
| + exp_info.name = "system-uncached"; |
| + exp_info.ops = &system_uncached_heap_ops; |
| + exp_info.priv = NULL; |
| + |
| + sys_uncached_heap = dma_heap_add(&exp_info); |
| + if (IS_ERR(sys_uncached_heap)) |
| + return PTR_ERR(sys_uncached_heap); |
| + |
| + dma_coerce_mask_and_coherent(dma_heap_get_dev(sys_uncached_heap), DMA_BIT_MASK(64)); |
| + mb(); /* make sure we only set allocate after dma_mask is set */ |
| + system_uncached_heap_ops.allocate = system_uncached_heap_allocate; |
| + |
| return 0; |
| } |
| module_init(system_heap_create); |