| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| /** |
| * DOC: Base kernel MMU management. |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/dma-mapping.h> |
| #include <linux/migrate.h> |
| #include <mali_kbase.h> |
| #include <gpu/mali_kbase_gpu_fault.h> |
| #include <hw_access/mali_kbase_hw_access_regmap.h> |
| #include <tl/mali_kbase_tracepoints.h> |
| #include <backend/gpu/mali_kbase_instr_defs.h> |
| #include <mali_kbase_ctx_sched.h> |
| #include <mali_kbase_debug.h> |
| #include <mali_kbase_defs.h> |
| #include <mali_kbase_hw.h> |
| #include <mmu/mali_kbase_mmu_hw.h> |
| #include <mali_kbase_mem.h> |
| #include <mali_kbase_reset_gpu.h> |
| #include <mmu/mali_kbase_mmu.h> |
| #include <mmu/mali_kbase_mmu_internal.h> |
| #include <mali_kbase_cs_experimental.h> |
| #include <device/mali_kbase_device.h> |
| #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h> |
| #if !MALI_USE_CSF |
| #include <mali_kbase_hwaccess_jm.h> |
| #endif |
| #include <linux/version_compat_defs.h> |
| |
| #include <mali_kbase_trace_gpu_mem.h> |
| #include <backend/gpu/mali_kbase_pm_internal.h> |
| |
| /* Threshold used to decide whether to flush full caches or just a physical range */ |
| #define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20 |
| #define MGM_DEFAULT_PTE_GROUP (0) |
| |
| /* Macro to convert updated PGDs to flags indicating which levels to skip in the flush */ |
| #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds)&0xF) |
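| /* For example, if only the bottom level (bit 3) was updated, dirty_pgds is |
| * 0x8 and the resulting flags are 0x7, i.e. levels 0-2 can be skipped. |
| */ |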
| |
| static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| const u64 start_vpfn, struct tagged_addr *phys, size_t nr, |
| unsigned long flags, int const group_id, u64 *dirty_pgds, |
| struct kbase_va_region *reg, bool ignore_page_migration); |
| |
| /* Small wrapper function to factor out GPU-dependent context releasing */ |
| static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) |
| { |
| #if MALI_USE_CSF |
| CSTD_UNUSED(kbdev); |
| kbase_ctx_sched_release_ctx_lock(kctx); |
| #else /* MALI_USE_CSF */ |
| kbasep_js_runpool_release_ctx(kbdev, kctx); |
| #endif /* MALI_USE_CSF */ |
| } |
| |
| /** |
| * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done |
| * through GPU_CONTROL interface. |
| * |
| * @kbdev: kbase device to check GPU model ID on. |
| * |
| * This function returns whether a cache flush for a page table update should |
| * be done through the GPU_CONTROL interface or the MMU_AS_CONTROL interface. |
| * |
| * Return: True if the cache flush should be done via a GPU_CONTROL command. |
| */ |
| static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) |
| { |
| return kbdev->gpu_props.gpu_id.arch_major > 11; |
| } |
| |
| /** |
| * mmu_flush_pa_range() - Flush physical address range |
| * |
| * @kbdev: kbase device to issue the MMU operation on. |
| * @phys: Starting address of the physical range to start the operation on. |
| * @nr_bytes: Number of bytes to work on. |
| * @op: Type of cache flush operation to perform. |
| * |
| * Issue a cache flush command on the given physical address range. |
| */ |
| #if MALI_USE_CSF |
| static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes, |
| enum kbase_mmu_op_type op) |
| { |
| u32 flush_op; |
| |
| lockdep_assert_held(&kbdev->hwaccess_lock); |
| |
| /* Translate operation to command */ |
| if (op == KBASE_MMU_OP_FLUSH_PT) |
| flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; |
| else if (op == KBASE_MMU_OP_FLUSH_MEM) |
| flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; |
| else { |
| dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); |
| return; |
| } |
| |
| if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op)) |
| dev_err(kbdev->dev, "Flush for physical address range did not complete"); |
| } |
| #endif |
| |
| /** |
| * mmu_invalidate() - Perform an invalidate operation on MMU caches. |
| * @kbdev: The Kbase device. |
| * @kctx: The Kbase context. |
| * @as_nr: GPU address space number for which invalidate is required. |
| * @op_param: Non-NULL pointer to struct containing information about the MMU |
| * operation to perform. |
| * |
| * Perform an MMU invalidate operation on a particular address space |
| * by issuing an UNLOCK command. |
| */ |
| static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, |
| const struct kbase_mmu_hw_op_param *op_param) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| |
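| /* Only issue the invalidate if the GPU is powered up and, for a |
| * context-bound operation, the context still holds an address space. |
| */ |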
| if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) { |
| as_nr = kctx ? kctx->as_nr : as_nr; |
| if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) |
| dev_err(kbdev->dev, |
| "Invalidate after GPU page table update did not complete"); |
| } |
| |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| } |
| |
| /* Perform a flush/invalidate on a particular address space |
| */ |
| static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, |
| const struct kbase_mmu_hw_op_param *op_param) |
| { |
| unsigned long flags; |
| |
| /* AS transaction begin */ |
| mutex_lock(&kbdev->mmu_hw_mutex); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| |
| if (kbdev->pm.backend.gpu_ready && kbase_mmu_hw_do_flush(kbdev, as, op_param)) |
| dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); |
| |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| mutex_unlock(&kbdev->mmu_hw_mutex); |
| /* AS transaction end */ |
| } |
| |
| /** |
| * mmu_flush_invalidate() - Perform a flush operation on GPU caches. |
| * @kbdev: The Kbase device. |
| * @kctx: The Kbase context. |
| * @as_nr: GPU address space number for which flush + invalidate is required. |
| * @op_param: Non-NULL pointer to struct containing information about the MMU |
| * operation to perform. |
| * |
| * This function performs the cache flush operation described by @op_param. |
| * The function retains a reference to the given @kctx and releases it |
| * after performing the flush operation. |
| * |
| * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue |
| * a cache flush + invalidate to the L2 caches and invalidate the TLBs. |
| * |
| * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue |
| * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as |
| * invalidating the TLBs. |
| */ |
| static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, |
| const struct kbase_mmu_hw_op_param *op_param) |
| { |
| bool ctx_is_in_runpool; |
| |
| /* Early out if there is nothing to do */ |
| if (op_param->nr == 0) |
| return; |
| |
| /* If no context is provided then MMU operation is performed on address |
| * space which does not belong to user space context. Otherwise, retain |
| * refcount to context provided and release after flush operation. |
| */ |
| if (!kctx) { |
| mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param); |
| } else { |
| #if !MALI_USE_CSF |
| rt_mutex_lock(&kbdev->js_data.queue_mutex); |
| ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); |
| rt_mutex_unlock(&kbdev->js_data.queue_mutex); |
| #else |
| ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); |
| #endif /* !MALI_USE_CSF */ |
| |
| if (ctx_is_in_runpool) { |
| KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); |
| |
| mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param); |
| |
| release_ctx(kbdev, kctx); |
| } |
| } |
| } |
| |
| /** |
| * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via |
| * the GPU_CONTROL interface |
| * @kbdev: The Kbase device. |
| * @kctx: The Kbase context. |
| * @as_nr: GPU address space number for which flush + invalidate is required. |
| * @op_param: Non-NULL pointer to struct containing information about the MMU |
| * operation to perform. |
| * |
| * Perform a flush/invalidate on a particular address space via the GPU_CONTROL |
| * interface. |
| */ |
| static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, |
| int as_nr, |
| const struct kbase_mmu_hw_op_param *op_param) |
| { |
| unsigned long flags; |
| |
| /* AS transaction begin */ |
| mutex_lock(&kbdev->mmu_hw_mutex); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| |
| if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) { |
| as_nr = kctx ? kctx->as_nr : as_nr; |
| if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) |
| dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); |
| } |
| |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| mutex_unlock(&kbdev->mmu_hw_mutex); |
| } |
| |
| static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, |
| phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) |
| { |
| kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); |
| } |
| |
| static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) |
| { |
| /* Ensure that the GPU can read the pages from memory. |
| * |
| * pixel: b/200555454 requires this sync to happen even if the system |
| * is coherent. |
| */ |
| dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); |
| } |
| |
| /** |
| * kbase_mmu_sync_pgd() - sync page directory to memory when needed. |
| * @kbdev: Device pointer. |
| * @kctx: Context pointer. |
| * @phys: Starting physical address of the destination region. |
| * @handle: Address of DMA region. |
| * @size: Size of the region to sync. |
| * @flush_op: MMU cache flush operation to perform on the physical address |
| * range, if GPU control is available. |
| * |
| * This function is called whenever the association between a virtual address |
| * range and a physical address range changes, because a mapping is created or |
| * destroyed. |
| * One of the effects of this operation is performing an MMU cache flush |
| * operation only on the physical address range affected by this function, if |
| * GPU control is available. |
| * |
| * This should be called after each page directory update. |
| */ |
| static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx, |
| phys_addr_t phys, dma_addr_t handle, size_t size, |
| enum kbase_mmu_op_type flush_op) |
| { |
| kbase_mmu_sync_pgd_cpu(kbdev, handle, size); |
| kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); |
| } |
| |
| /* |
| * Definitions: |
| * - PGD: Page Directory. |
| * - PTE: Page Table Entry. A 64-bit value pointing to the next |
| * level of translation. |
| * - ATE: Address Translation Entry. A 64-bit value pointing to |
| * a 4kB physical page. |
| */ |
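| /* |
| * The page table has four levels (0 to 3), each indexed by 9 bits of the |
| * virtual page frame number: |
| * level N index = (vpfn >> ((3 - N) * 9)) & 0x1FF |
| * so every PGD holds KBASE_MMU_PAGE_ENTRIES (512) 64-bit entries. |
| */ |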
| |
| /** |
| * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and |
| * free memory of the page directories |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @pgds: Physical addresses of page directories to be freed. |
| * @vpfn: The virtual page frame number. |
| * @level: The level of MMU page table. |
| * @flush_op: The type of MMU flush operation to perform. |
| * @dirty_pgds: Flags to track every level where a PGD has been updated. |
| */ |
| static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, phys_addr_t *pgds, |
| u64 vpfn, int level, |
| enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); |
| |
| static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) |
| { |
| atomic_sub(1, &kbdev->memdev.used_pages); |
| |
| /* If MMU tables belong to a context then pages will have been accounted |
| * against it, so we must decrement the usage counts here. |
| */ |
| if (mmut->kctx) { |
| kbase_process_page_usage_dec(mmut->kctx, 1); |
| atomic_sub(1, &mmut->kctx->used_pages); |
| } |
| |
| kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); |
| } |
| |
| static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, struct page *p) |
| { |
| struct kbase_page_metadata *page_md = kbase_page_private(p); |
| bool page_is_isolated = false; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| if (!kbase_is_page_migration_enabled()) |
| return false; |
| |
| spin_lock(&page_md->migrate_lock); |
| if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { |
| WARN_ON_ONCE(!mmut->kctx); |
| if (IS_PAGE_ISOLATED(page_md->status)) { |
| page_md->status = |
| PAGE_STATUS_SET(page_md->status, FREE_PT_ISOLATED_IN_PROGRESS); |
| page_md->data.free_pt_isolated.kbdev = kbdev; |
| page_is_isolated = true; |
| } else { |
| page_md->status = PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); |
| } |
| } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) || |
| (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) { |
| /* Nothing to do - fall through */ |
| } else { |
| WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); |
| } |
| spin_unlock(&page_md->migrate_lock); |
| |
| if (unlikely(page_is_isolated)) { |
| /* Do the CPU cache flush and accounting here for the isolated |
| * PGD page, which is done inside kbase_mmu_free_pgd() for the |
| * PGD page that did not get isolated. |
| */ |
| dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, |
| DMA_BIDIRECTIONAL); |
| kbase_mmu_account_freed_pgd(kbdev, mmut); |
| } |
| |
| return page_is_isolated; |
| } |
| |
| /** |
| * kbase_mmu_free_pgd() - Free memory of the page directory |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @pgd: Physical address of page directory to be freed. |
| * |
| * This function is supposed to be called with mmu_lock held and after |
| * ensuring that the GPU won't be able to access the page. |
| */ |
| static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| phys_addr_t pgd) |
| { |
| struct page *p; |
| bool page_is_isolated = false; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| p = pfn_to_page(PFN_DOWN(pgd)); |
| page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); |
| |
| if (likely(!page_is_isolated)) { |
| kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); |
| kbase_mmu_account_freed_pgd(kbdev, mmut); |
| } |
| } |
| |
| /** |
| * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * |
| * This function will call kbase_mmu_free_pgd() on each page directory page |
| * present in the list of free PGDs inside @mmut. |
| * |
| * The function is supposed to be called after the GPU cache and MMU TLB have |
| * been invalidated, following the teardown loop. |
| * |
| * The mmu_lock shall be held prior to calling the function. |
| */ |
| static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) |
| { |
| size_t i; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) |
| kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); |
| |
| mmut->scratch_mem.free_pgds.head_index = 0; |
| } |
| |
| static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) |
| { |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) |
| return; |
| |
| mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; |
| } |
| |
| static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) |
| { |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| mmut->scratch_mem.free_pgds.head_index = 0; |
| } |
| |
| /** |
| * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to |
| * a region on a GPU page fault |
| * @kbdev: KBase device |
| * @reg: The region that will be backed with more pages |
| * @fault_rel_pfn: PFN of the fault relative to the start of the region |
| * |
| * This calculates how much to increase the backing of a region by, based on |
| * where a GPU page fault occurred and the flags in the region. |
| * |
| * This can be more than the minimum number of pages that would reach |
| * @fault_rel_pfn, for example to reduce the overall rate of page fault |
| * interrupts on a region, or to ensure that the end address is aligned. |
| * |
| * Return: the number of backed pages to increase by |
| */ |
| static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, struct kbase_va_region *reg, |
| size_t fault_rel_pfn) |
| { |
| size_t multiple = reg->extension; |
| size_t reg_current_size = kbase_reg_current_backed_size(reg); |
| size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; |
| size_t remainder; |
| |
| if (!multiple) { |
| dev_warn( |
| kbdev->dev, |
| "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", |
| ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); |
| return minimum_extra; |
| } |
| |
| /* Calculate the remainder needed to round minimum_extra up to a |
| * multiple of the extension. |
| * Depending on reg's flags, the base used for calculating multiples is |
| * different. |
| */ |
| |
| /* multiple is based on the current backed size, even if the |
| * current backed size/pfn for end of committed memory are not |
| * themselves aligned to multiple |
| */ |
| remainder = minimum_extra % multiple; |
| |
| #if !MALI_USE_CSF |
| if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { |
| /* multiple is based on the top of the initial commit, which |
| * has been allocated in such a way that (start_pfn + |
| * initial_commit) is already aligned to multiple. Hence the |
| * pfn for the end of committed memory will also be aligned to |
| * multiple |
| */ |
| size_t initial_commit = reg->initial_commit; |
| |
| if (fault_rel_pfn < initial_commit) { |
| /* this case is just to catch in case it's been |
| * recommitted by userspace to be smaller than the |
| * initial commit |
| */ |
| minimum_extra = initial_commit - reg_current_size; |
| remainder = 0; |
| } else { |
| /* same as calculating |
| * (fault_rel_pfn - initial_commit + 1) |
| */ |
| size_t pages_after_initial = |
| minimum_extra + reg_current_size - initial_commit; |
| |
| remainder = pages_after_initial % multiple; |
| } |
| } |
| #endif /* !MALI_USE_CSF */ |
| |
| if (remainder == 0) |
| return minimum_extra; |
| |
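| /* Round minimum_extra up to the next multiple of the extension: e.g. with |
| * multiple = 128 and minimum_extra = 10, remainder = 10 and the region |
| * grows by 10 + 128 - 10 = 128 pages. |
| */ |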
| return minimum_extra + multiple - remainder; |
| } |
| |
| #ifdef CONFIG_MALI_CINSTR_GWT |
| static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, |
| struct kbase_as *faulting_as, u64 start_pfn, |
| size_t nr, u32 kctx_id, u64 dirty_pgds) |
| { |
| /* Calls to this function are inherently synchronous, with respect to |
| * MMU operations. |
| */ |
| const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; |
| struct kbase_mmu_hw_op_param op_param; |
| unsigned long irq_flags; |
| int ret = 0; |
| |
| kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| |
| /* flush L2 and unlock the VA (resumes the MMU) */ |
| op_param.vpfn = start_pfn; |
| op_param.nr = nr; |
| op_param.op = KBASE_MMU_OP_FLUSH_PT; |
| op_param.kctx_id = kctx_id; |
| op_param.mmu_sync_info = mmu_sync_info; |
| spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { |
| op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); |
| ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); |
| } else { |
| ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); |
| } |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); |
| |
| if (ret) |
| dev_err(kbdev->dev, |
| "Flush for GPU page fault due to write access did not complete"); |
| |
| kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| } |
| |
| static void set_gwt_element_page_addr_and_size(struct kbasep_gwt_list_element *element, |
| u64 fault_page_addr, struct tagged_addr fault_phys) |
| { |
| u64 fault_pfn = fault_page_addr >> PAGE_SHIFT; |
| unsigned int vindex = fault_pfn & (NUM_PAGES_IN_2MB_LARGE_PAGE - 1); |
| |
| /* If the fault address lies within a 2MB page, then consider |
| * the whole 2MB page for dumping to avoid incomplete dumps. |
| */ |
| if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) { |
| element->page_addr = fault_page_addr & ~(SZ_2M - 1UL); |
| element->num_pages = NUM_PAGES_IN_2MB_LARGE_PAGE; |
| } else { |
| element->page_addr = fault_page_addr; |
| element->num_pages = 1; |
| } |
| } |
| |
| static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, |
| struct kbase_as *faulting_as) |
| { |
| struct kbasep_gwt_list_element *pos; |
| struct kbase_va_region *region; |
| struct kbase_device *kbdev; |
| struct tagged_addr *fault_phys_addr; |
| struct kbase_fault *fault; |
| u64 fault_pfn, pfn_offset; |
| unsigned int as_no; |
| u64 dirty_pgds = 0; |
| |
| as_no = faulting_as->number; |
| kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); |
| fault = &faulting_as->pf_data; |
| fault_pfn = fault->addr >> PAGE_SHIFT; |
| |
| kbase_gpu_vm_lock(kctx); |
| |
| /* Find region and check if it should be writable. */ |
| region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); |
| if (kbase_is_region_invalid_or_free(region)) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Memory is not mapped on the GPU", |
| &faulting_as->pf_data); |
| return; |
| } |
| |
| if (!(region->flags & KBASE_REG_GPU_WR)) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Region does not have write permissions", |
| &faulting_as->pf_data); |
| return; |
| } |
| |
| pfn_offset = fault_pfn - region->start_pfn; |
| fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset]; |
| |
| /* Capture addresses of faulting write location |
| * for job dumping if write tracking is enabled. |
| */ |
| if (kctx->gwt_enabled) { |
| u64 fault_page_addr = fault->addr & PAGE_MASK; |
| bool found = false; |
| /* Check if this write was already handled. */ |
| list_for_each_entry(pos, &kctx->gwt_current_list, link) { |
| if (fault_page_addr == pos->page_addr) { |
| found = true; |
| break; |
| } |
| } |
| |
| if (!found) { |
| pos = kmalloc(sizeof(*pos), GFP_KERNEL); |
| if (pos) { |
| pos->region = region; |
| set_gwt_element_page_addr_and_size(pos, fault_page_addr, |
| *fault_phys_addr); |
| list_add(&pos->link, &kctx->gwt_current_list); |
| } else { |
| dev_warn(kbdev->dev, "kmalloc failure"); |
| } |
| } |
| } |
| |
| /* Now make this faulting page writable to GPU. */ |
| kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, |
| region->flags, region->gpu_alloc->group_id, &dirty_pgds); |
| |
| kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, |
| dirty_pgds); |
| |
| kbase_gpu_vm_unlock(kctx); |
| } |
| |
| static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, |
| struct kbase_as *faulting_as) |
| { |
| struct kbase_fault *fault = &faulting_as->pf_data; |
| |
| switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { |
| case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: |
| case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: |
| kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); |
| break; |
| case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Execute Permission fault", |
| fault); |
| break; |
| case AS_FAULTSTATUS_ACCESS_TYPE_READ: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Read Permission fault", fault); |
| break; |
| default: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown Permission fault", |
| fault); |
| break; |
| } |
| } |
| #endif |
| |
| /** |
| * estimate_pool_space_required - Determine how much a pool should be grown by to support a future |
| * allocation |
| * @pool: The memory pool to check, including its linked pools |
| * @pages_required: Number of small pages required for the pool to support a future allocation |
| * |
| * The value returned accounts for the size of @pool and the size of each memory pool linked to |
| * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to |
| * allocate from. |
| * |
| * Note: this is only an estimate, because even during the calculation the memory pool(s) involved |
| * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an |
| * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller |
| * should keep attempting an allocation and then re-growing with a new value queried from this |
| * function until the allocation succeeds. |
| * |
| * Return: an estimate of the amount of extra small pages in @pool that are required to satisfy an |
| * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the |
| * allocation. |
| */ |
| static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required) |
| { |
| size_t pages_still_required; |
| |
| for (pages_still_required = pages_required; pool != NULL && pages_still_required; |
| pool = pool->next_pool) { |
| size_t pool_size_small; |
| |
| kbase_mem_pool_lock(pool); |
| |
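| /* Express the pool size in small pages: pool->order is 0 for the pool |
| * of small pages and non-zero for the 2 MiB pool. |
| */ |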
| pool_size_small = kbase_mem_pool_size(pool) << pool->order; |
| if (pool_size_small >= pages_still_required) |
| pages_still_required = 0; |
| else |
| pages_still_required -= pool_size_small; |
| |
| kbase_mem_pool_unlock(pool); |
| } |
| return pages_still_required; |
| } |
| |
| /** |
| * page_fault_try_alloc - Try to allocate memory from a context pool |
| * @kctx: Context pointer |
| * @region: Region to grow |
| * @new_pages: Number of small pages to allocate |
| * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be |
| * either small or 2 MiB pages, depending on the number of pages requested. |
| * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for |
| * pool of small pages. |
| * @prealloc_sas: Pointer to kbase_sub_alloc structures |
| * |
| * This function will try to allocate as many pages as possible from the context pool, then if |
| * required will try to allocate the remaining pages from the device pool. |
| * |
| * This function will not allocate any new memory beyond what is already present in the context or |
| * device pools. This is because it is intended to be called whilst the thread has acquired the |
| * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is |
| * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close(). |
| * |
| * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB |
| * pages, otherwise it will be a count of small pages. |
| * |
| * Return: true if successful, false on failure |
| */ |
| static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_region *region, |
| size_t new_pages, size_t *pages_to_grow, bool *grow_2mb_pool, |
| struct kbase_sub_alloc **prealloc_sas) |
| { |
| size_t total_gpu_pages_alloced = 0; |
| size_t total_cpu_pages_alloced = 0; |
| struct kbase_mem_pool *pool, *root_pool; |
| bool alloc_failed = false; |
| size_t pages_still_required; |
| size_t total_mempools_free_small = 0; |
| |
| lockdep_assert_held(&kctx->reg_lock); |
| lockdep_assert_held(&kctx->mem_partials_lock); |
| |
| if (WARN_ON(region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { |
| /* Do not try to grow the memory pool */ |
| *pages_to_grow = 0; |
| return false; |
| } |
| |
| if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) { |
| root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; |
| *grow_2mb_pool = true; |
| } else { |
| root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; |
| *grow_2mb_pool = false; |
| } |
| |
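| /* If the region has separate GPU and CPU allocations, both need backing |
| * pages, so twice the number of pages must be taken from the pools. |
| */ |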
| if (region->gpu_alloc != region->cpu_alloc) |
| new_pages *= 2; |
| |
| /* Determine how many pages are in the pools before trying to allocate. |
| * Don't attempt to allocate & free if the allocation can't succeed. |
| */ |
| pages_still_required = estimate_pool_space_required(root_pool, new_pages); |
| |
| if (pages_still_required) { |
| /* Insufficient pages in pools. Don't try to allocate - just |
| * request a grow. |
| */ |
| *pages_to_grow = pages_still_required; |
| |
| return false; |
| } |
| |
| /* Since we're not holding any of the mempool locks, the amount of memory in the pools may |
| * change between the above estimate and the actual allocation. |
| */ |
| pages_still_required = new_pages; |
| for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) { |
| size_t pool_size_small; |
| size_t pages_to_alloc_small; |
| size_t pages_to_alloc_small_per_alloc; |
| |
| kbase_mem_pool_lock(pool); |
| |
| /* Allocate as much as possible from this pool */ |
| pool_size_small = kbase_mem_pool_size(pool) << pool->order; |
| total_mempools_free_small += pool_size_small; |
| pages_to_alloc_small = MIN(pages_still_required, pool_size_small); |
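| /* When the GPU and CPU allocations are distinct, new_pages was doubled |
| * earlier, so each of the two allocations takes half of this pool's share. |
| */ |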
| if (region->gpu_alloc == region->cpu_alloc) |
| pages_to_alloc_small_per_alloc = pages_to_alloc_small; |
| else |
| pages_to_alloc_small_per_alloc = pages_to_alloc_small >> 1; |
| |
| if (pages_to_alloc_small) { |
| struct tagged_addr *gpu_pages = kbase_alloc_phy_pages_helper_locked( |
| region->gpu_alloc, pool, pages_to_alloc_small_per_alloc, |
| &prealloc_sas[0]); |
| |
| if (!gpu_pages) |
| alloc_failed = true; |
| else |
| total_gpu_pages_alloced += pages_to_alloc_small_per_alloc; |
| |
| if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) { |
| struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked( |
| region->cpu_alloc, pool, pages_to_alloc_small_per_alloc, |
| &prealloc_sas[1]); |
| |
| if (!cpu_pages) |
| alloc_failed = true; |
| else |
| total_cpu_pages_alloced += pages_to_alloc_small_per_alloc; |
| } |
| } |
| |
| kbase_mem_pool_unlock(pool); |
| |
| if (alloc_failed) { |
| WARN_ON(!pages_still_required); |
| WARN_ON(pages_to_alloc_small >= pages_still_required); |
| WARN_ON(pages_to_alloc_small_per_alloc >= pages_still_required); |
| break; |
| } |
| |
| pages_still_required -= pages_to_alloc_small; |
| } |
| |
| if (pages_still_required) { |
| /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation, |
| * so must in any case use kbase_free_phy_pages_helper() rather than |
| * kbase_free_phy_pages_helper_locked() |
| */ |
| if (total_gpu_pages_alloced > 0) |
| kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced); |
| if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0) |
| kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced); |
| |
| if (alloc_failed) { |
| /* Note that in allocating from the above memory pools, we always ensure |
| * never to request more than is available in each pool with the pool's |
| * lock held. Hence failing to allocate in such situations would be unusual |
| * and we should cancel the growth instead (as re-growing the memory pool |
| * might not fix the situation) |
| */ |
| dev_warn( |
| kctx->kbdev->dev, |
| "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", |
| new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, |
| total_mempools_free_small); |
| *pages_to_grow = 0; |
| } else { |
| /* Tell the caller to try to grow the memory pool |
| * |
| * Freeing pages above may have spilled or returned them to the OS, so we |
| * have to take into account how many are still in the pool before giving a |
| * new estimate for growth required of the pool. We can just re-estimate a |
| * new value. |
| */ |
| pages_still_required = estimate_pool_space_required(root_pool, new_pages); |
| if (pages_still_required) { |
| *pages_to_grow = pages_still_required; |
| } else { |
| /* It's possible another thread could've grown the pool to be just |
| * big enough after we rolled back the allocation. Request at least |
| * one more page to ensure the caller doesn't fail the growth by |
| * conflating it with the alloc_failed case above |
| */ |
| *pages_to_grow = 1u; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Allocation was successful. No pages to grow, return success. */ |
| *pages_to_grow = 0; |
| |
| return true; |
| } |
| |
| void kbase_mmu_page_fault_worker(struct work_struct *data) |
| { |
| u64 fault_pfn; |
| u32 fault_status; |
| size_t new_pages; |
| size_t fault_rel_pfn; |
| struct kbase_as *faulting_as; |
| unsigned int as_no; |
| struct kbase_context *kctx; |
| struct kbase_device *kbdev; |
| struct kbase_va_region *region; |
| struct kbase_fault *fault; |
| int err; |
| bool grown = false; |
| size_t pages_to_grow; |
| bool grow_2mb_pool; |
| struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; |
| int i; |
| size_t current_backed_size; |
| #if MALI_JIT_PRESSURE_LIMIT_BASE |
| size_t pages_trimmed = 0; |
| #endif |
| unsigned long hwaccess_flags; |
| |
| /* Calls to this function are inherently synchronous, with respect to |
| * MMU operations. |
| */ |
| const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; |
| |
| faulting_as = container_of(data, struct kbase_as, work_pagefault); |
| fault = &faulting_as->pf_data; |
| fault_pfn = fault->addr >> PAGE_SHIFT; |
| as_no = faulting_as->number; |
| |
| kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); |
| dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %u", __func__, (void *)data, |
| fault_pfn, as_no); |
| |
| /* Grab the context that was already refcounted in kbase_mmu_interrupt(). |
| * Therefore, it cannot be scheduled out of this AS until we explicitly |
| * release it. |
| */ |
| kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); |
| if (!kctx) { |
| atomic_dec(&kbdev->faults_pending); |
| return; |
| } |
| |
| KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); |
| |
| #if MALI_JIT_PRESSURE_LIMIT_BASE |
| #if !MALI_USE_CSF |
| rt_mutex_lock(&kctx->jctx.lock); |
| #endif |
| #endif |
| |
| #ifdef CONFIG_MALI_ARBITER_SUPPORT |
| /* check if we still have GPU */ |
| if (unlikely(kbase_is_gpu_removed(kbdev))) { |
| dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); |
| goto fault_done; |
| } |
| #endif |
| |
| if (unlikely(fault->protected_mode)) { |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); |
| kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| |
| goto fault_done; |
| } |
| |
| fault_status = fault->status; |
| switch (AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status)) { |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_3: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_4: |
| #if !MALI_USE_CSF |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_IDENTITY: |
| #endif |
| /* need to check against the region to handle this one */ |
| break; |
| |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_3: |
| #ifdef CONFIG_MALI_CINSTR_GWT |
| /* If GWT was ever enabled then we need to handle |
| * write fault pages even if the feature was disabled later. |
| */ |
| if (kctx->gwt_was_enabled) { |
| kbase_gpu_mmu_handle_permission_fault(kctx, faulting_as); |
| goto fault_done; |
| } |
| #endif |
| |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault); |
| goto fault_done; |
| |
| #if !MALI_USE_CSF |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_3: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Translation table bus fault", |
| fault); |
| goto fault_done; |
| #endif |
| |
| #if !MALI_USE_CSF |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_0: |
| fallthrough; |
| #endif |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_3: |
| /* nothing to do, but we don't expect this fault currently */ |
| dev_warn(kbdev->dev, "Access flag unexpectedly set"); |
| goto fault_done; |
| |
| #if MALI_USE_CSF |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN: |
| fallthrough; |
| #else |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN3: |
| fallthrough; |
| #endif |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT3: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Address size fault", fault); |
| goto fault_done; |
| |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3: |
| #if !MALI_USE_CSF |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_0: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_1: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_2: |
| fallthrough; |
| case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_3: |
| #endif |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory attributes fault", |
| fault); |
| goto fault_done; |
| |
| default: |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown fault code", fault); |
| goto fault_done; |
| } |
| |
| page_fault_retry: |
| if (kbdev->pagesize_2mb) { |
| /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ |
| for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { |
| if (!prealloc_sas[i]) { |
| prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); |
| |
| if (!prealloc_sas[i]) { |
| kbase_mmu_report_fault_and_kill( |
| kctx, faulting_as, |
| "Failed pre-allocating memory for sub-allocations' metadata", |
| fault); |
| goto fault_done; |
| } |
| } |
| } |
| } |
| |
| /* so we have a translation fault, |
| * let's see if it is for growable memory |
| */ |
| kbase_gpu_vm_lock(kctx); |
| |
| region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr); |
| if (kbase_is_region_invalid_or_free(region)) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Memory is not mapped on the GPU", fault); |
| goto fault_done; |
| } |
| |
| if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "DMA-BUF is not mapped on the GPU", fault); |
| goto fault_done; |
| } |
| |
| if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Bad physical memory group ID", |
| fault); |
| goto fault_done; |
| } |
| |
| if ((region->flags & GROWABLE_FLAGS_REQUIRED) != GROWABLE_FLAGS_REQUIRED) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory is not growable", fault); |
| goto fault_done; |
| } |
| |
| if ((region->flags & KBASE_REG_DONT_NEED)) { |
| kbase_gpu_vm_unlock(kctx); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Don't need memory can't be grown", fault); |
| goto fault_done; |
| } |
| |
| if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == AS_FAULTSTATUS_ACCESS_TYPE_READ) |
| dev_warn(kbdev->dev, "Grow on pagefault while reading"); |
| |
| /* Find the size we need to grow the region by. |
| * We know the result fits in a size_t because |
| * kbase_region_tracker_find_region_enclosing_address |
| * validated the fault address to be within a size_t from the start_pfn. |
| */ |
| fault_rel_pfn = fault_pfn - region->start_pfn; |
| |
| current_backed_size = kbase_reg_current_backed_size(region); |
| |
| if (fault_rel_pfn < current_backed_size) { |
| struct kbase_mmu_hw_op_param op_param; |
| |
| dev_dbg(kbdev->dev, |
| "Page fault @ VA 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", |
| fault->addr, region->start_pfn, region->start_pfn + current_backed_size); |
| |
| kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| /* [1] in case another page fault occurred while we were |
| * handling the (duplicate) page fault we need to ensure we |
| * don't lose the other page fault as a result of us clearing |
| * the MMU IRQ. Therefore, after we clear the MMU IRQ we send |
| * an UNLOCK command that will retry any stalled memory |
| * transaction (which should cause the other page fault to be |
| * raised again). |
| */ |
| op_param.mmu_sync_info = mmu_sync_info; |
| op_param.kctx_id = kctx->id; |
| /* Can safely skip the invalidate for all levels in case |
| * of duplicate page faults. |
| */ |
| op_param.flush_skip_levels = 0xF; |
| op_param.vpfn = fault_pfn; |
| op_param.nr = 1; |
| spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); |
| err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); |
| |
| if (err) { |
| dev_err(kbdev->dev, |
| "Invalidation for MMU did not complete on handling page fault @ VA 0x%llx", |
| fault->addr); |
| } |
| |
| kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| kbase_gpu_vm_unlock(kctx); |
| |
| goto fault_done; |
| } |
| |
| new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); |
| |
| /* cap to max vsize */ |
| new_pages = min(new_pages, region->nr_pages - current_backed_size); |
| dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); |
| |
| if (new_pages == 0) { |
| struct kbase_mmu_hw_op_param op_param; |
| |
| /* Duplicate of a fault we've already handled, nothing to do */ |
| kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| |
| /* See comment [1] about UNLOCK usage */ |
| op_param.mmu_sync_info = mmu_sync_info; |
| op_param.kctx_id = kctx->id; |
| /* Can safely skip the invalidate for all levels in case |
| * of duplicate page faults. |
| */ |
| op_param.flush_skip_levels = 0xF; |
| op_param.vpfn = fault_pfn; |
| op_param.nr = 1; |
| spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); |
| err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); |
| |
| if (err) { |
| dev_err(kbdev->dev, |
| "Invalidation for MMU did not complete on handling page fault @ VA 0x%llx", |
| fault->addr); |
| } |
| |
| kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| kbase_gpu_vm_unlock(kctx); |
| goto fault_done; |
| } |
| |
| pages_to_grow = 0; |
| |
| #if MALI_JIT_PRESSURE_LIMIT_BASE |
| if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { |
| kbase_jit_request_phys_increase(kctx, new_pages); |
| pages_trimmed = new_pages; |
| } |
| #endif |
| |
| spin_lock(&kctx->mem_partials_lock); |
| grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, |
| prealloc_sas); |
| spin_unlock(&kctx->mem_partials_lock); |
| |
| if (grown) { |
| u64 dirty_pgds = 0; |
| u64 pfn_offset; |
| struct kbase_mmu_hw_op_param op_param; |
| |
| /* alloc success */ |
| WARN_ON(kbase_reg_current_backed_size(region) > region->nr_pages); |
| |
| /* set up the new pages */ |
| pfn_offset = kbase_reg_current_backed_size(region) - new_pages; |
| /* |
| * Note: |
| * Issuing an MMU operation will unlock the MMU and cause the |
| * translation to be replayed. If the page insertion fails then, |
| * rather than trying to continue, the context should be killed, |
| * so the no_flush version of insert_pages is used, which allows |
| * us to unlock the MMU as we see fit. |
| */ |
| err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, |
| &kbase_get_gpu_phy_pages(region)[pfn_offset], |
| new_pages, region->flags, |
| region->gpu_alloc->group_id, &dirty_pgds, region, |
| false); |
| if (err) { |
| kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); |
| if (region->gpu_alloc != region->cpu_alloc) |
| kbase_free_phy_pages_helper(region->cpu_alloc, new_pages); |
| kbase_gpu_vm_unlock(kctx); |
| /* The locked VA region will be unlocked and the cache |
| * invalidated in here |
| */ |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Page table update failure", fault); |
| goto fault_done; |
| } |
| KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages); |
| if (kbase_reg_is_valid(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA))) |
| trace_mali_mmu_page_fault_extra_grow(region, fault, new_pages); |
| else |
| trace_mali_mmu_page_fault_grow(region, fault, new_pages); |
| |
| #if MALI_INCREMENTAL_RENDERING_JM |
| /* Switch to incremental rendering if we have nearly run out of |
| * memory in a JIT memory allocation. |
| */ |
| if (region->threshold_pages && |
| kbase_reg_current_backed_size(region) > region->threshold_pages) { |
| dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", |
| new_pages + current_backed_size, region->threshold_pages); |
| |
| if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { |
| dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); |
| kbase_va_region_alloc_get(kctx, region); |
| } |
| } |
| #endif |
| |
| /* AS transaction begin */ |
| |
| /* clear MMU interrupt - this needs to be done after updating |
| * the page tables but before issuing a FLUSH command. The |
| * FLUSH cmd has a side effect that it restarts stalled memory |
| * transactions in other address spaces which may cause |
| * another fault to occur. If we didn't clear the interrupt at |
| * this stage a new IRQ might not be raised when the GPU finds |
| * an MMU IRQ is already pending. |
| */ |
| kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| |
| op_param.vpfn = region->start_pfn + pfn_offset; |
| op_param.nr = new_pages; |
| op_param.op = KBASE_MMU_OP_FLUSH_PT; |
| op_param.kctx_id = kctx->id; |
| op_param.mmu_sync_info = mmu_sync_info; |
| spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { |
| /* Unlock to invalidate the TLB (and resume the MMU) */ |
| op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); |
| err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param); |
| } else { |
| /* flush L2 and unlock the VA (resumes the MMU) */ |
| err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); |
| } |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); |
| |
| if (err) { |
| dev_err(kbdev->dev, |
| "Flush for GPU page table update did not complete on handling page fault @ VA 0x%llx", |
| fault->addr); |
| } |
| |
| /* AS transaction end */ |
| |
| /* reenable this in the mask */ |
| kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); |
| |
| #ifdef CONFIG_MALI_CINSTR_GWT |
| if (kctx->gwt_enabled) { |
| /* GWT also tracks growable regions. */ |
| struct kbasep_gwt_list_element *pos; |
| |
| pos = kmalloc(sizeof(*pos), GFP_KERNEL); |
| if (pos) { |
| pos->region = region; |
| pos->page_addr = (region->start_pfn + pfn_offset) << PAGE_SHIFT; |
| pos->num_pages = new_pages; |
| list_add(&pos->link, &kctx->gwt_current_list); |
| } else { |
| dev_warn(kbdev->dev, "kmalloc failure"); |
| } |
| } |
| #endif |
| |
| #if MALI_JIT_PRESSURE_LIMIT_BASE |
| if (pages_trimmed) { |
| kbase_jit_done_phys_increase(kctx, pages_trimmed); |
| pages_trimmed = 0; |
| } |
| #endif |
| kbase_gpu_vm_unlock(kctx); |
| } else { |
| int ret = -ENOMEM; |
| const u8 group_id = region->gpu_alloc->group_id; |
| |
| kbase_gpu_vm_unlock(kctx); |
| |
| /* If the memory pool was insufficient then grow it and retry. |
| * Otherwise fail the allocation. |
| */ |
| if (pages_to_grow > 0) { |
| if (kbdev->pagesize_2mb && grow_2mb_pool) { |
| /* Round page requirement up to nearest 2 MB */ |
| struct kbase_mem_pool *const lp_mem_pool = |
| &kctx->mem_pools.large[group_id]; |
| |
| pages_to_grow = |
| (pages_to_grow + ((1u << lp_mem_pool->order) - 1u)) >> |
| lp_mem_pool->order; |
| |
| ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task); |
| } else { |
| struct kbase_mem_pool *const mem_pool = |
| &kctx->mem_pools.small[group_id]; |
| |
| ret = kbase_mem_pool_grow(mem_pool, pages_to_grow, kctx->task); |
| } |
| } |
| if (ret < 0) { |
| /* failed to extend, handle as a normal PF */ |
| if (unlikely(ret == -EPERM)) |
| kbase_ctx_flag_set(kctx, KCTX_PAGE_FAULT_REPORT_SKIP); |
| kbase_mmu_report_fault_and_kill(kctx, faulting_as, |
| "Page allocation failure", fault); |
| } else { |
| dev_dbg(kbdev->dev, "Try again after pool_grow"); |
| goto page_fault_retry; |
| } |
| } |
| |
| fault_done: |
| #if MALI_JIT_PRESSURE_LIMIT_BASE |
| if (pages_trimmed) { |
| kbase_gpu_vm_lock(kctx); |
| kbase_jit_done_phys_increase(kctx, pages_trimmed); |
| kbase_gpu_vm_unlock(kctx); |
| } |
| #if !MALI_USE_CSF |
| rt_mutex_unlock(&kctx->jctx.lock); |
| #endif |
| #endif |
| |
| for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) |
| kfree(prealloc_sas[i]); |
| |
| /* |
| * By this point, the fault was handled in some way, |
| * so release the ctx refcount |
| */ |
| release_ctx(kbdev, kctx); |
| |
| atomic_dec(&kbdev->faults_pending); |
| dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); |
| } |
| |
| static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) |
| { |
| u64 *page; |
| struct page *p; |
| phys_addr_t pgd; |
| |
| p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); |
| if (!p) |
| return KBASE_INVALID_PHYSICAL_ADDRESS; |
| |
| page = kbase_kmap(p); |
| |
| if (page == NULL) |
| goto alloc_free; |
| |
| pgd = page_to_phys(p); |
| |
| /* If the MMU tables belong to a context then account the memory usage |
| * to that context, otherwise the MMU tables are device wide and are |
| * only accounted to the device. |
| */ |
| if (mmut->kctx) { |
| int new_page_count; |
| |
| new_page_count = atomic_add_return(1, &mmut->kctx->used_pages); |
| KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, (u64)new_page_count); |
| kbase_process_page_usage_inc(mmut->kctx, 1); |
| } |
| |
| atomic_add(1, &kbdev->memdev.used_pages); |
| |
| kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); |
| |
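| /* Initialise all entries of the newly allocated PGD as invalid. */ |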
| kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); |
| |
| /* As this page is newly created, there is no content to |
| * clean or invalidate in the GPU caches. |
| */ |
| kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); |
| |
| kbase_kunmap(p, page); |
| return pgd; |
| |
| alloc_free: |
| kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); |
| |
| return KBASE_INVALID_PHYSICAL_ADDRESS; |
| } |
| |
| /** |
| * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1 |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @pgd: Physical address of the level N page directory. |
| * @vpfn: The virtual page frame number, in GPU_PAGE_SIZE units. |
| * @level: The level of MMU page table (N). |
| * |
| * Return: |
| * * 0 - OK |
| * * -EFAULT - level N+1 PGD does not exist |
| * * -EINVAL - kmap() failed for level N PGD PFN |
| */ |
| static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| phys_addr_t *pgd, u64 vpfn, int level) |
| { |
| u64 *page; |
| phys_addr_t target_pgd; |
| struct page *p; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| /* |
| * Architecture spec defines level-0 as being the top-most. |
| * This is a bit unfortunate here, but we keep the same convention. |
| */ |
| vpfn >>= (3 - level) * 9; |
| vpfn &= 0x1FF; |
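| /* vpfn now holds the 9-bit index into the level N page directory. */ |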
| |
| p = pfn_to_page(PFN_DOWN(*pgd)); |
| page = kbase_kmap(p); |
| if (page == NULL) { |
| dev_err(kbdev->dev, "%s: kmap failure", __func__); |
| return -EINVAL; |
| } |
| |
| if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { |
| dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, |
| vpfn); |
| kbase_kunmap(p, page); |
| return -EFAULT; |
| } else { |
| target_pgd = kbdev->mmu_mode->pte_to_phy_addr( |
| kbdev->mgm_dev->ops.mgm_pte_to_original_pte( |
| kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); |
| } |
| |
| kbase_kunmap(p, page); |
| *pgd = target_pgd; |
| |
| return 0; |
| } |
| |
| /** |
| * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @vpfn: The virtual page frame number, in GPU_PAGE_SIZE units. |
| * @in_level: The level of MMU page table (N). |
| * @out_level: Set to the level of the lowest valid PGD found on success. |
| * Invalid on error. |
| * @out_pgd: Set to the lowest valid PGD found on success. |
| * Invalid on error. |
| * |
| * Does a page table walk starting from the top level (L0) down to in_level to find a valid PGD at |
| * or closest to in_level. |
| * |
| * Terminology: |
| * Level-0 = Top-level = highest |
| * Level-3 = Bottom-level = lowest |
| * |
| * Return: |
| * * 0 - OK |
| * * -EINVAL - kmap() failed during page table walk. |
| */ |
| static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd) |
| { |
| phys_addr_t pgd; |
| int l; |
| int err = 0; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| pgd = mmut->pgd; |
| |
| for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) { |
| err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); |
| |
| /* Handle failure condition */ |
| if (err) { |
| dev_dbg(kbdev->dev, |
| "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", |
| __func__, l + 1); |
| break; |
| } |
| } |
| |
| *out_pgd = pgd; |
| *out_level = l; |
| |
| /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid. |
| * This implies that we have found the lowest valid pgd. Reset the error code. |
| */ |
| if (err == -EFAULT) |
| err = 0; |
| |
| return err; |
| } |
| |
| /* |
| * On success, sets out_pgd to the PGD for the specified level of translation |
| * Returns -EFAULT if a valid PGD is not found |
| */ |
| static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, |
| int level, phys_addr_t *out_pgd) |
| { |
| phys_addr_t pgd; |
| int l; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| pgd = mmut->pgd; |
| |
| for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { |
| int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); |
| /* Handle failure condition */ |
| if (err) { |
| dev_err(kbdev->dev, |
| "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", |
| __func__, l + 1); |
| return err; |
| } |
| } |
| |
| *out_pgd = pgd; |
| |
| return 0; |
| } |
| |
| static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, u64 from_vpfn, |
| u64 to_vpfn, u64 *dirty_pgds, |
| struct tagged_addr *phys, bool ignore_page_migration) |
| { |
| u64 vpfn = from_vpfn; |
| struct kbase_mmu_mode const *mmu_mode; |
| |
| /* Both from_vpfn and to_vpfn are in GPU_PAGE_SIZE units */ |
| |
| /* 64-bit address range is the max */ |
| KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / GPU_PAGE_SIZE)); |
| KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| mmu_mode = kbdev->mmu_mode; |
| kbase_mmu_reset_free_pgds_list(mmut); |
| |
| while (vpfn < to_vpfn) { |
| unsigned int idx = vpfn & 0x1FF; |
| unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; |
| unsigned int pcount = 0; |
| unsigned int left = to_vpfn - vpfn; |
| int level; |
| u64 *page; |
| phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; |
| phys_addr_t pgd = mmut->pgd; |
| struct page *p = phys_to_page(pgd); |
| |
| register unsigned int num_of_valid_entries; |
| |
| if (count > left) |
| count = left; |
| |
| /* need to check if this is a 2MB page or a small page */ |
| for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { |
| idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; |
| pgds[level] = pgd; |
| page = kbase_kmap(p); |
| if (mmu_mode->ate_is_valid(page[idx], level)) |
| break; /* keep the mapping */ |
| /* Read the PTE before the mapping of the PGD page is released */
| pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
| kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
| kbase_kunmap(p, page);
| p = phys_to_page(pgd);
| } |
| |
| switch (level) { |
| case MIDGARD_MMU_LEVEL(2): |
| /* remap to single entry to update */ |
| pcount = 1; |
| break; |
| case MIDGARD_MMU_BOTTOMLEVEL: |
| /* page count is the same as the logical count */ |
| pcount = count; |
| break; |
| default: |
| dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level);
| goto next; |
| } |
| |
| if (dirty_pgds && pcount > 0) |
| *dirty_pgds |= 1ULL << level; |
| |
| num_of_valid_entries = mmu_mode->get_num_valid_entries(page); |
| if (WARN_ON_ONCE(num_of_valid_entries < pcount)) |
| num_of_valid_entries = 0; |
| else |
| num_of_valid_entries -= pcount; |
| |
| /* Invalidate the entries we added */ |
| mmu_mode->entries_invalidate(&page[idx], pcount); |
| |
| if (!num_of_valid_entries) { |
| kbase_kunmap(p, page); |
| |
| kbase_mmu_add_to_free_pgds_list(mmut, p); |
| |
| kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, |
| KBASE_MMU_OP_NONE, dirty_pgds); |
| vpfn += count; |
| continue; |
| } |
| |
| mmu_mode->set_num_valid_entries(page, num_of_valid_entries); |
| |
| /* MMU cache flush strategy is NONE because GPU cache maintenance is |
| * going to be done by the caller |
| */ |
| kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), |
| kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, |
| KBASE_MMU_OP_NONE); |
| kbase_kunmap(p, page); |
| next: |
| vpfn += count; |
| } |
| |
| /* If page migration is enabled: the only way to recover from failure |
| * is to mark all pages as not movable. It is not predictable what's |
| * going to happen to these pages at this stage. They might become
| * movable again once they are returned to a memory pool.
| */ |
| if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { |
| const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE; |
| u64 i; |
| |
| for (i = 0; i < num_pages; i++) { |
| struct page *phys_page = as_page(phys[i]); |
| struct kbase_page_metadata *page_md = kbase_page_private(phys_page); |
| |
| if (page_md) { |
| spin_lock(&page_md->migrate_lock); |
| page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); |
| spin_unlock(&page_md->migrate_lock); |
| } |
| } |
| } |
| } |
| |
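| /**
|  * mmu_flush_invalidate_insert_pages() - Perform MMU maintenance after inserting pages
|  *
|  * @kbdev: Pointer to kbase device.
|  * @mmut: GPU MMU page table.
|  * @vpfn: Start virtual page frame number (in PAGE_SIZE units) of the inserted range.
|  * @nr: Number of pages that were inserted.
|  * @dirty_pgds: Bitmask of the PGD levels that were modified by the insertion.
|  * @mmu_sync_info: Indicates whether this call is synchronous with respect to MMU operations.
|  * @insert_pages_failed: Whether the insertion failed and had to be rolled back.
|  *
|  * On GPUs where cache flushes can be issued through the GPU_CONTROL interface and the
|  * insertion succeeded, only an MMU cache invalidation is performed because the new pages
|  * are not present in the GPU caches. Otherwise a full flush and invalidate is issued.
|  * Levels that were not modified are skipped based on @dirty_pgds.
|  */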
| static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, const u64 vpfn, |
| size_t nr, u64 dirty_pgds, |
| enum kbase_caller_mmu_sync_info mmu_sync_info, |
| bool insert_pages_failed) |
| { |
| struct kbase_mmu_hw_op_param op_param; |
| int as_nr = 0; |
| |
| op_param.vpfn = vpfn; |
| op_param.nr = nr; |
| op_param.op = KBASE_MMU_OP_FLUSH_PT; |
| op_param.mmu_sync_info = mmu_sync_info; |
| op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; |
| op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); |
| |
| #if MALI_USE_CSF |
| as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; |
| #else |
| WARN_ON(!mmut->kctx); |
| #endif |
| |
| /* MMU cache flush strategy depends on whether GPU control commands for |
| * flushing physical address ranges are supported. The new physical pages |
| * are not present in GPU caches therefore they don't need any cache |
| * maintenance, but PGDs in the page table may or may not be created anew. |
| * |
| * Operations that affect the whole GPU cache shall only be done if it's |
| * impossible to update physical ranges. |
| * |
| * On GPUs where flushing by physical address range is supported, |
| * full cache flush is done when an error occurs during |
| * insert_pages() to keep the error handling simpler. |
| */ |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) |
| mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); |
| else |
| mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); |
| } |
| |
| /** |
| * update_parent_pgds() - Updates the page table from bottom level towards |
| * the top level to insert a new ATE |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @cur_level: The level of MMU page table where the ATE needs to be added. |
| * The bottom PGD level. |
| * @insert_level: The level of MMU page table where the chain of newly allocated |
| * PGDs needs to be linked-in/inserted. |
| * @insert_vpfn: The virtual page frame number, in GPU_PAGE_SIZE units, for the ATE. |
| * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains |
| * the physical addresses of newly allocated PGDs from index |
| * insert_level+1 to cur_level, and an existing PGD at index |
| * insert_level. |
| * |
| * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD |
| * at insert_level which already exists in the MMU Page Tables. Migration status is also |
| * updated for all the newly allocated PGD pages. |
| * |
| * Return: |
| * * 0 - OK |
| * * -EFAULT - level N+1 PGD does not exist |
| * * -EINVAL - kmap() failed for level N PGD PFN |
| */ |
| static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| int cur_level, int insert_level, u64 insert_vpfn, |
| phys_addr_t *pgds_to_insert) |
| { |
| int pgd_index; |
| int err = 0; |
| |
| /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1) |
| * Loop runs from the bottom-most to the top-most level so that all entries in the chain |
| * are valid when they are inserted into the MMU Page table via the insert_level PGD. |
| */ |
| for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) { |
| int parent_index = pgd_index - 1; |
| phys_addr_t parent_pgd = pgds_to_insert[parent_index]; |
| unsigned int current_valid_entries; |
| u64 pte; |
| phys_addr_t target_pgd = pgds_to_insert[pgd_index]; |
| u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF; |
| struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); |
| u64 *parent_page_va; |
| |
| if (WARN_ON_ONCE(target_pgd == KBASE_INVALID_PHYSICAL_ADDRESS)) { |
| err = -EFAULT; |
| goto failure_recovery; |
| } |
| |
| parent_page_va = kbase_kmap(parent_page); |
| |
| if (unlikely(parent_page_va == NULL)) { |
| dev_err(kbdev->dev, "%s: kmap failure", __func__); |
| err = -EINVAL; |
| goto failure_recovery; |
| } |
| |
| current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va); |
| |
| kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); |
| parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( |
| kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); |
| kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); |
| kbase_kunmap(parent_page, parent_page_va); |
| |
| if (parent_index != insert_level) { |
| /* Newly allocated PGDs */ |
| kbase_mmu_sync_pgd_cpu( |
| kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), |
| sizeof(u64)); |
| } else { |
| /* A new valid entry is added to an existing PGD. Perform the |
| * invalidate operation for GPU cache as it could be having a |
| * cacheline that contains the entry (in an invalid form). |
| */ |
| kbase_mmu_sync_pgd( |
| kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), |
| kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), |
| sizeof(u64), KBASE_MMU_OP_FLUSH_PT); |
| } |
| |
| /* Update the new target_pgd page to its stable state */ |
| if (kbase_is_page_migration_enabled()) { |
| struct kbase_page_metadata *page_md = |
| kbase_page_private(phys_to_page(target_pgd)); |
| |
| spin_lock(&page_md->migrate_lock); |
| |
| WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || |
| IS_PAGE_ISOLATED(page_md->status)); |
| |
| if (mmut->kctx) { |
| page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); |
| page_md->data.pt_mapped.mmut = mmut; |
| page_md->data.pt_mapped.pgd_vpfn_level = |
| PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index); |
| } else { |
| page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); |
| } |
| |
| spin_unlock(&page_md->migrate_lock); |
| } |
| } |
| |
| return 0; |
| |
| failure_recovery: |
| /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */ |
| for (; pgd_index < cur_level; pgd_index++) { |
| phys_addr_t pgd = pgds_to_insert[pgd_index]; |
| struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); |
| u64 *pgd_page_va = kbase_kmap(pgd_page); |
| u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; |
| |
| kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); |
| kbase_kunmap(pgd_page, pgd_page_va); |
| } |
| |
| return err; |
| } |
| |
| /** |
| * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to |
| * level_high (inclusive) |
| * |
| * @kbdev: Device pointer. |
| * @mmut: GPU MMU page table. |
| * @level_low: The lower bound for the levels for which the PGD allocs are required |
| * @level_high: The higher bound for the levels for which the PGD allocs are required |
| * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the |
| * newly allocated PGD addresses to. |
| * |
| * Numerically, level_low < level_high, not to be confused with top level and |
| * bottom level concepts for MMU PGDs. They are only used as low and high bounds |
| * in an incrementing for-loop. |
| * |
| * Return: |
| * * 0 - OK |
| * * -ENOMEM - allocation failed for a PGD. |
| */ |
| static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| phys_addr_t *new_pgds, int level_low, int level_high) |
| { |
| int err = 0; |
| int i; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| for (i = level_low; i <= level_high; i++) { |
| do { |
| new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); |
| if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) |
| break; |
| |
| rt_mutex_unlock(&mmut->mmu_lock); |
| err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], |
| (size_t)level_high, NULL); |
| rt_mutex_lock(&mmut->mmu_lock); |
| if (err) { |
| dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", |
| __func__, err); |
| |
| /* Free all PGDs allocated in previous successful iterations |
| * from (i-1) to level_low |
| */ |
| for (i = (i - 1); i >= level_low; i--) { |
| if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS) |
| kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); |
| } |
| |
| return err; |
| } |
| } while (1); |
| } |
| |
| return 0; |
| } |
| |
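| /**
|  * kbase_mmu_insert_single_page() - Map a single physical page at multiple GPU virtual pages
|  *
|  * @kctx: The kbase context to insert the page into.
|  * @start_vpfn: Start virtual page frame number (in PAGE_SIZE units) to map.
|  * @phys: The single physical page backing every virtual page in the range.
|  * @nr: Number of virtual pages to map.
|  * @flags: Memory region attributes for the mapping.
|  * @group_id: Physical memory group ID for the mapping.
|  * @mmu_sync_info: Indicates whether this call is synchronous with respect to MMU operations.
|  * @ignore_page_migration: Whether page migration metadata should be ignored.
|  *
|  * All @nr virtual pages are backed by the same physical page, e.g. the aliasing sink
|  * page. If page migration is enabled the page is marked NOT_MOVABLE, since a page with
|  * multiple GPU mappings cannot be migrated easily. MMU cache invalidation (and GPU cache
|  * maintenance where needed) is performed before returning.
|  *
|  * Return: 0 on success, otherwise an error code.
|  */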
| static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, |
| struct tagged_addr phys, size_t nr, unsigned long flags, |
| int const group_id, |
| enum kbase_caller_mmu_sync_info mmu_sync_info, |
| bool ignore_page_migration) |
| { |
| phys_addr_t pgd; |
| u64 *pgd_page; |
| u64 insert_vpfn = start_vpfn; |
| size_t remain = nr; |
| int err; |
| struct kbase_device *kbdev; |
| u64 dirty_pgds = 0; |
| unsigned int i; |
| phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; |
| enum kbase_mmu_op_type flush_op; |
| struct kbase_mmu_table *mmut = &kctx->mmu; |
| int l, cur_level, insert_level; |
| const phys_addr_t base_phys_address = as_phys_addr_t(phys); |
| |
| if (WARN_ON(kctx == NULL)) |
| return -EINVAL; |
| |
| /* 64-bit address range is the max */ |
| KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); |
| |
| kbdev = kctx->kbdev; |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| /* Convert to GPU_PAGE_SIZE units. */ |
| insert_vpfn *= GPU_PAGES_PER_CPU_PAGE; |
| remain *= GPU_PAGES_PER_CPU_PAGE; |
| |
| /* If page migration is enabled, pages involved in multiple GPU mappings |
| * are always treated as not movable. |
| */ |
| if (kbase_is_page_migration_enabled() && !ignore_page_migration) { |
| struct page *phys_page = as_page(phys); |
| struct kbase_page_metadata *page_md = kbase_page_private(phys_page); |
| |
| if (page_md) { |
| spin_lock(&page_md->migrate_lock); |
| page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); |
| spin_unlock(&page_md->migrate_lock); |
| } |
| } |
| |
| rt_mutex_lock(&mmut->mmu_lock); |
| |
| while (remain) { |
| unsigned int vindex = insert_vpfn & 0x1FF; |
| unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; |
| struct page *p; |
| register unsigned int num_of_valid_entries; |
| bool newly_created_pgd = false; |
| |
| if (count > remain) |
| count = remain; |
| |
| cur_level = MIDGARD_MMU_BOTTOMLEVEL; |
| insert_level = cur_level; |
| |
| /* |
| * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly |
| * suboptimal. We don't have to re-parse the whole tree |
| * each time (just cache the l0-l2 sequence). |
| * On the other hand, it's only a gain when we map more than |
| * 256 pages at once (on average). Do we really care? |
| */ |
| /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
| err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, |
| &pgd); |
| |
| if (err) { |
| dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", |
| __func__, err); |
| goto fail_unlock; |
| } |
| |
| /* No valid pgd at cur_level */ |
| if (insert_level != cur_level) { |
| /* Allocate new pgds for all missing levels from the required level |
| * down to the lowest valid pgd at insert_level |
| */ |
| err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), |
| cur_level); |
| if (err) |
| goto fail_unlock; |
| |
| newly_created_pgd = true; |
| |
| new_pgds[insert_level] = pgd; |
| |
| /* If we didn't find an existing valid pgd at cur_level, |
| * we've now allocated one. The ATE in the next step should |
| * be inserted in this newly allocated pgd. |
| */ |
| pgd = new_pgds[cur_level]; |
| } |
| |
| p = pfn_to_page(PFN_DOWN(pgd)); |
| |
| pgd_page = kbase_kmap(p); |
| if (!pgd_page) { |
| dev_err(kbdev->dev, "%s: kmap failure", __func__); |
| err = -ENOMEM; |
| |
| goto fail_unlock_free_pgds; |
| } |
| |
| num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); |
| |
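| /* Every ATE in this chunk is backed by the same physical CPU page. When the
|  * CPU page size is larger than the GPU page size, GPU_PAGES_PER_CPU_PAGE
|  * consecutive GPU-page-sized ATEs are written for each repetition.
|  */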
| for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) { |
| unsigned int j; |
| |
| for (j = 0; j < GPU_PAGES_PER_CPU_PAGE; j++) { |
| unsigned int ofs = vindex + i + j; |
| phys_addr_t page_address = base_phys_address + (j * GPU_PAGE_SIZE); |
| |
| /* Warn if there is already a valid ATE where the new entry is to be written */
| WARN_ON_ONCE((pgd_page[ofs] & 1UL));
| pgd_page[ofs] = kbase_mmu_create_ate(kbdev, as_tagged(page_address), |
| flags, MIDGARD_MMU_BOTTOMLEVEL, |
| group_id); |
| } |
| } |
| |
| kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries + count); |
| |
| dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL); |
| |
| /* MMU cache flush operation here will depend on whether bottom level |
| * PGD is newly created or not. |
| * |
| * If bottom level PGD is newly created then no GPU cache maintenance is |
| * required as the PGD will not exist in GPU cache. Otherwise GPU cache |
| * maintenance is required for existing PGD. |
| */ |
| flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; |
| |
| kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), |
| kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), |
| flush_op); |
| |
| if (newly_created_pgd) { |
| err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, |
| new_pgds); |
| if (err) { |
| dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", |
| __func__, err); |
| |
| kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); |
| |
| kbase_kunmap(p, pgd_page); |
| goto fail_unlock_free_pgds; |
| } |
| } |
| |
| insert_vpfn += count; |
| remain -= count; |
| kbase_kunmap(p, pgd_page); |
| } |
| |
| rt_mutex_unlock(&mmut->mmu_lock); |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, |
| false); |
| |
| return 0; |
| |
| fail_unlock_free_pgds: |
| /* Free the pgds allocated by us from insert_level+1 to bottom level */ |
| for (l = cur_level; l > insert_level; l--) |
| kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); |
| |
| fail_unlock: |
| if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) { |
| /* Invalidate the pages we have partially completed */ |
| mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE, |
| insert_vpfn, &dirty_pgds, NULL, true); |
| } |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, |
| true); |
| kbase_mmu_free_pgds_list(kbdev, mmut); |
| rt_mutex_unlock(&mmut->mmu_lock); |
| |
| return err; |
| } |
| |
| int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, |
| struct tagged_addr phys, size_t nr, unsigned long flags, |
| int const group_id, |
| enum kbase_caller_mmu_sync_info mmu_sync_info) |
| { |
| /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ |
| return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, |
| false); |
| } |
| |
| int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, |
| struct tagged_addr phys, size_t nr, unsigned long flags, |
| int const group_id, |
| enum kbase_caller_mmu_sync_info mmu_sync_info) |
| { |
| /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ |
| return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, |
| false); |
| } |
| |
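| /**
|  * kbase_mmu_progress_migration_on_insert() - Update page migration metadata on map
|  *
|  * @phys: Physical page that has just been mapped on the GPU.
|  * @reg: GPU VA region the page belongs to, or NULL if not available.
|  * @mmut: GPU MMU page table the page was mapped into.
|  * @vpfn: Virtual page frame number (in GPU_PAGE_SIZE units) the page was mapped at.
|  *
|  * Moves a page from ALLOCATE_IN_PROGRESS to ALLOCATED_MAPPED and records the mapping
|  * details needed for a later migration. If no region is given, or the page is already
|  * mapped on the GPU, the page is marked NOT_MOVABLE instead. Does nothing when page
|  * migration support is not compiled in.
|  */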
| static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, |
| struct kbase_va_region *reg, |
| struct kbase_mmu_table *mmut, const u64 vpfn) |
| { |
| struct page *phys_page = as_page(phys); |
| struct kbase_page_metadata *page_md = kbase_page_private(phys_page); |
| |
| if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) |
| return; |
| |
| spin_lock(&page_md->migrate_lock); |
| |
| /* If no GPU va region is given: the metadata provided are |
| * invalid. |
| * |
| * If the page is already allocated and mapped: this is |
| * an additional GPU mapping, probably to create a memory |
| * alias, which means it is no longer possible to migrate |
| * the page easily because tracking all the GPU mappings |
| * would be too costly. |
| * |
| * In any case: the page becomes not movable. It is kept |
| * alive, but attempts to migrate it will fail. The page |
| * will be freed if it is still not movable when it returns |
| * to a memory pool. Notice that the movable flag is not |
| * cleared because that would require taking the page lock. |
| */ |
| if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { |
| page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); |
| } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { |
| page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); |
| page_md->data.mapped.reg = reg; |
| page_md->data.mapped.mmut = mmut; |
| page_md->data.mapped.vpfn = vpfn; |
| } |
| |
| spin_unlock(&page_md->migrate_lock); |
| } |
| |
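| /**
|  * kbase_mmu_progress_migration_on_teardown() - Update page migration metadata on unmap
|  *
|  * @kbdev: Pointer to kbase device.
|  * @phys: Array of physical pages that have just been unmapped.
|  * @requested_nr: Number of pages in @phys.
|  *
|  * Moves ALLOCATED_MAPPED pages to FREE_IN_PROGRESS, or to FREE_ISOLATED_IN_PROGRESS if
|  * a page has been isolated for migration. In the latter case the corresponding entry in
|  * @phys is cleared so that the page is freed by the page migration code instead of the
|  * caller. Small pages that are part of a large page are skipped. Does nothing when page
|  * migration support is not compiled in.
|  */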
| static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, |
| struct tagged_addr *phys, size_t requested_nr) |
| { |
| size_t i; |
| |
| if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) |
| return; |
| |
| for (i = 0; i < requested_nr; i++) { |
| struct page *phys_page = as_page(phys[i]); |
| struct kbase_page_metadata *page_md = kbase_page_private(phys_page); |
| |
| /* Skip the small page that is part of a large page, as the large page is |
| * excluded from the migration process. |
| */ |
| if (is_huge(phys[i]) || is_partial(phys[i])) |
| continue; |
| |
| if (page_md) { |
| u8 status; |
| |
| spin_lock(&page_md->migrate_lock); |
| status = PAGE_STATUS_GET(page_md->status); |
| |
| if (status == ALLOCATED_MAPPED) { |
| if (IS_PAGE_ISOLATED(page_md->status)) { |
| page_md->status = PAGE_STATUS_SET( |
| page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); |
| page_md->data.free_isolated.kbdev = kbdev; |
| /* At this point, we still have a reference |
| * to the page via its page migration metadata, |
| * and any page with the FREE_ISOLATED_IN_PROGRESS |
| * status will subsequently be freed in either |
| * kbase_page_migrate() or kbase_page_putback() |
| */ |
| phys[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS); |
| } else |
| page_md->status = PAGE_STATUS_SET(page_md->status, |
| (u8)FREE_IN_PROGRESS); |
| } |
| |
| spin_unlock(&page_md->migrate_lock); |
| } |
| } |
| } |
| |
| u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, |
| unsigned long const flags, int const level, int const group_id) |
| { |
| u64 entry; |
| |
| kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); |
| return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id, level, |
| entry); |
| } |
| |
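| /**
|  * mmu_insert_pages_no_flush() - Map physical pages into the GPU page table
|  *
|  * @kbdev: Pointer to kbase device.
|  * @mmut: GPU MMU page table to insert the pages into.
|  * @start_vpfn: Start virtual page frame number (in PAGE_SIZE units) to map.
|  * @phys: Array of physical pages to map.
|  * @nr: Number of pages to map.
|  * @flags: Memory region attributes for the mapping.
|  * @group_id: Physical memory group ID for the mapping.
|  * @dirty_pgds: Optional pointer to a bitmask updated with the PGD levels that were modified.
|  * @reg: GPU VA region, used to update page migration metadata, or NULL.
|  * @ignore_page_migration: Whether page migration metadata should be ignored.
|  *
|  * 2MB (huge head) pages are mapped with a single ATE at level 2, all other pages with
|  * bottom-level ATEs. No MMU cache invalidation or GPU cache maintenance is done on
|  * success; that is left to the caller. On failure, any partially completed insertion is
|  * rolled back before returning.
|  *
|  * Return: 0 on success, otherwise an error code.
|  */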
| static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| u64 start_vpfn, struct tagged_addr *phys, size_t nr, |
| unsigned long flags, int const group_id, u64 *dirty_pgds, |
| struct kbase_va_region *reg, bool ignore_page_migration) |
| { |
| phys_addr_t pgd; |
| u64 *pgd_page; |
| u64 insert_vpfn = start_vpfn; |
| size_t remain = nr; |
| int err; |
| struct kbase_mmu_mode const *mmu_mode; |
| unsigned int i; |
| phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; |
| int l, cur_level, insert_level; |
| struct tagged_addr *start_phys = phys; |
| |
| /* Note that 0 is a valid start_vpfn */ |
| /* 64-bit address range is the max */ |
| KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); |
| |
| mmu_mode = kbdev->mmu_mode; |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| /* Convert to GPU_PAGE_SIZE units. */ |
| insert_vpfn *= GPU_PAGES_PER_CPU_PAGE; |
| remain *= GPU_PAGES_PER_CPU_PAGE; |
| rt_mutex_lock(&mmut->mmu_lock); |
| |
| while (remain) { |
| unsigned int vindex = insert_vpfn & 0x1FF; |
| unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; |
| struct page *p; |
| register unsigned int num_of_valid_entries; |
| bool newly_created_pgd = false; |
| enum kbase_mmu_op_type flush_op; |
| |
| if (count > remain) |
| count = remain; |
| |
| if (!vindex && is_huge_head(*phys)) |
| cur_level = MIDGARD_MMU_LEVEL(2); |
| else |
| cur_level = MIDGARD_MMU_BOTTOMLEVEL; |
| |
| insert_level = cur_level; |
| |
| /* |
| * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly |
| * suboptimal. We don't have to re-parse the whole tree |
| * each time (just cache the l0-l2 sequence). |
| * On the other hand, it's only a gain when we map more than |
| * 256 pages at once (on average). Do we really care? |
| */ |
| /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
| err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, |
| &pgd); |
| |
| if (err) { |
| dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", |
| __func__, err); |
| goto fail_unlock; |
| } |
| |
| /* No valid pgd at cur_level */ |
| if (insert_level != cur_level) { |
| /* Allocate new pgds for all missing levels from the required level |
| * down to the lowest valid pgd at insert_level |
| */ |
| err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), |
| cur_level); |
| if (err) |
| goto fail_unlock; |
| |
| newly_created_pgd = true; |
| |
| new_pgds[insert_level] = pgd; |
| |
| /* If we didn't find an existing valid pgd at cur_level, |
| * we've now allocated one. The ATE in the next step should |
| * be inserted in this newly allocated pgd. |
| */ |
| pgd = new_pgds[cur_level]; |
| } |
| |
| p = pfn_to_page(PFN_DOWN(pgd)); |
| pgd_page = kbase_kmap(p); |
| |
| if (!pgd_page) { |
| dev_err(kbdev->dev, "%s: kmap failure", __func__); |
| err = -ENOMEM; |
| |
| goto fail_unlock_free_pgds; |
| } |
| |
| num_of_valid_entries = mmu_mode->get_num_valid_entries(pgd_page); |
| |
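| /* A 2MB (huge head) page is mapped with a single ATE at level 2, whereas
|  * small pages are mapped with one bottom-level ATE per GPU page.
|  */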
| if (cur_level == MIDGARD_MMU_LEVEL(2)) { |
| int level_index = (insert_vpfn >> 9) & 0x1FF; |
| pgd_page[level_index] = |
| kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); |
| |
| num_of_valid_entries++; |
| } else { |
| for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) { |
| struct tagged_addr base_tagged_addr = |
| phys[i / GPU_PAGES_PER_CPU_PAGE]; |
| phys_addr_t base_phys_address = as_phys_addr_t(base_tagged_addr); |
| unsigned int j; |
| |
| for (j = 0; j < GPU_PAGES_PER_CPU_PAGE; j++) { |
| unsigned int ofs = vindex + i + j; |
| u64 *target = &pgd_page[ofs]; |
| phys_addr_t page_address = |
| base_phys_address + (j * GPU_PAGE_SIZE); |
| |
| /* Warn if the current page is a valid ATE |
| * entry. The page table shouldn't have anything |
| * in the place where we are trying to put a |
| * new entry. Modification to page table entries |
| * should be performed with |
| * kbase_mmu_update_pages() |
| */ |
| WARN_ON_ONCE((*target & 1UL) != 0); |
| |
| *target = kbase_mmu_create_ate(kbdev, |
| as_tagged(page_address), |
| flags, cur_level, group_id); |
| } |
| |
| /* If page migration is enabled, this is the right time |
| * to update the status of the page. |
| */ |
| if (kbase_is_page_migration_enabled() && !ignore_page_migration && |
| !is_huge(base_tagged_addr) && !is_partial(base_tagged_addr)) |
| kbase_mmu_progress_migration_on_insert( |
| base_tagged_addr, reg, mmut, insert_vpfn + i); |
| } |
| num_of_valid_entries += count; |
| } |
| |
| mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); |
| |
| if (dirty_pgds) |
| *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level); |
| |
| /* MMU cache flush operation here will depend on whether bottom level |
| * PGD is newly created or not. |
| * |
| * If bottom level PGD is newly created then no GPU cache maintenance is |
| * required as the PGD will not exist in GPU cache. Otherwise GPU cache |
| * maintenance is required for existing PGD. |
| */ |
| flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; |
| |
| kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), |
| kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), |
| flush_op); |
| |
| if (newly_created_pgd) { |
| err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, |
| new_pgds); |
| if (err) { |
| dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", |
| __func__, err); |
| |
| kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); |
| |
| kbase_kunmap(p, pgd_page); |
| goto fail_unlock_free_pgds; |
| } |
| } |
| |
| phys += (count / GPU_PAGES_PER_CPU_PAGE); |
| insert_vpfn += count; |
| remain -= count; |
| kbase_kunmap(p, pgd_page); |
| } |
| |
| rt_mutex_unlock(&mmut->mmu_lock); |
| |
| return 0; |
| |
| fail_unlock_free_pgds: |
| /* Free the pgds allocated by us from insert_level+1 to bottom level */ |
| for (l = cur_level; l > insert_level; l--) |
| kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); |
| |
| fail_unlock: |
| if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) { |
| /* Invalidate the pages we have partially completed */ |
| mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE, |
| insert_vpfn, dirty_pgds, start_phys, |
| ignore_page_migration); |
| } |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, |
| dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); |
| kbase_mmu_free_pgds_list(kbdev, mmut); |
| rt_mutex_unlock(&mmut->mmu_lock); |
| |
| return err; |
| } |
| |
| int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| const u64 start_vpfn, struct tagged_addr *phys, size_t nr, |
| unsigned long flags, int const group_id, u64 *dirty_pgds, |
| struct kbase_va_region *reg) |
| { |
| int err; |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id, |
| dirty_pgds, reg, false); |
| |
| return err; |
| } |
| |
| /* |
| * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space |
| * number 'as_nr'. |
| */ |
| int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, |
| struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, |
| int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, |
| struct kbase_va_region *reg) |
| { |
| int err; |
| u64 dirty_pgds = 0; |
| |
| CSTD_UNUSED(as_nr); |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, |
| reg, false); |
| if (err) |
| return err; |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); |
| |
| return 0; |
| } |
| |
| KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); |
| |
| int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, u64 vpfn, |
| struct tagged_addr *phys, size_t nr, |
| unsigned long flags, int as_nr, int const group_id, |
| enum kbase_caller_mmu_sync_info mmu_sync_info, |
| struct kbase_va_region *reg) |
| { |
| int err; |
| u64 dirty_pgds = 0; |
| |
| CSTD_UNUSED(as_nr); |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| /* Imported allocations don't have metadata and therefore always ignore the |
| * page migration logic. |
| */ |
| err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, |
| reg, true); |
| if (err) |
| return err; |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); |
| |
| return 0; |
| } |
| |
| int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| u64 vpfn, struct tagged_addr *phys, size_t nr, |
| unsigned long flags, int as_nr, int const group_id, |
| enum kbase_caller_mmu_sync_info mmu_sync_info, |
| struct kbase_va_region *reg) |
| { |
| int err; |
| u64 dirty_pgds = 0; |
| |
| CSTD_UNUSED(as_nr); |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return 0; |
| |
| /* Memory aliases are always built on top of existing allocations, |
| * therefore the state of physical pages shall be updated. |
| */ |
| err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, |
| reg, false); |
| if (err) |
| return err; |
| |
| mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); |
| |
| return 0; |
| } |
| |
| #if !MALI_USE_CSF |
| /** |
| * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches |
| * without retaining the kbase context. |
| * @kctx: The KBase context. |
| * @vpfn: The virtual page frame number to start the flush on. |
| * @nr: The number of pages to flush. |
| * |
| * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any |
| * other locking. |
| */ |
| static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) |
| { |
| struct kbase_device *kbdev = kctx->kbdev; |
| int err; |
| /* Calls to this function are inherently asynchronous, with respect to |
| * MMU operations. |
| */ |
| const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
| struct kbase_mmu_hw_op_param op_param; |
| |
| lockdep_assert_held(&kctx->kbdev->hwaccess_lock); |
| lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); |
| |
| /* Early out if there is nothing to do */ |
| if (nr == 0) |
| return; |
| |
| /* flush L2 and unlock the VA (resumes the MMU) */ |
| op_param.vpfn = vpfn; |
| op_param.nr = nr; |
| op_param.op = KBASE_MMU_OP_FLUSH_MEM; |
| op_param.kctx_id = kctx->id; |
| op_param.mmu_sync_info = mmu_sync_info; |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { |
| /* Value used to prevent skipping of any levels when flushing */ |
| op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); |
| err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], |
| &op_param); |
| } else { |
| err = kbase_mmu_hw_do_flush(kbdev, &kbdev->as[kctx->as_nr], |
| &op_param); |
| } |
| |
| if (err) { |
| /* Flush failed to complete, assume the |
| * GPU has hung and perform a reset to recover |
| */ |
| dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); |
| |
| if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu_locked(kbdev); |
| } |
| } |
| #endif |
| |
| void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) |
| { |
| lockdep_assert_held(&kbdev->hwaccess_lock); |
| lockdep_assert_held(&kbdev->mmu_hw_mutex); |
| KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); |
| |
| kbdev->mmu_mode->update(kbdev, mmut, as_nr); |
| } |
| KBASE_EXPORT_TEST_API(kbase_mmu_update); |
| |
| void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) |
| { |
| lockdep_assert_held(&kbdev->hwaccess_lock); |
| #if !MALI_USE_CSF |
| lockdep_assert_held(&kbdev->mmu_hw_mutex); |
| #endif |
| |
| kbdev->mmu_mode->disable_as(kbdev, as_nr); |
| } |
| |
| #if MALI_USE_CSF |
| void kbase_mmu_disable(struct kbase_context *kctx) |
| { |
| /* Calls to this function are inherently asynchronous, with respect to |
| * MMU operations. |
| */ |
| const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_mmu_hw_op_param op_param = { 0 }; |
| int lock_err, flush_err; |
| |
| /* ASSERT that the context has a valid as_nr, which is only the case |
| * when it's scheduled in. |
| * |
| * as_nr won't change because the caller has the hwaccess_lock |
| */ |
| KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); |
| |
| lockdep_assert_held(&kctx->kbdev->hwaccess_lock); |
| |
| op_param.vpfn = 0; |
| op_param.nr = ~0U; |
| op_param.op = KBASE_MMU_OP_FLUSH_MEM; |
| op_param.kctx_id = kctx->id; |
| op_param.mmu_sync_info = mmu_sync_info; |
| |
| #if MALI_USE_CSF |
| /* 0xF value used to prevent skipping of any levels when flushing */ |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev)) |
| op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); |
| #endif |
| /* lock MMU to prevent existing jobs on GPU from executing while the AS is |
| * not yet disabled |
| */ |
| lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); |
| if (lock_err) |
| dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, |
| kctx->id); |
| |
| /* Issue the flush command only when L2 cache is in stable power on state. |
| * Any other state for L2 cache implies that shader cores are powered off, |
| * which in turn implies there is no execution happening on the GPU. |
| */ |
| if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { |
| flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, |
| GPU_COMMAND_CACHE_CLN_INV_L2_LSC); |
| if (flush_err) |
| dev_err(kbdev->dev, |
| "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", |
| kctx->as_nr, kctx->tgid, kctx->id); |
| } |
| kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); |
| |
| if (!lock_err) { |
| /* unlock the MMU to allow it to resume */ |
| lock_err = |
| kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); |
| if (lock_err) |
| dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, |
| kctx->tgid, kctx->id); |
| } |
| |
| #if !MALI_USE_CSF |
| /* |
| * JM GPUs have some L1 read-only caches that need to be invalidated
| * with START_FLUSH configuration. Purge the MMU disabled kctx from |
| * the slot_rb tracking field so such invalidation is performed when |
| * a new katom is executed on the affected slots. |
| */ |
| kbase_backend_slot_kctx_purge_locked(kbdev, kctx); |
| #endif |
| |
| /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only |
| * reset the GPU if locking or unlocking fails. |
| */ |
| if (lock_err) |
| if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu_locked(kbdev); |
| } |
| #else |
| void kbase_mmu_disable(struct kbase_context *kctx) |
| { |
| /* ASSERT that the context has a valid as_nr, which is only the case |
| * when it's scheduled in. |
| * |
| * as_nr won't change because the caller has the hwaccess_lock |
| */ |
| KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); |
| |
| lockdep_assert_held(&kctx->kbdev->hwaccess_lock); |
| lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); |
| |
| /* |
| * The address space is being disabled, drain all knowledge of it out |
| * from the caches as pages and page tables might be freed after this. |
| * |
| * The job scheduler code will already be holding the locks and context |
| * so just do the flush. |
| */ |
| kbase_mmu_flush_noretain(kctx, 0, ~0); |
| |
| kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); |
| #if !MALI_USE_CSF |
| /* |
| * JM GPUs have some L1 read-only caches that need to be invalidated
| * with START_FLUSH configuration. Purge the MMU disabled kctx from |
| * the slot_rb tracking field so such invalidation is performed when |
| * a new katom is executed on the affected slots. |
| */ |
| kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); |
| #endif |
| } |
| #endif |
| KBASE_EXPORT_TEST_API(kbase_mmu_disable); |
| |
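| /**
|  * kbase_mmu_update_and_free_parent_pgds() - Remove a freed PGD from its parent PGDs
|  *
|  * @kbdev: Pointer to kbase device.
|  * @mmut: GPU MMU page table.
|  * @pgds: Physical addresses of the PGDs on the walk to @vpfn, indexed by level.
|  * @vpfn: Virtual page frame number (in GPU_PAGE_SIZE units) within the freed PGD.
|  * @level: Level of the PGD page that has just been emptied and queued for freeing.
|  * @flush_op: GPU cache maintenance operation to use for the modified PGD entries.
|  * @dirty_pgds: Optional pointer to a bitmask updated with the PGD levels that were modified.
|  *
|  * Walks upwards from @level - 1, invalidating in each parent the PTE that pointed to the
|  * emptied child PGD. A parent that becomes empty itself (and is not the top level) is
|  * queued for freeing and the walk continues; otherwise its valid entry count is
|  * decremented, the modified entry is synced, and the walk stops.
|  */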
| static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, |
| struct kbase_mmu_table *mmut, phys_addr_t *pgds, |
| u64 vpfn, int level, |
| enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) |
| { |
| int current_level; |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| |
| for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) { |
| phys_addr_t current_pgd = pgds[current_level]; |
| struct page *p = phys_to_page(current_pgd); |
| |
| u64 *current_page = kbase_kmap(p); |
| unsigned int current_valid_entries = |
| kbdev->mmu_mode->get_num_valid_entries(current_page); |
| unsigned int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FFU; |
| |
| /* We need to track every level that needs updating */ |
| if (dirty_pgds) |
| *dirty_pgds |= 1ULL << current_level; |
| |
| kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); |
| if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { |
| kbase_kunmap(p, current_page); |
| |
| /* Ensure the cacheline containing the last valid entry |
| * of PGD is invalidated from the GPU cache, before the |
| * PGD page is freed. |
| */ |
| kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, |
| current_pgd + (index * sizeof(u64)), sizeof(u64), |
| flush_op); |
| |
| kbase_mmu_add_to_free_pgds_list(mmut, p); |
| } else { |
| current_valid_entries--; |
| |
| kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries); |
| |
| kbase_kunmap(p, current_page); |
| |
| kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), |
| kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), |
| flush_op); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. |
| * |
| * @kbdev: Pointer to kbase device. |
| * @kctx: Pointer to kbase context. |
| * @as_nr: Address space number, for GPU cache maintenance operations |
| * that happen outside a specific kbase context. |
| * @phys: Array of physical pages to flush. |
| * @phys_page_nr: Number of physical pages to flush. |
| * @op_param: Non-NULL pointer to struct containing information about the flush |
| * operation to perform. |
| * |
| * This function will do one of three things: |
| * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
| * individual pages that were unmapped, if that feature is supported on the GPU, or
| * 2. Perform a full GPU cache flush through the GPU_CONTROL interface, if that is
| * supported on the GPU, or
| * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
| * |
| * When performing a partial GPU cache flush, the number of physical |
| * pages does not have to be identical to the number of virtual pages on the MMU, |
| * to support a single physical address flush for an aliased page. |
| */ |
| static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, |
| struct kbase_context *kctx, int as_nr, |
| struct tagged_addr *phys, size_t phys_page_nr, |
| struct kbase_mmu_hw_op_param *op_param) |
| { |
| if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { |
| /* Full cache flush through the MMU_COMMAND */ |
| mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); |
| } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { |
| /* Full cache flush through the GPU_CONTROL */ |
| mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); |
| } |
| #if MALI_USE_CSF |
| else { |
| /* Partial GPU cache flush with MMU cache invalidation */ |
| unsigned long irq_flags; |
| unsigned int i; |
| bool flush_done = false; |
| |
| mmu_invalidate(kbdev, kctx, as_nr, op_param); |
| |
| for (i = 0; !flush_done && i < phys_page_nr; i++) { |
| spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); |
| if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) |
| mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, |
| KBASE_MMU_OP_FLUSH_MEM); |
| else |
| flush_done = true; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); |
| } |
| } |
| #else |
| CSTD_UNUSED(phys); |
| CSTD_UNUSED(phys_page_nr); |
| #endif |
| } |
| |
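| /**
|  * kbase_mmu_teardown_pgd_pages() - Invalidate the ATEs for a range of GPU virtual pages
|  *
|  * @kbdev: Pointer to kbase device.
|  * @mmut: Pointer to GPU MMU page table.
|  * @vpfn: Start virtual page frame number (in PAGE_SIZE units) to unmap.
|  * @nr: Number of pages (in PAGE_SIZE units) to unmap.
|  * @dirty_pgds: Pointer to a bitmask updated with the PGD levels that were modified.
|  * @free_pgds_list: Unused.
|  * @flush_op: GPU cache maintenance operation to use for the modified PGD entries.
|  *
|  * Invalidates the ATEs covering the requested range and queues any PGDs that become
|  * empty for freeing. Partial teardown of a 2MB entry is not performed. MMU cache
|  * invalidation and the GPU cache flush for the unmapped range are left to the caller.
|  * Unexpected page table states are reported with a warning and either skipped or cause
|  * an early exit.
|  *
|  * Return: Always 0.
|  */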
| static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| u64 vpfn, size_t nr, u64 *dirty_pgds, |
| struct list_head *free_pgds_list, |
| enum kbase_mmu_op_type flush_op) |
| { |
| struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; |
| |
| CSTD_UNUSED(free_pgds_list); |
| |
| lockdep_assert_held(&mmut->mmu_lock); |
| kbase_mmu_reset_free_pgds_list(mmut); |
| /* Convert to GPU_PAGE_SIZE units. */ |
| vpfn *= GPU_PAGES_PER_CPU_PAGE; |
| nr *= GPU_PAGES_PER_CPU_PAGE; |
| |
| while (nr) { |
| unsigned int index = vpfn & 0x1FF; |
| unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; |
| unsigned int pcount; |
| int level; |
| u64 *page; |
| phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; |
| register unsigned int num_of_valid_entries; |
| phys_addr_t pgd = mmut->pgd; |
| struct page *p = phys_to_page(pgd); |
| |
| count = MIN(nr, count); |
| |
| /* need to check if this is a 2MB page or a small page */ |
| for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { |
| phys_addr_t next_pgd; |
| |
| index = (vpfn >> ((3 - level) * 9)) & 0x1FF; |
| page = kbase_kmap(p); |
| if (mmu_mode->ate_is_valid(page[index], level)) |
| break; /* keep the mapping */ |
| else if (!mmu_mode->pte_is_valid(page[index], level)) { |
| dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx", |
| level, vpfn << PAGE_SHIFT); |
| /* nothing here, advance to the next PTE of the current level */ |
| count = (1 << ((3 - level) * 9)); |
| count -= (vpfn & (count - 1)); |
| count = MIN(nr, count); |
| goto next; |
| } |
| next_pgd = mmu_mode->pte_to_phy_addr( |
| kbdev->mgm_dev->ops.mgm_pte_to_original_pte( |
| kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); |
| kbase_kunmap(p, page); |
| pgds[level] = pgd; |
| pgd = next_pgd; |
| p = phys_to_page(pgd); |
| } |
| |
| switch (level) { |
| case MIDGARD_MMU_LEVEL(0): |
| case MIDGARD_MMU_LEVEL(1): |
| dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, |
| level); |
| kbase_kunmap(p, page); |
| goto out; |
| case MIDGARD_MMU_LEVEL(2): |
| /* can only teardown if count >= 512 */ |
| if (count >= 512) { |
| pcount = 1; |
| } else { |
| dev_warn( |
| kbdev->dev, |
| "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", |
| __func__, count); |
| pcount = 0; |
| } |
| break; |
| case MIDGARD_MMU_BOTTOMLEVEL: |
| /* page count is the same as the logical count */ |
| pcount = count; |
| break; |
| default: |
| dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); |
| vpfn += count; |
| nr -= count; |
| continue; |
| } |
| |
| if (pcount > 0) |
| *dirty_pgds |= 1ULL << level; |
| |
| num_of_valid_entries = mmu_mode->get_num_valid_entries(page); |
| if (WARN_ON_ONCE(num_of_valid_entries < pcount)) |
| num_of_valid_entries = 0; |
| else |
| num_of_valid_entries -= pcount; |
| |
| /* Invalidate the entries being torn down */
| mmu_mode->entries_invalidate(&page[index], pcount); |
| |
| if (!num_of_valid_entries) { |
| kbase_kunmap(p, page); |
| |
| /* Ensure the cacheline(s) containing the last valid entries |
| * of PGD is invalidated from the GPU cache, before the |
| * PGD page is freed. |
| */ |
| kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), |
| pcount * sizeof(u64), flush_op); |
| |
| kbase_mmu_add_to_free_pgds_list(mmut, p); |
| |
| kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, |
| flush_op, dirty_pgds); |
| |
| vpfn += count; |
| nr -= count; |
| continue; |
| } |
| |
| mmu_mode->set_num_valid_entries(page, num_of_valid_entries); |
| |
| kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), |
| kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), |
| flush_op); |
| next: |
| kbase_kunmap(p, page); |
| vpfn += count; |
| nr -= count; |
| } |
| out: |
| return 0; |
| } |
| |
| /** |
| * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table |
| * |
| * @kbdev: Pointer to kbase device. |
| * @mmut: Pointer to GPU MMU page table. |
| * @vpfn: Start page frame number (in PAGE_SIZE units) of the GPU virtual pages to unmap. |
| * @phys: Array of physical pages currently mapped to the virtual |
| * pages to unmap, or NULL. This is used for GPU cache maintenance |
| * and page migration support. |
| * @nr_phys_pages: Number of physical pages (in PAGE_SIZE units) to flush. |
| * @nr_virt_pages: Number of virtual pages (in PAGE_SIZE units) whose PTEs should be destroyed. |
| * @as_nr: Address space number, for GPU cache maintenance operations |
| * that happen outside a specific kbase context. |
| * @ignore_page_migration: Whether page migration metadata should be ignored. |
| * |
| * We actually discard the ATE and free the page table pages if no valid entries |
| * exist in the PGD. |
| * |
| * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is |
| * currently scheduled into the runpool, and so potentially uses a lot of locks. |
| * These locks must be taken in the correct order with respect to others |
| * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more |
| * information. |
| * |
| * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, |
| * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, |
| * GPU cache maintenance will be done as usual; that is, invalidating the whole GPU caches |
| * instead of specific physical address ranges. |
| * |
| * Return: 0 on success, otherwise an error code. |
| */ |
| static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, |
| struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, |
| int as_nr, bool ignore_page_migration) |
| { |
| u64 start_vpfn = vpfn; |
| enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; |
| struct kbase_mmu_hw_op_param op_param; |
| int err = -EFAULT; |
| u64 dirty_pgds = 0; |
| LIST_HEAD(free_pgds_list); |
| |
| /* Calls to this function are inherently asynchronous, with respect to |
| * MMU operations. |
| */ |
| const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
| |
| /* This function performs two operations: MMU maintenance and flushing |
| * the caches. To ensure internal consistency between the caches and the |
| * MMU, it does not make sense to be able to flush only the physical pages |
| * from the cache and keep the PTE, nor does it make sense to use this |
| * function to remove a PTE and keep the physical pages in the cache. |
| * |
| * However, we have legitimate cases where we can try to tear down a mapping |
| * with zero virtual and zero physical pages, so we must have the following |
| * behaviour: |
| * - if either the physical or the virtual page count is zero, return early
| * - if there are fewer virtual pages than physical pages, return -EINVAL
| */ |
| if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0)) |
| return 0; |
| |
| if (unlikely(nr_virt_pages < nr_phys_pages)) |
| return -EINVAL; |
| |
| /* MMU cache flush strategy depends on the number of pages to unmap. In both cases |
| * the operation is invalidate but the granularity of cache maintenance may change |
| * according to the situation. |
| * |
| * If GPU control command operations are present and the number of pages is "small", |
| * then the optimal strategy is flushing on the physical address range of the pages |
| * which are affected by the operation. That implies both the PGDs which are modified |
| * or removed from the page table and the physical pages which are freed from memory. |
| * |
| * Otherwise, there's no alternative to invalidating the whole GPU cache. |
| */ |
| if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && |
| nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) |
| flush_op = KBASE_MMU_OP_FLUSH_PT; |
| |
| if (!rt_mutex_trylock(&mmut->mmu_lock)) { |
| /* |
| * Sometimes, mmu_lock takes long time to be released. |
| * In that case, kswapd is stuck until it can hold |
| * the lock. Instead, just bail out here so kswapd |
| * could reclaim other pages. |
| */ |
| if (current_is_kswapd()) |
| return -EBUSY; |
| rt_mutex_lock(&mmut->mmu_lock); |
| } |
| |
| err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds, |
| &free_pgds_list, flush_op); |
| |
| /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ |
| op_param = (struct kbase_mmu_hw_op_param){ |
| .vpfn = start_vpfn, |
| .nr = nr_virt_pages, |
| .mmu_sync_info = mmu_sync_info, |
| .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF, |
| .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT : |
| KBASE_MMU_OP_FLUSH_MEM, |
| .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), |
| }; |
| mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages, |
| &op_param); |
| |
| /* If page migration is enabled: the status of all physical pages involved |
| * shall be updated, unless they are not movable. Their status shall be |
| * updated before releasing the lock to protect against concurrent |
| * requests to migrate the pages, if they have been isolated. |
| */ |
| if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration) |
| kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); |
| |
| kbase_mmu_free_pgds_list(kbdev, mmut); |
| |
| rt_mutex_unlock(&mmut->mmu_lock); |
| |
| return err; |
| } |
| |
| int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, |
| struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, |
| int as_nr) |
| { |
| return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, |
| false); |
| } |
| |
| int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, |
| u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, |
| size_t nr_virt_pages, int as_nr) |
| { |
| return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, |
| true); |
| } |
| |
|