| /* SPDX-License-Identifier: GPL-2.0-only */ |
| /* |
| * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES |
| */ |
| #ifndef __GENERIC_PT_IOMMU_H |
| #define __GENERIC_PT_IOMMU_H |
| |
| #include <linux/generic_pt/common.h> |
| #include <linux/iommu.h> |
| #include <linux/mm_types.h> |
| |
| struct iommu_iotlb_gather; |
| struct pt_iommu_ops; |
| struct pt_iommu_driver_ops; |
| struct iommu_dirty_bitmap; |
| |
| /** |
| * DOC: IOMMU Radix Page Table |
| * |
| * The IOMMU implementation of the Generic Page Table provides an ops struct |
| * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and |
| * the generic map/unmap interface. |
| * |
| * This interface uses a caller provided locking approach. The caller must have |
| * a VA range lock concept that prevents concurrent threads from calling ops on |
| * the same VA. Generally the range lock must be at least as large as a single |
| * map call. |
| */ |
| |
| /** |
| * struct pt_iommu - Base structure for IOMMU page tables |
| * |
| * The format-specific struct will include this as the first member. See |
| * IOMMU_FORMAT(), which generates struct pt_iommu_<fmt> with a leading |
| * struct pt_iommu member named iommu. |
| */ |
| struct pt_iommu { |
| /** |
| * @domain: The core IOMMU domain. The driver should use a union to |
| * overlay this memory with its previously existing domain struct to |
| * create an alias. PT_IOMMU_CHECK_DOMAIN() verifies the overlay at |
| * build time, and iommupt_from_domain() uses this member to get from |
| * an iommu_domain back to its pt_iommu. |
| */ |
| struct iommu_domain domain; |
| |
| /** |
| * @ops: Function pointers to access the API. pt_iommu_deinit() treats |
| * a NULL value here as nothing to undo. |
| */ |
| const struct pt_iommu_ops *ops; |
| |
| /** |
| * @driver_ops: Function pointers provided by the HW driver to help |
| * manage HW details like caches. |
| */ |
| const struct pt_iommu_driver_ops *driver_ops; |
| |
| /** |
| * @nid: Node ID to use for table memory allocations. The IOMMU driver |
| * may want to set the NID to the device's NID, if there are multiple |
| * table walkers. |
| */ |
| int nid; |
| |
| /** |
| * @iommu_device: Device pointer used for any DMA cache flushing when |
| * PT_FEAT_DMA_INCOHERENT is in use. This is the iommu device that |
| * created the page table, and it must have dma ops that perform cache |
| * flushing. |
| */ |
| struct device *iommu_device; |
| }; |
| |
| /** |
| * iommupt_from_domain - Get the pt_iommu that embeds an iommu_domain |
| * @domain: Domain to convert |
| * |
| * Returns: The struct pt_iommu containing @domain, or NULL when |
| * CONFIG_IOMMU_PT is not enabled or @domain does not have is_iommupt set. |
| */ |
| static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain) |
| { |
| if (IS_ENABLED(CONFIG_IOMMU_PT) && domain->is_iommupt) |
| return container_of(domain, struct pt_iommu, domain); |
| return NULL; |
| } |
| |
| /** |
| * struct pt_iommu_info - Details about the IOMMU page table |
| * |
| * Filled in by pt_iommu_ops->get_info() through its info argument. |
| */ |
| struct pt_iommu_info { |
| /** |
| * @pgsize_bitmap: A bitmask where each set bit indicates |
| * a page size that can be natively stored in the page table. |
| */ |
| u64 pgsize_bitmap; |
| }; |
| |
| struct pt_iommu_ops { /* Page table API, reached through pt_iommu->ops */ |
| /** |
| * @map_range: Install translation for an IOVA range |
| * @iommu_table: Table to manipulate |
| * @iova: IO virtual address to start |
| * @paddr: Physical/Output address to start |
| * @len: Length of the range starting from @iova |
| * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO |
| * @gfp: GFP flags for any memory allocations |
| * @mapped: Incremented by the number of bytes of VA that were mapped |
| * |
| * The range starting at @iova will have @paddr installed into it. The |
| * range is automatically segmented into optimally sized table entries, |
| * and can have any valid alignment. |
| * |
| * On error the caller will probably want to invoke unmap on the range |
| * from iova up to the amount indicated by @mapped to return the table |
| * back to an unchanged state. |
| * |
| * Context: The caller must hold a write range lock that includes |
| * the whole range. |
| * |
| * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA |
| * that were mapped are added to @mapped, @mapped is not zeroed first. |
| */ |
| int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova, |
| phys_addr_t paddr, dma_addr_t len, unsigned int prot, |
| gfp_t gfp, size_t *mapped); |
| |
| /** |
| * @unmap_range: Make a range of IOVA empty/not present |
| * @iommu_table: Table to manipulate |
| * @iova: IO virtual address to start |
| * @len: Length of the range starting from @iova |
| * @iotlb_gather: Gather struct that must be flushed on return |
| * |
| * unmap_range() will remove a translation created by map_range(). It |
| * cannot subdivide a mapping created by map_range(), so it should be |
| * called with IOVA ranges that match those passed to map_range(). The |
| * IOVA range can aggregate contiguous map_range() calls so long as no |
| * individual range is split. |
| * |
| * Context: The caller must hold a write range lock that includes |
| * the whole range. |
| * |
| * Returns: Number of bytes of VA unmapped. @iova plus the return value |
| * is the point where unmapping stopped. |
| */ |
| size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova, |
| dma_addr_t len, |
| struct iommu_iotlb_gather *iotlb_gather); |
| |
| /** |
| * @set_dirty: Make the iova write dirty |
| * @iommu_table: Table to manipulate |
| * @iova: IO virtual address to start |
| * |
| * This is only used by iommufd testing. It makes the iova dirty so that |
| * read_and_clear_dirty() will see it as dirty. Unlike all the other ops |
| * this one is safe to call without holding any locking. It may return |
| * -EAGAIN if there is a race. |
| */ |
| int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova); |
| |
| /** |
| * @get_info: Return the pt_iommu_info structure |
| * @iommu_table: Table to query |
| * @info: Where the information is returned |
| * |
| * Return some basic static information about the page table. |
| */ |
| void (*get_info)(struct pt_iommu *iommu_table, |
| struct pt_iommu_info *info); |
| |
| /** |
| * @deinit: Undo a format specific init operation |
| * @iommu_table: Table to destroy |
| * |
| * Release all of the memory. The caller must have already removed the |
| * table from all HW access and all caches. Callers normally reach this |
| * through pt_iommu_deinit(), which skips the call when ops is NULL. |
| */ |
| void (*deinit)(struct pt_iommu *iommu_table); |
| }; |
| |
| /** |
| * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations |
| * |
| * The IOMMU driver should implement these using container_of(iommu_table) to |
| * get to its iommu_domain derived structure. All ops can be called in atomic |
| * contexts as they are buried under DMA API calls. |
| * |
| * Both ops declared here are only used by PT_FEAT_DYNAMIC_TOP. |
| */ |
| struct pt_iommu_driver_ops { |
| /** |
| * @change_top: Update the top of table pointer |
| * @iommu_table: Table to operate on |
| * @top_paddr: New CPU physical address of the top pointer |
| * @top_level: IOMMU PT level of the new top |
| * |
| * Called under the get_top_lock() spinlock. The driver must update all |
| * HW references to this domain with a new top address and |
| * configuration. On return mappings placed in the new top must be |
| * reachable by the HW. |
| * |
| * top_level encodes the level in IOMMU PT format, level 0 is the |
| * smallest page size increasing from there. This has to be translated |
| * to any HW specific format. During this call the new top will not be |
| * visible to any other API. |
| * |
| * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if |
| * enabled. |
| */ |
| void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr, |
| unsigned int top_level); |
| |
| /** |
| * @get_top_lock: lock to hold when changing the table top |
| * @iommu_table: Table to operate on |
| * |
| * Return a lock to hold when changing the table top page table from |
| * being stored in HW. The lock will be held prior to calling |
| * change_top() and released once the top is fully visible. |
| * |
| * Typically this would be a lock that protects the iommu_domain's |
| * attachment list. |
| * |
| * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if |
| * enabled. |
| */ |
| spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table); |
| }; |
| |
| /** |
| * pt_iommu_deinit - Tear down a page table through its format's deinit op |
| * @iommu_table: Table to destroy |
| * |
| * May be called on a table that was never initialized, or whose init failed. |
| * The ops pointer only becomes non-NULL once there is something for deinit to |
| * undo, so a NULL ops makes this a no-op. |
| */ |
| static inline void pt_iommu_deinit(struct pt_iommu *iommu_table) |
| { |
| const struct pt_iommu_ops *ops = iommu_table->ops; |
| |
| /* NULL ops: init never got far enough to need undoing */ |
| if (!ops) |
| return; |
| ops->deinit(iommu_table); |
| } |
| |
| /** |
| * struct pt_iommu_cfg - Common configuration values for all formats |
| * |
| * The format specific cfg structs declared in this header each embed this as |
| * their common member. |
| */ |
| struct pt_iommu_cfg { |
| /** |
| * @features: Features required. Only these features will be turned on. |
| * The feature list should reflect what the IOMMU HW is capable of. |
| */ |
| unsigned int features; |
| /** |
| * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will |
| * imply the top level of the table. |
| * NOTE(review): the _lg2 suffix suggests the value is a log2 (number |
| * of address bits) - confirm against the format implementations. |
| */ |
| u8 hw_max_vasz_lg2; |
| /** |
| * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format |
| * might select a lower maximum OA. |
| */ |
| u8 hw_max_oasz_lg2; |
| }; |
| |
| /* |
| * Generate the exported function signatures from iommu_pt.h |
| * |
| * IOMMU_PROTOTYPES(fmt) declares pt_iommu_<fmt>_iova_to_phys(), |
| * pt_iommu_<fmt>_read_and_clear_dirty(), pt_iommu_<fmt>_init() and |
| * pt_iommu_<fmt>_hw_info(). The prototypes refer to struct pt_iommu_<fmt>, |
| * struct pt_iommu_<fmt>_cfg and struct pt_iommu_<fmt>_hw_info, which the |
| * users below declare before invoking the macro. |
| * |
| * IOMMU_FORMAT(fmt, member) defines struct pt_iommu_<fmt> as the common |
| * struct pt_iommu (named iommu) followed by the format's struct pt_<fmt> |
| * (named by the member argument), and then emits the prototypes for fmt. |
| * |
| * Both macros are #undef'd at the end of this header. |
| */ |
| #define IOMMU_PROTOTYPES(fmt) \ |
| phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \ |
| dma_addr_t iova); \ |
| int pt_iommu_##fmt##_read_and_clear_dirty( \ |
| struct iommu_domain *domain, unsigned long iova, size_t size, \ |
| unsigned long flags, struct iommu_dirty_bitmap *dirty); \ |
| int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \ |
| const struct pt_iommu_##fmt##_cfg *cfg, \ |
| gfp_t gfp); \ |
| void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \ |
| struct pt_iommu_##fmt##_hw_info *info) |
| #define IOMMU_FORMAT(fmt, member) \ |
| struct pt_iommu_##fmt { \ |
| struct pt_iommu iommu; \ |
| struct pt_##fmt member; \ |
| }; \ |
| IOMMU_PROTOTYPES(fmt) |
| |
| /* |
| * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the |
| * iommu_pt. It expands to a designated initializer that points .iova_to_phys |
| * at pt_iommu_<fmt>_iova_to_phys(). |
| * |
| * IOMMU_PT_DIRTY_OPS does the same for .read_and_clear_dirty, using |
| * pt_iommu_<fmt>_read_and_clear_dirty(). |
| * NOTE(review): presumably meant for the driver's iommu_dirty_ops - confirm. |
| */ |
| #define IOMMU_PT_DOMAIN_OPS(fmt) \ |
| .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys |
| #define IOMMU_PT_DIRTY_OPS(fmt) \ |
| .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty |
| |
| /* |
| * The driver should set up its domain struct like |
| * union { |
| * struct iommu_domain domain; |
| * struct pt_iommu_xxx xx; |
| * }; |
| * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain); |
| * |
| * Which creates an alias between driver_domain.domain and |
| * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing |
| * driver_domain.domain users. |
| * |
| * PT_IOMMU_CHECK_DOMAIN() is a build time assertion that both members sit at |
| * the same offset: |
| * s - the driver's domain struct type |
| * pt_iommu_memb - path to the struct pt_iommu member inside s |
| * domain_memb - the driver's existing struct iommu_domain member inside s |
| */ |
| #define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \ |
| static_assert(offsetof(s, pt_iommu_memb.domain) == \ |
| offsetof(s, domain_memb)) |
| |
| struct pt_iommu_amdv1_cfg { /* cfg argument of pt_iommu_amdv1_init() */ |
| struct pt_iommu_cfg common; |
| unsigned int starting_level; |
| }; |
| |
| struct pt_iommu_amdv1_hw_info { /* info argument of pt_iommu_amdv1_hw_info() */ |
| u64 host_pt_root; |
| u8 mode; |
| }; |
| |
| IOMMU_FORMAT(amdv1, amdpt); /* struct pt_iommu_amdv1 plus its prototypes */ |
| |
| /* |
| * amdv1_mock is used by the iommufd selftest. The defines below make it share |
| * the amdv1 table and cfg struct types, while IOMMU_PROTOTYPES() gives it its |
| * own pt_iommu_amdv1_mock_*() function prototypes. Its hw_info struct is only |
| * forward declared here. |
| */ |
| #define pt_iommu_amdv1_mock pt_iommu_amdv1 |
| #define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg |
| struct pt_iommu_amdv1_mock_hw_info; |
| IOMMU_PROTOTYPES(amdv1_mock); |
| |
| struct pt_iommu_vtdss_cfg { /* cfg argument of pt_iommu_vtdss_init() */ |
| struct pt_iommu_cfg common; |
| /* Top level of the table: 4 is a 57 bit 5 level table */ |
| unsigned int top_level; |
| }; |
| |
| struct pt_iommu_vtdss_hw_info { /* info argument of pt_iommu_vtdss_hw_info() */ |
| u64 ssptptr; |
| u8 aw; |
| }; |
| |
| IOMMU_FORMAT(vtdss, vtdss_pt); /* struct pt_iommu_vtdss plus its prototypes */ |
| |
| struct pt_iommu_riscv_64_cfg { /* cfg argument of pt_iommu_riscv_64_init() */ |
| struct pt_iommu_cfg common; /* no format specific settings beyond the common ones */ |
| }; |
| |
| struct pt_iommu_riscv_64_hw_info { /* info argument of pt_iommu_riscv_64_hw_info() */ |
| u64 ppn; |
| u8 fsc_iosatp_mode; |
| }; |
| |
| IOMMU_FORMAT(riscv_64, riscv_64pt); /* struct pt_iommu_riscv_64 plus its prototypes */ |
| |
| struct pt_iommu_x86_64_cfg { /* cfg argument of pt_iommu_x86_64_init() */ |
| struct pt_iommu_cfg common; |
| /* Top level of the table: 4 is a 57 bit 5 level table */ |
| unsigned int top_level; |
| }; |
| |
| struct pt_iommu_x86_64_hw_info { /* info argument of pt_iommu_x86_64_hw_info() */ |
| u64 gcr3_pt; |
| u8 levels; |
| }; |
| |
| IOMMU_FORMAT(x86_64, x86_64_pt); /* struct pt_iommu_x86_64 plus its prototypes */ |
| |
| #undef IOMMU_PROTOTYPES /* generator macros are not exported past this header */ |
| #undef IOMMU_FORMAT |
| #endif /* __GENERIC_PT_IOMMU_H */ |