/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */
| |
| #ifndef __GPU_BUDDY_H__ |
| #define __GPU_BUDDY_H__ |
| |
| #include <linux/bitops.h> |
| #include <linux/list.h> |
| #include <linux/slab.h> |
| #include <linux/sched.h> |
| #include <linux/rbtree.h> |
| #include <linux/rbtree_augmented.h> |
| |
/**
 * GPU_BUDDY_RANGE_ALLOCATION - Confine the allocation to an address range
 *
 * With this flag the allocation may only use space inside the half-open
 * range [start, end) passed to gpu_buddy_alloc_blocks(). If the flag is
 * not set, start/end are ignored and any free space may be used.
 */
#define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)

/**
 * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from the top of the address space
 *
 * Begin at the high addresses and work downwards. This helps keep
 * different kinds of allocation apart (e.g., kernel vs userspace), which
 * reduces fragmentation.
 */
#define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)

/**
 * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
 *
 * The request has to be satisfied with a single contiguous block. When
 * the requested size cannot be allocated contiguously, the allocation
 * fails with -ENOSPC.
 */
#define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)

/**
 * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
 *
 * Try the clear tree first and fall back to dirty memory when there is
 * not enough clear memory available. Intended for callers that need
 * zeroed memory and want to avoid GPU clear operations.
 */
#define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)

/**
 * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
 *
 * Passed to gpu_buddy_free_list() to state that the memory being freed
 * has been cleared (zeroed). Such blocks are placed in the clear tree so
 * that later GPU_BUDDY_CLEAR_ALLOCATION requests can use them.
 */
#define GPU_BUDDY_CLEARED			BIT(4)

/**
 * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
 *
 * Normally, when the allocated block is larger than the requested size,
 * the excess is trimmed and handed back to the free pool. Setting this
 * flag turns trimming off, so the full power-of-two block is kept.
 */
#define GPU_BUDDY_TRIM_DISABLE			BIT(5)
| |
/**
 * enum gpu_buddy_free_tree - Selector for the free-block trees
 * @GPU_BUDDY_CLEAR_TREE: first free tree (index 0)
 * @GPU_BUDDY_DIRTY_TREE: second free tree (index 1)
 * @GPU_BUDDY_MAX_FREE_TREES: number of free trees; not a valid selector
 */
enum gpu_buddy_free_tree {
	GPU_BUDDY_CLEAR_TREE		= 0,
	GPU_BUDDY_DIRTY_TREE		= 1,
	GPU_BUDDY_MAX_FREE_TREES	= 2,
};

/*
 * Iterate @tree over every free-tree selector, from 0 up to (but not
 * including) GPU_BUDDY_MAX_FREE_TREES. @tree must be an assignable
 * integer or enum lvalue; it is evaluated more than once.
 */
#define for_each_free_tree(tree)				\
	for ((tree) = 0;					\
	     (tree) < GPU_BUDDY_MAX_FREE_TREES;			\
	     (tree)++)
| |
| /** |
| * struct gpu_buddy_block - Block within a buddy allocator |
| * |
| * Each block in the buddy allocator is represented by this structure. |
| * Blocks are organized in a binary tree where each parent block can be |
| * split into two children (left and right buddies). The allocator manages |
| * blocks at various orders (power-of-2 sizes) from chunk_size up to the |
| * largest contiguous region. |
| * |
| * @private: Private data owned by the allocator user (e.g., driver-specific data) |
| * @link: List node for user ownership while block is allocated |
| */ |
| struct gpu_buddy_block { |
| /* private: */ |
| /* |
| * Header bit layout: |
| * - Bits 63:12: block offset within the address space |
| * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) |
| * - Bit 9: clear bit (1 if memory is zeroed) |
| * - Bits 8:6: reserved |
| * - Bits 5:0: order (log2 of size relative to chunk_size) |
| */ |
| #define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) |
| #define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) |
| #define GPU_BUDDY_ALLOCATED (1 << 10) |
| #define GPU_BUDDY_FREE (2 << 10) |
| #define GPU_BUDDY_SPLIT (3 << 10) |
| #define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) |
| /* Free to be used, if needed in the future */ |
| #define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) |
| #define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) |
| u64 header; |
| |
| struct gpu_buddy_block *left; |
| struct gpu_buddy_block *right; |
| struct gpu_buddy_block *parent; |
| /* public: */ |
| void *private; /* owned by creator */ |
| |
| /* |
| * While the block is allocated by the user through gpu_buddy_alloc*, |
| * the user has ownership of the link, for example to maintain within |
| * a list, if so desired. As soon as the block is freed with |
| * gpu_buddy_free* ownership is given back to the mm. |
| */ |
| union { |
| /* private: */ |
| struct rb_node rb; |
| /* public: */ |
| struct list_head link; |
| }; |
| /* private: */ |
| struct list_head tmp_link; |
| unsigned int subtree_max_alignment; |
| }; |
| |
/*
 * An order-zero block must be at least SZ_4K (2^12 bytes), so the largest
 * usable order is 63 - 12: a block of that order built from the minimum
 * chunk size spans 2^63 bytes.
 */
#define GPU_BUDDY_MAX_ORDER	(63 - 12)
| |
| /** |
| * struct gpu_buddy - GPU binary buddy allocator |
| * |
| * The buddy allocator provides efficient power-of-two memory allocation |
| * with fast allocation and free operations. It is commonly used for GPU |
| * memory management where allocations can be split into power-of-two |
| * block sizes. |
| * |
| * Locking should be handled by the user; a simple mutex around |
| * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() |
| * should suffice. |
| * |
| * @n_roots: Number of root blocks in the roots array. |
| * @max_order: Maximum block order (log2 of largest block size / chunk_size). |
| * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. |
| * @size: Total size of the address space managed by this allocator in bytes. |
| * @avail: Total free space currently available for allocation in bytes. |
| * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. |
| * This is a subset of @avail. |
| */ |
| struct gpu_buddy { |
| /* private: */ |
| /* |
| * Array of red-black trees for free block management. |
| * Indexed as free_trees[clear/dirty][order] where: |
| * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content |
| * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content |
| * Each tree holds free blocks of the corresponding order. |
| */ |
| struct rb_root **free_trees; |
| /* |
| * Array of root blocks representing the top-level blocks of the |
| * binary tree(s). Multiple roots exist when the total size is not |
| * a power of two, with each root being the largest power-of-two |
| * that fits in the remaining space. |
| */ |
| struct gpu_buddy_block **roots; |
| /* public: */ |
| unsigned int n_roots; |
| unsigned int max_order; |
| u64 chunk_size; |
| u64 size; |
| u64 avail; |
| u64 clear_avail; |
| }; |
| |
| static inline u64 |
| gpu_buddy_block_offset(const struct gpu_buddy_block *block) |
| { |
| return block->header & GPU_BUDDY_HEADER_OFFSET; |
| } |
| |
| static inline unsigned int |
| gpu_buddy_block_order(struct gpu_buddy_block *block) |
| { |
| return block->header & GPU_BUDDY_HEADER_ORDER; |
| } |
| |
| static inline bool |
| gpu_buddy_block_is_free(struct gpu_buddy_block *block) |
| { |
| return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE; |
| } |
| |
| static inline bool |
| gpu_buddy_block_is_clear(struct gpu_buddy_block *block) |
| { |
| return block->header & GPU_BUDDY_HEADER_CLEAR; |
| } |
| |
| static inline u64 |
| gpu_buddy_block_size(struct gpu_buddy *mm, |
| struct gpu_buddy_block *block) |
| { |
| return mm->chunk_size << gpu_buddy_block_order(block); |
| } |
| |
| int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size); |
| |
| void gpu_buddy_fini(struct gpu_buddy *mm); |
| |
| int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, |
| u64 start, u64 end, u64 size, |
| u64 min_page_size, |
| struct list_head *blocks, |
| unsigned long flags); |
| |
| int gpu_buddy_block_trim(struct gpu_buddy *mm, |
| u64 *start, |
| u64 new_size, |
| struct list_head *blocks); |
| |
| void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear); |
| |
| void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block); |
| |
| void gpu_buddy_free_list(struct gpu_buddy *mm, |
| struct list_head *objects, |
| unsigned int flags); |
| |
| void gpu_buddy_print(struct gpu_buddy *mm); |
| void gpu_buddy_block_print(struct gpu_buddy *mm, |
| struct gpu_buddy_block *block); |
| #endif |