// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#ifndef __INTEL_ALLOCATOR_H__
#define __INTEL_ALLOCATOR_H__
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdatomic.h>
#include "i915/gem_submission.h"
#include "intel_reg.h"
/**
* SECTION:intel_allocator
 * @short_description: igt implementation of an allocator
* @title: Intel allocator
* @include: intel_allocator.h
*
* # Introduction
*
 * With the era of discrete cards we had to adapt IGT to handle addresses
 * in userspace only (softpin, without support for relocations).
 * Writing an allocator for a single purpose would be relatively easy,
 * but supporting different tests with different requirements is a quite
 * complicated task where a couple of scenarios may not be covered yet.
*
* # Assumptions
*
 * - The allocator has to work in a multiprocess / multithread environment.
 * - The allocator backend (algorithm) should be pluggable. Currently we
 *   support SIMPLE (borrowed from the Mesa allocator), RELOC (a pseudo
 *   allocator which returns incremented addresses without checking for
 *   overlaps) and RANDOM (a pseudo allocator which returns randomized
 *   addresses without checking for overlaps); see the snippet after this
 *   list.
 * - It has to integrate with intel-bb (our simpler libdrm replacement used
 *   in a couple of tests).
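 *
 * As a minimal sketch (assuming fd comes from drm_open_driver()), choosing
 * a backend is just a matter of the type passed to intel_allocator_open():
 *
 * |[<!-- language="c" -->
 *	// Full-featured allocator which tracks allocations and checks overlaps:
 *	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *
 *	// Pseudo allocator which just hands out incrementing offsets:
 *	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
 * ]|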
*
* # Implementation
*
* ## Single process (allows multiple threads)
*
 * For a single process we don't need to create a dedicated
 * entity (a kind of arbiter) to arbitrate allocations. Simple locking over
 * the allocator data structure is enough. A basic usage example would be:
*
* |[<!-- language="c" -->
* struct object {
* uint32_t handle;
* uint64_t offset;
* uint64_t size;
* };
*
* struct object obj1, obj2;
* uint64_t ahnd, startp, endp, size = 4096, align = 1 << 13;
* int fd = -1;
*
* fd = drm_open_driver(DRIVER_INTEL);
* ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
*
 *	obj1.handle = gem_create(fd, 4096);
 *	obj2.handle = gem_create(fd, 4096);
*
 *	// Reserve a hole for an object at a given address.
 *	// In this example, the first possible address.
* intel_allocator_get_address_range(ahnd, &startp, &endp);
* obj1.offset = startp;
* igt_assert(intel_allocator_reserve(ahnd, obj1.handle, size, startp));
*
* // Get the most suitable offset for the object. Preferred way.
* obj2.offset = intel_allocator_alloc(ahnd, obj2.handle, size, align);
*
* ...
*
 *	// Reserved addresses can only be freed by unreserve.
* intel_allocator_unreserve(ahnd, obj1.handle, size, obj1.offset);
* intel_allocator_free(ahnd, obj2.handle);
*
 *	gem_close(fd, obj1.handle);
 *	gem_close(fd, obj2.handle);
* ]|
*
* Description:
 * - ahnd is the allocator handle (the vm space handled by it)
 * - we call get_address_range() to get the start/end range provided by the
 *   allocator (we haven't specified a range in open, so the allocator code
 *   will assume some safe address range - we don't want to exercise potential
 *   HW bugs on the last page)
 * - the alloc() / free() pair just gets an address for a gem object proposed
 *   by the allocator
 * - the reserve() / unreserve() pair gives us full control of acquiring /
 *   returning a range we're interested in
*
* ## Multiple processes
*
 * When a process forks and its child uses the same fd, the vm address space
 * is also the same. Some coordination - in this case interprocess
 * communication - is required to assign proper addresses to gem objects and
 * avoid collisions. An additional thread is spawned for such a case to cover
 * the child processes' needs. It uses some form of communication channel to
 * receive a request, perform an action (alloc, free, ...) and send a response
 * to the requesting process. Currently a SYSVIPC message queue was chosen for
 * this, but it can be replaced by another mechanism. Allocation techniques
 * are the same as for a single process; we just need to wrap such code with:
 *
 * |[<!-- language="c" -->
* intel_allocator_multiprocess_start();
*
* ... allocation code (open, close, alloc, free, ...)
*
* intel_allocator_multiprocess_stop();
* ]|
*
 * Calling start() spawns an additional allocator thread ready to handle
 * incoming allocation requests (open / close are also requests in that case).
 *
 * Calling stop() requests the allocator thread to stop, unblocking all
 * pending children (if any).
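 *
 * A fuller multi-process sketch (assuming the igt_fork() / igt_waitchildren()
 * helpers from igt_core.h; fd comes from the enclosing test and error
 * handling is omitted):
 *
 * |[<!-- language="c" -->
 *	intel_allocator_multiprocess_start();
 *
 *	igt_fork(child, 2) {
 *		uint64_t ahnd, offset;
 *		uint32_t handle;
 *
 *		// Children share the parent's vm (same fd), so offsets are
 *		// coordinated by the allocator thread in the parent.
 *		ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *		handle = gem_create(fd, 4096);
 *		offset = intel_allocator_alloc(ahnd, handle, 4096, 0);
 *
 *		... submit work using offset ...
 *
 *		intel_allocator_free(ahnd, handle);
 *		gem_close(fd, handle);
 *		intel_allocator_close(ahnd);
 *	}
 *	igt_waitchildren();
 *
 *	intel_allocator_multiprocess_stop();
 * ]|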
*/
enum allocator_strategy {
ALLOC_STRATEGY_NONE,
ALLOC_STRATEGY_LOW_TO_HIGH,
ALLOC_STRATEGY_HIGH_TO_LOW
};
struct intel_allocator {
int fd;
uint8_t type;
enum allocator_strategy strategy;
uint64_t default_alignment;
_Atomic(int32_t) refcount;
pthread_mutex_t mutex;
/* allocator's private structure */
void *priv;
void (*get_address_range)(struct intel_allocator *ial,
uint64_t *startp, uint64_t *endp);
uint64_t (*alloc)(struct intel_allocator *ial, uint32_t handle,
uint64_t size, uint64_t alignment,
enum allocator_strategy strategy);
bool (*is_allocated)(struct intel_allocator *ial, uint32_t handle,
uint64_t size, uint64_t offset);
bool (*reserve)(struct intel_allocator *ial,
uint32_t handle, uint64_t start, uint64_t end);
bool (*unreserve)(struct intel_allocator *ial,
uint32_t handle, uint64_t start, uint64_t end);
bool (*is_reserved)(struct intel_allocator *ial,
uint64_t start, uint64_t end);
bool (*free)(struct intel_allocator *ial, uint32_t handle);
void (*destroy)(struct intel_allocator *ial);
bool (*is_empty)(struct intel_allocator *ial);
void (*print)(struct intel_allocator *ial, bool full);
};
void intel_allocator_init(void);
void __intel_allocator_multiprocess_prepare(void);
void __intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_stop(void);
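
/*
 * intel_allocator_open() binds an allocator to an (fd, ctx) pair and lets the
 * implementation pick a safe default address range; the _full() variants
 * additionally take an explicit start/end range, an allocation strategy and a
 * default alignment, and the _vm() variants address a vm directly instead of
 * a context.
 */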
uint64_t intel_allocator_open(int fd, uint32_t ctx, uint8_t allocator_type);
uint64_t intel_allocator_open_full(int fd, uint32_t ctx,
uint64_t start, uint64_t end,
uint8_t allocator_type,
enum allocator_strategy strategy,
uint64_t default_alignment);
uint64_t intel_allocator_open_vm(int fd, uint32_t vm, uint8_t allocator_type);
uint64_t intel_allocator_open_vm_full(int fd, uint32_t vm,
uint64_t start, uint64_t end,
uint8_t allocator_type,
enum allocator_strategy strategy,
uint64_t default_alignment);
bool intel_allocator_close(uint64_t allocator_handle);
void intel_allocator_get_address_range(uint64_t allocator_handle,
uint64_t *startp, uint64_t *endp);
uint64_t __intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
uint64_t size, uint64_t alignment,
enum allocator_strategy strategy);
uint64_t intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
uint64_t size, uint64_t alignment);
uint64_t intel_allocator_alloc_with_strategy(uint64_t allocator_handle,
uint32_t handle,
uint64_t size, uint64_t alignment,
enum allocator_strategy strategy);
bool intel_allocator_free(uint64_t allocator_handle, uint32_t handle);
bool intel_allocator_is_allocated(uint64_t allocator_handle, uint32_t handle,
uint64_t size, uint64_t offset);
bool intel_allocator_reserve(uint64_t allocator_handle, uint32_t handle,
uint64_t size, uint64_t offset);
bool intel_allocator_unreserve(uint64_t allocator_handle, uint32_t handle,
uint64_t size, uint64_t offset);
bool intel_allocator_is_reserved(uint64_t allocator_handle,
uint64_t size, uint64_t offset);
bool intel_allocator_reserve_if_not_allocated(uint64_t allocator_handle,
uint32_t handle,
uint64_t size, uint64_t offset,
bool *is_allocatedp);
void intel_allocator_print(uint64_t allocator_handle);
void intel_allocator_bind(uint64_t allocator_handle,
uint32_t sync_in, uint32_t sync_out);
#define ALLOC_INVALID_ADDRESS (-1ull)
#define INTEL_ALLOCATOR_NONE 0
#define INTEL_ALLOCATOR_RELOC 1
#define INTEL_ALLOCATOR_SIMPLE 2
#define GEN8_GTT_ADDRESS_WIDTH 48
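
/*
 * Gen8+ GTT offsets are 48 bits wide and offsets with bit 47 set have to be
 * passed to the kernel in canonical (sign-extended) form, e.g.
 * CANONICAL(0x0000800000000000ull) == 0xffff800000000000ull;
 * DECANONICAL() masks an offset back down to the 48-bit GTT range.
 */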
static inline uint64_t CANONICAL(uint64_t offset)
{
return sign_extend64(offset, GEN8_GTT_ADDRESS_WIDTH - 1);
}
#define DECANONICAL(offset) (offset & ((1ull << GEN8_GTT_ADDRESS_WIDTH) - 1))
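
/*
 * Convenience helpers for tests: when the kernel still supports relocations
 * they return 0 (no allocator handle, i.e. rely on relocations), otherwise
 * they open an allocator of the requested type / strategy for (fd, ctx).
 */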
static inline uint64_t get_simple_ahnd(int fd, uint32_t ctx)
{
bool do_relocs = gem_has_relocations(fd);
return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_SIMPLE);
}
static inline uint64_t get_simple_l2h_ahnd(int fd, uint32_t ctx)
{
bool do_relocs = gem_has_relocations(fd);
return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_LOW_TO_HIGH,
0);
}
static inline uint64_t get_simple_h2l_ahnd(int fd, uint32_t ctx)
{
bool do_relocs = gem_has_relocations(fd);
return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW,
0);
}
static inline uint64_t get_reloc_ahnd(int fd, uint32_t ctx)
{
bool do_relocs = gem_has_relocations(fd);
return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_RELOC);
}
static inline bool put_ahnd(uint64_t ahnd)
{
return !ahnd || intel_allocator_close(ahnd);
}
static inline uint64_t get_offset(uint64_t ahnd, uint32_t handle,
uint64_t size, uint64_t alignment)
{
if (!ahnd)
return 0;
return intel_allocator_alloc(ahnd, handle, size, alignment);
}
static inline bool put_offset(uint64_t ahnd, uint32_t handle)
{
if (!ahnd)
return 0;
return intel_allocator_free(ahnd, handle);
}
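
/*
 * A typical usage sketch in a test (execbuf setup omitted); an offset of 0
 * from get_offset() means "no allocator, let relocations place the object":
 *
 *	ahnd = get_reloc_ahnd(fd, ctx);
 *	offset = get_offset(ahnd, handle, 4096, 0);
 *	... build and submit an execbuf using offset ...
 *	put_offset(ahnd, handle);
 *	put_ahnd(ahnd);
 */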
#endif