/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <search.h>
#include <glib.h>
#include "gpgpu_fill.h"
#include "huc_copy.h"
#include "i915/gem_create.h"
#include "i915/gem_mman.h"
#include "intel_blt.h"
#include "igt_aux.h"
#include "igt_syncobj.h"
#include "intel_batchbuffer.h"
#include "intel_bufops.h"
#include "intel_chipset.h"
#include "media_fill.h"
#include "media_spin.h"
#include "sw_sync.h"
#include "veboxcopy.h"
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
#define BCS_SWCTRL 0x22200
#define BCS_SRC_Y (1 << 0)
#define BCS_DST_Y (1 << 1)
/**
* SECTION:intel_batchbuffer
* @short_description: Batchbuffer and blitter support
* @title: Batch Buffer
* @include: igt.h
*
* Note that this library's header pulls in the [i-g-t core](igt-gpu-tools-i-g-t-core.html)
* library as a dependency.
*/
static bool intel_bb_do_tracking;
static IGT_LIST_HEAD(intel_bb_list);
static pthread_mutex_t intel_bb_list_lock = PTHREAD_MUTEX_INITIALIZER;
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CHECK_RANGE(x) do { \
igt_assert_lte(0, (x)); \
igt_assert_lt((x), (1 << 15)); \
} while (0)
/*
* pitches are in bytes if the surfaces are linear, number of dwords
* otherwise
*/
static uint32_t fast_copy_pitch(unsigned int stride, unsigned int tiling)
{
if (tiling != I915_TILING_NONE)
return stride / 4;
else
return stride;
}
uint32_t fast_copy_dword0(unsigned int src_tiling,
unsigned int dst_tiling)
{
uint32_t dword0 = 0;
dword0 |= XY_FAST_COPY_BLT;
switch (src_tiling) {
case I915_TILING_X:
dword0 |= XY_FAST_COPY_SRC_TILING_X;
break;
case I915_TILING_Y:
case I915_TILING_4:
case I915_TILING_Yf:
dword0 |= XY_FAST_COPY_SRC_TILING_Yb_Yf;
break;
case I915_TILING_Ys:
dword0 |= XY_FAST_COPY_SRC_TILING_Ys;
break;
case I915_TILING_NONE:
default:
break;
}
switch (dst_tiling) {
case I915_TILING_X:
dword0 |= XY_FAST_COPY_DST_TILING_X;
break;
case I915_TILING_Y:
case I915_TILING_4:
case I915_TILING_Yf:
dword0 |= XY_FAST_COPY_DST_TILING_Yb_Yf;
break;
case I915_TILING_Ys:
dword0 |= XY_FAST_COPY_DST_TILING_Ys;
break;
case I915_TILING_NONE:
default:
break;
}
return dword0;
}
static bool new_tile_y_format(unsigned int tiling)
{
return tiling == T_YFMAJOR || tiling == T_TILE4;
}
uint32_t fast_copy_dword1(int fd, unsigned int src_tiling,
unsigned int dst_tiling,
int bpp)
{
uint32_t dword1 = 0;
if (blt_fast_copy_supports_tiling(fd, T_YMAJOR)) {
dword1 |= new_tile_y_format(src_tiling)
? XY_FAST_COPY_SRC_TILING_Yf : 0;
dword1 |= new_tile_y_format(dst_tiling)
? XY_FAST_COPY_DST_TILING_Yf : 0;
} else {
/* Always set bits for platforms that don't support legacy TileY */
dword1 |= XY_FAST_COPY_SRC_TILING_Yf | XY_FAST_COPY_DST_TILING_Yf;
}
switch (bpp) {
case 8:
dword1 |= XY_FAST_COPY_COLOR_DEPTH_8;
break;
case 16:
dword1 |= XY_FAST_COPY_COLOR_DEPTH_16;
break;
case 32:
dword1 |= XY_FAST_COPY_COLOR_DEPTH_32;
break;
case 64:
dword1 |= XY_FAST_COPY_COLOR_DEPTH_64;
break;
case 128:
dword1 |= XY_FAST_COPY_COLOR_DEPTH_128;
break;
default:
igt_assert(0);
}
return dword1;
}
static void
fill_relocation(struct drm_i915_gem_relocation_entry *reloc,
uint32_t gem_handle, uint64_t presumed_offset,
uint32_t delta, /* in bytes */
uint32_t offset, /* in dwords */
uint32_t read_domains, uint32_t write_domains)
{
reloc->target_handle = gem_handle;
reloc->delta = delta;
reloc->offset = offset * sizeof(uint32_t);
reloc->presumed_offset = presumed_offset;
reloc->read_domains = read_domains;
reloc->write_domain = write_domains;
}
static void
fill_object(struct drm_i915_gem_exec_object2 *obj,
uint32_t gem_handle, uint64_t gem_offset,
struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
{
memset(obj, 0, sizeof(*obj));
obj->handle = gem_handle;
obj->offset = gem_offset;
obj->relocation_count = count;
obj->relocs_ptr = to_user_pointer(relocs);
}
static uint32_t find_engine(const intel_ctx_cfg_t *cfg, unsigned int class)
{
unsigned int i;
uint32_t engine_id = -1;
for (i = 0; i < cfg->num_engines; i++) {
if (cfg->engines[i].engine_class == class)
engine_id = i;
}
igt_assert_f(engine_id != -1, "Requested engine not found!\n");
return engine_id;
}
static void exec_blit(int fd,
struct drm_i915_gem_exec_object2 *objs,
uint32_t count, uint32_t ctx,
const intel_ctx_cfg_t *cfg)
{
struct drm_i915_gem_execbuffer2 exec;
uint32_t devid = intel_get_drm_devid(fd);
uint32_t blt_id = HAS_BLT_RING(devid) ? I915_EXEC_BLT : I915_EXEC_DEFAULT;
if (cfg)
blt_id = find_engine(cfg, I915_ENGINE_CLASS_COPY);
exec = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = to_user_pointer(objs),
.buffer_count = count,
.flags = blt_id | I915_EXEC_NO_RELOC,
.rsvd1 = ctx,
};
gem_execbuf(fd, &exec);
}
static uint32_t src_copy_dword0(uint32_t src_tiling, uint32_t dst_tiling,
uint32_t bpp, uint32_t device_gen)
{
uint32_t dword0 = 0;
dword0 |= XY_SRC_COPY_BLT_CMD;
if (bpp == 32)
dword0 |= XY_SRC_COPY_BLT_WRITE_RGB |
XY_SRC_COPY_BLT_WRITE_ALPHA;
if (device_gen >= 4 && src_tiling)
dword0 |= XY_SRC_COPY_BLT_SRC_TILED;
if (device_gen >= 4 && dst_tiling)
dword0 |= XY_SRC_COPY_BLT_DST_TILED;
return dword0;
}
static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp)
{
uint32_t dword1 = 0;
switch (bpp) {
case 8:
break;
case 16:
dword1 |= 1 << 24; /* Only support 565 color */
break;
case 32:
dword1 |= 3 << 24;
break;
default:
igt_assert(0);
}
dword1 |= 0xcc << 16;
dword1 |= dst_pitch;
return dword1;
}
/**
* igt_blitter_copy:
* @fd: file descriptor of the i915 driver
* @ahnd: handle to an allocator
* @ctx: context within which to execute the copy blit
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @src_handle: GEM handle of the source buffer
* @src_delta: offset into the source GEM bo, in bytes
* @src_stride: Stride (in bytes) of the source buffer
* @src_tiling: Tiling mode of the source buffer
* @src_x: X coordinate of the source region to copy
* @src_y: Y coordinate of the source region to copy
* @src_size: size of the src bo required for allocator and softpin
* @width: Width of the region to copy
* @height: Height of the region to copy
* @bpp: source and destination bits per pixel
* @dst_handle: GEM handle of the destination buffer
* @dst_delta: offset into the destination GEM bo, in bytes
* @dst_stride: Stride (in bytes) of the destination buffer
* @dst_tiling: Tiling mode of the destination buffer
* @dst_x: X coordinate of destination
* @dst_y: Y coordinate of destination
* @dst_size: size of the dst bo required for allocator and softpin
*
* Wrapper API that calls the appropriate blitter copy function for the device.
*/
void igt_blitter_copy(int fd,
uint64_t ahnd,
uint32_t ctx,
const intel_ctx_cfg_t *cfg,
/* src */
uint32_t src_handle,
uint32_t src_delta,
uint32_t src_stride,
uint32_t src_tiling,
uint32_t src_x, uint32_t src_y,
uint64_t src_size,
/* size */
uint32_t width, uint32_t height,
/* bpp */
uint32_t bpp,
/* dst */
uint32_t dst_handle,
uint32_t dst_delta,
uint32_t dst_stride,
uint32_t dst_tiling,
uint32_t dst_x, uint32_t dst_y,
uint64_t dst_size)
{
uint32_t devid;
devid = intel_get_drm_devid(fd);
if (intel_graphics_ver(devid) >= IP_VER(12, 60))
igt_blitter_fast_copy__raw(fd, ahnd, ctx, NULL,
src_handle, src_delta,
src_stride, src_tiling,
src_x, src_y, src_size,
width, height, bpp,
dst_handle, dst_delta,
dst_stride, dst_tiling,
dst_x, dst_y, dst_size);
else
igt_blitter_src_copy(fd, ahnd, ctx, NULL,
src_handle, src_delta,
src_stride, src_tiling,
src_x, src_y, src_size,
width, height, bpp,
dst_handle, dst_delta,
dst_stride, dst_tiling,
dst_x, dst_y, dst_size);
}
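/*
* Example: a minimal sketch of a linear-to-linear, 32bpp copy with
* igt_blitter_copy() (illustrative only: src/dst are assumed to be bos of at
* least size bytes created elsewhere, e.g. with gem_create(), and ahnd is an
* allocator handle or 0 for relocation mode). igt_blitter_src_copy() and
* igt_blitter_fast_copy__raw() take the same argument layout.
*
*	igt_blitter_copy(fd, ahnd, 0, NULL,
*			 src, 0, width * 4, I915_TILING_NONE, 0, 0, size,
*			 width, height, 32,
*			 dst, 0, width * 4, I915_TILING_NONE, 0, 0, size);
*/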
/**
* igt_blitter_src_copy:
* @fd: file descriptor of the i915 driver
* @ahnd: handle to an allocator
* @ctx: context within which to execute the copy blit
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @src_handle: GEM handle of the source buffer
* @src_delta: offset into the source GEM bo, in bytes
* @src_stride: Stride (in bytes) of the source buffer
* @src_tiling: Tiling mode of the source buffer
* @src_x: X coordinate of the source region to copy
* @src_y: Y coordinate of the source region to copy
* @src_size: size of the src bo required for allocator and softpin
* @width: Width of the region to copy
* @height: Height of the region to copy
* @bpp: source and destination bits per pixel
* @dst_handle: GEM handle of the destination buffer
* @dst_delta: offset into the destination GEM bo, in bytes
* @dst_stride: Stride (in bytes) of the destination buffer
* @dst_tiling: Tiling mode of the destination buffer
* @dst_x: X coordinate of destination
* @dst_y: Y coordinate of destination
* @dst_size: size of the dst bo required for allocator and softpin
*
* Copy @src into @dst using the XY_SRC blit command.
*/
void igt_blitter_src_copy(int fd,
uint64_t ahnd,
uint32_t ctx,
const intel_ctx_cfg_t *cfg,
/* src */
uint32_t src_handle,
uint32_t src_delta,
uint32_t src_stride,
uint32_t src_tiling,
uint32_t src_x, uint32_t src_y,
uint64_t src_size,
/* size */
uint32_t width, uint32_t height,
/* bpp */
uint32_t bpp,
/* dst */
uint32_t dst_handle,
uint32_t dst_delta,
uint32_t dst_stride,
uint32_t dst_tiling,
uint32_t dst_x, uint32_t dst_y,
uint64_t dst_size)
{
uint32_t batch[32];
struct drm_i915_gem_exec_object2 objs[3];
struct drm_i915_gem_relocation_entry relocs[2];
uint32_t batch_handle;
uint32_t src_pitch, dst_pitch;
uint32_t dst_reloc_offset, src_reloc_offset;
uint32_t gen = intel_gen(intel_get_drm_devid(fd));
uint64_t batch_offset, src_offset, dst_offset;
const bool has_64b_reloc = gen >= 8;
int i = 0;
batch_handle = gem_create(fd, 4096);
if (ahnd) {
src_offset = get_offset(ahnd, src_handle, src_size, 0);
dst_offset = get_offset(ahnd, dst_handle, dst_size, 0);
batch_offset = get_offset(ahnd, batch_handle, 4096, 0);
} else {
src_offset = 16 << 20;
dst_offset = ALIGN(src_offset + src_size, 1 << 20);
batch_offset = ALIGN(dst_offset + dst_size, 1 << 20);
}
memset(batch, 0, sizeof(batch));
igt_assert((src_tiling == I915_TILING_NONE) ||
(src_tiling == I915_TILING_X) ||
(src_tiling == I915_TILING_Y));
igt_assert((dst_tiling == I915_TILING_NONE) ||
(dst_tiling == I915_TILING_X) ||
(dst_tiling == I915_TILING_Y));
src_pitch = (gen >= 4 && src_tiling) ? src_stride / 4 : src_stride;
dst_pitch = (gen >= 4 && dst_tiling) ? dst_stride / 4 : dst_stride;
if (bpp == 64) {
bpp /= 2;
width *= 2;
}
CHECK_RANGE(src_x); CHECK_RANGE(src_y);
CHECK_RANGE(dst_x); CHECK_RANGE(dst_y);
CHECK_RANGE(width); CHECK_RANGE(height);
CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height);
CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height);
CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
unsigned int mask;
batch[i++] = MI_LOAD_REGISTER_IMM(1);
batch[i++] = BCS_SWCTRL;
mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
if (src_tiling == I915_TILING_Y)
mask |= BCS_SRC_Y;
if (dst_tiling == I915_TILING_Y)
mask |= BCS_DST_Y;
batch[i++] = mask;
}
batch[i] = src_copy_dword0(src_tiling, dst_tiling, bpp, gen);
batch[i++] |= 6 + 2 * has_64b_reloc;
batch[i++] = src_copy_dword1(dst_pitch, bpp);
batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
dst_reloc_offset = i;
batch[i++] = dst_offset + dst_delta; /* dst address lower bits */
if (has_64b_reloc)
batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */
batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
batch[i++] = src_pitch;
src_reloc_offset = i;
batch[i++] = src_offset + src_delta; /* src address lower bits */
if (has_64b_reloc)
batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */
if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
igt_assert(gen >= 6);
batch[i++] = MI_FLUSH_DW_CMD | 2;
batch[i++] = 0;
batch[i++] = 0;
batch[i++] = 0;
batch[i++] = MI_LOAD_REGISTER_IMM(1);
batch[i++] = BCS_SWCTRL;
batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
}
batch[i++] = MI_BATCH_BUFFER_END;
batch[i++] = MI_NOOP;
igt_assert(i <= ARRAY_SIZE(batch));
gem_write(fd, batch_handle, 0, batch, sizeof(batch));
fill_relocation(&relocs[0], dst_handle, dst_offset,
dst_delta, dst_reloc_offset,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
fill_relocation(&relocs[1], src_handle, src_offset,
src_delta, src_reloc_offset,
I915_GEM_DOMAIN_RENDER, 0);
fill_object(&objs[0], dst_handle, dst_offset, NULL, 0);
fill_object(&objs[1], src_handle, src_offset, NULL, 0);
fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0);
objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE | EXEC_OBJECT_WRITE;
objs[1].flags |= EXEC_OBJECT_NEEDS_FENCE;
if (ahnd) {
objs[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
objs[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
objs[2].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
}
exec_blit(fd, objs, 3, ctx, cfg);
gem_close(fd, batch_handle);
}
/**
* igt_blitter_fast_copy__raw:
* @fd: file descriptor of the i915 driver
* @ahnd: handle to an allocator
* @ctx: context within which to execute the copy blit
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @src_handle: GEM handle of the source buffer
* @src_delta: offset into the source GEM bo, in bytes
* @src_stride: Stride (in bytes) of the source buffer
* @src_tiling: Tiling mode of the source buffer
* @src_x: X coordinate of the source region to copy
* @src_y: Y coordinate of the source region to copy
* @src_size: size of the src bo required for allocator and softpin
* @width: Width of the region to copy
* @height: Height of the region to copy
* @bpp: source and destination bits per pixel
* @dst_handle: GEM handle of the destination buffer
* @dst_delta: offset into the destination GEM bo, in bytes
* @dst_stride: Stride (in bytes) of the destination buffer
* @dst_tiling: Tiling mode of the destination buffer
* @dst_x: X coordinate of destination
* @dst_y: Y coordinate of destination
* @dst_size: size of the dst bo required for allocator and softpin
*
* Like igt_blitter_fast_copy(), but talking to the kernel directly.
*/
void igt_blitter_fast_copy__raw(int fd,
uint64_t ahnd,
uint32_t ctx,
const intel_ctx_cfg_t *cfg,
/* src */
uint32_t src_handle,
unsigned int src_delta,
unsigned int src_stride,
unsigned int src_tiling,
unsigned int src_x, unsigned src_y,
uint64_t src_size,
/* size */
unsigned int width, unsigned int height,
/* bpp */
int bpp,
/* dst */
uint32_t dst_handle,
unsigned dst_delta,
unsigned int dst_stride,
unsigned int dst_tiling,
unsigned int dst_x, unsigned dst_y,
uint64_t dst_size)
{
uint32_t batch[12];
struct drm_i915_gem_exec_object2 objs[3];
struct drm_i915_gem_relocation_entry relocs[2];
uint32_t batch_handle;
uint32_t dword0, dword1;
uint32_t src_pitch, dst_pitch;
uint64_t batch_offset, src_offset, dst_offset;
int i = 0;
batch_handle = gem_create(fd, 4096);
if (ahnd) {
src_offset = get_offset(ahnd, src_handle, src_size, 0);
dst_offset = get_offset(ahnd, dst_handle, dst_size, 0);
batch_offset = get_offset(ahnd, batch_handle, 4096, 0);
} else {
src_offset = 16 << 20;
dst_offset = ALIGN(src_offset + src_size, 1 << 20);
batch_offset = ALIGN(dst_offset + dst_size, 1 << 20);
}
src_pitch = fast_copy_pitch(src_stride, src_tiling);
dst_pitch = fast_copy_pitch(dst_stride, dst_tiling);
dword0 = fast_copy_dword0(src_tiling, dst_tiling);
dword1 = fast_copy_dword1(fd, src_tiling, dst_tiling, bpp);
CHECK_RANGE(src_x); CHECK_RANGE(src_y);
CHECK_RANGE(dst_x); CHECK_RANGE(dst_y);
CHECK_RANGE(width); CHECK_RANGE(height);
CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height);
CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height);
CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
batch[i++] = dword0;
batch[i++] = dword1 | dst_pitch;
batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
batch[i++] = dst_offset + dst_delta; /* dst address lower bits */
batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */
batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
batch[i++] = src_pitch;
batch[i++] = src_offset + src_delta; /* src address lower bits */
batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */
batch[i++] = MI_BATCH_BUFFER_END;
batch[i++] = MI_NOOP;
igt_assert(i == ARRAY_SIZE(batch));
gem_write(fd, batch_handle, 0, batch, sizeof(batch));
fill_relocation(&relocs[0], dst_handle, dst_offset, dst_delta, 4,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
fill_relocation(&relocs[1], src_handle, src_offset, src_delta, 8,
I915_GEM_DOMAIN_RENDER, 0);
fill_object(&objs[0], dst_handle, dst_offset, NULL, 0);
objs[0].flags |= EXEC_OBJECT_WRITE;
fill_object(&objs[1], src_handle, src_offset, NULL, 0);
fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0);
if (ahnd) {
objs[0].flags |= EXEC_OBJECT_PINNED;
objs[1].flags |= EXEC_OBJECT_PINNED;
objs[2].flags |= EXEC_OBJECT_PINNED;
}
exec_blit(fd, objs, 3, ctx, cfg);
gem_close(fd, batch_handle);
}
/**
* igt_get_render_copyfunc:
* @devid: pci device id
*
* Returns:
*
* The platform-specific render copy function pointer for the device
* specified with @devid. Will return NULL when no render copy function is
* implemented.
*/
igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
{
igt_render_copyfunc_t copy = NULL;
if (IS_GEN2(devid))
copy = gen2_render_copyfunc;
else if (IS_GEN3(devid))
copy = gen3_render_copyfunc;
else if (IS_GEN4(devid) || IS_GEN5(devid))
copy = gen4_render_copyfunc;
else if (IS_GEN6(devid))
copy = gen6_render_copyfunc;
else if (IS_GEN7(devid))
copy = gen7_render_copyfunc;
else if (IS_GEN8(devid))
copy = gen8_render_copyfunc;
else if (IS_GEN9(devid) || IS_GEN10(devid))
copy = gen9_render_copyfunc;
else if (IS_GEN11(devid))
copy = gen11_render_copyfunc;
else if (HAS_FLATCCS(devid))
copy = gen12p71_render_copyfunc;
else if (IS_METEORLAKE(devid))
copy = mtl_render_copyfunc;
else if (IS_GEN12(devid))
copy = gen12_render_copyfunc;
return copy;
}
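/*
* Example: the usual pattern for the per-platform getters in this library -
* fetch the function pointer by devid and skip the test when the platform
* has no implementation (a sketch; the returned function is then called
* with an intel_bb plus source/destination intel_bufs):
*
*	igt_render_copyfunc_t rendercopy =
*		igt_get_render_copyfunc(intel_get_drm_devid(fd));
*
*	igt_require_f(rendercopy, "no render-copy function\n");
*/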
igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
{
igt_vebox_copyfunc_t copy = NULL;
if (IS_GEN12(devid))
copy = gen12_vebox_copyfunc;
return copy;
}
igt_render_clearfunc_t igt_get_render_clearfunc(int devid)
{
if (IS_METEORLAKE(devid)) {
return mtl_render_clearfunc;
} else if (IS_DG2(devid)) {
return gen12p71_render_clearfunc;
} else if (IS_GEN12(devid)) {
return gen12_render_clearfunc;
} else {
return NULL;
}
}
/**
* igt_get_media_fillfunc:
* @devid: pci device id
*
* Returns:
*
* The platform-specific media fill function pointer for the device specified
* with @devid. Will return NULL when no media fill function is implemented.
*/
igt_fillfunc_t igt_get_media_fillfunc(int devid)
{
igt_fillfunc_t fill = NULL;
if (intel_graphics_ver(devid) >= IP_VER(12, 50)) {
/* current implementation defeatured PIPELINE_MEDIA */
} else if (IS_GEN12(devid))
fill = gen12_media_fillfunc;
else if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
fill = gen9_media_fillfunc;
else if (IS_GEN8(devid))
fill = gen8_media_fillfunc;
else if (IS_GEN7(devid))
fill = gen7_media_fillfunc;
return fill;
}
igt_vme_func_t igt_get_media_vme_func(int devid)
{
igt_vme_func_t fill = NULL;
const struct intel_device_info *devinfo = intel_get_device_info(devid);
if (IS_GEN11(devid) && !devinfo->is_elkhartlake && !devinfo->is_jasperlake)
fill = gen11_media_vme_func;
return fill;
}
/**
* igt_get_gpgpu_fillfunc:
* @devid: pci device id
*
* Returns:
*
* The platform-specific gpgpu fill function pointer for the device specified
* with @devid. Will return NULL when no gpgpu fill function is implemented.
*/
igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
{
igt_fillfunc_t fill = NULL;
if (intel_graphics_ver(devid) >= IP_VER(12, 60))
fill = xehpc_gpgpu_fillfunc;
else if (intel_graphics_ver(devid) >= IP_VER(12, 50))
fill = xehp_gpgpu_fillfunc;
else if (IS_GEN12(devid))
fill = gen12_gpgpu_fillfunc;
else if (IS_GEN11(devid))
fill = gen11_gpgpu_fillfunc;
else if (IS_GEN9(devid) || IS_GEN10(devid))
fill = gen9_gpgpu_fillfunc;
else if (IS_GEN8(devid))
fill = gen8_gpgpu_fillfunc;
else if (IS_GEN7(devid))
fill = gen7_gpgpu_fillfunc;
return fill;
}
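/*
* Example: the fill and spin getters follow the same pattern as the copy
* getters above (a sketch):
*
*	igt_fillfunc_t fill = igt_get_gpgpu_fillfunc(intel_get_drm_devid(fd));
*
*	igt_require_f(fill, "no gpgpu-fill function\n");
*/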
/**
* igt_get_media_spinfunc:
* @devid: pci device id
*
* Returns:
*
* The platform-specific media spin function pointer for the device specified
* with @devid. Will return NULL when no media spin function is implemented.
*/
igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
{
igt_media_spinfunc_t spin = NULL;
if (IS_GEN9(devid))
spin = gen9_media_spinfunc;
else if (IS_GEN8(devid))
spin = gen8_media_spinfunc;
return spin;
}
/* Intel batchbuffer v2 */
static bool intel_bb_debug_tree = false;
/*
* __reallocate_objects:
* @ibb: pointer to intel_bb
*
* Increases number of objects if necessary.
*/
static void __reallocate_objects(struct intel_bb *ibb)
{
const uint32_t inc = 4096 / sizeof(*ibb->objects);
if (ibb->num_objects == ibb->allocated_objects) {
ibb->objects = realloc(ibb->objects,
sizeof(*ibb->objects) *
(inc + ibb->allocated_objects));
igt_assert(ibb->objects);
ibb->allocated_objects += inc;
memset(&ibb->objects[ibb->num_objects], 0,
inc * sizeof(*ibb->objects));
}
}
static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
uint32_t handle,
uint64_t size,
uint32_t alignment)
{
uint64_t offset;
if (ibb->enforce_relocs)
return 0;
offset = intel_allocator_alloc(ibb->allocator_handle,
handle, size, alignment);
return offset;
}
/**
* __intel_bb_create:
* @fd: drm fd - i915 or xe
* @ctx: for i915 context id, for xe engine id
* @vm: for xe vm_id, unused for i915
* @cfg: for i915 intel_ctx configuration, NULL for default context or legacy mode,
* unused for xe
* @size: size of the batchbuffer
* @do_relocs: use relocations or allocator
* @allocator_type: allocator type, must be INTEL_ALLOCATOR_NONE for relocations
*
* intel-bb works in one of two modes - with relocations or with an
* allocator (currently RELOC and SIMPLE are implemented). A short
* description of how each mode maintains addresses follows.
*
* In both modes intel-bb keeps objects and their offsets in an internal
* cache and reuses them in subsequent execs.
*
* 1. intel-bb with relocations (i915 only)
*
* Creating a new intel-bb implicitly adds its handle to the cache and sets
* its address to 0. Objects added to intel-bb later also start with
* address 0 for the first run. After execbuf the cache is updated with the
* new addresses. As intel-bb works in reloc mode the addresses are only a
* suggestion to the driver and we cannot be sure they won't change at the
* next exec.
*
* 2. with allocator (i915 or xe)
*
* This mode is valid only for ppgtt. Addresses are acquired from the
* allocator and softpinned (i915) or vm-bound (xe). The intel-bb cache must
* then be coherent with the allocator (SIMPLE is fully coherent, RELOC only
* partially as it doesn't support address reservation).
* When intel-bb is reset with cache purging it has to reacquire addresses
* from the allocator (the allocator should return the same address, which
* is true for the SIMPLE and RELOC allocators).
*
* If we reset without purging caches, addresses from the intel-bb cache are
* used when constructing the execbuf objects.
*
* If we reset with cache purging, the allocator entries are freed as well.
*
* __intel_bb_create checks if a context configuration for intel_ctx_t was
* passed in. If this is the case, it copies the information over to the
* newly created batch buffer.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
static struct intel_bb *
__intel_bb_create(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg,
uint32_t size, bool do_relocs,
uint64_t start, uint64_t end,
uint8_t allocator_type, enum allocator_strategy strategy)
{
struct drm_i915_gem_exec_object2 *object;
struct intel_bb *ibb = calloc(1, sizeof(*ibb));
igt_assert(ibb);
ibb->devid = intel_get_drm_devid(fd);
ibb->gen = intel_gen(ibb->devid);
ibb->ctx = ctx;
ibb->fd = fd;
ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 :
is_xe_device(fd) ? INTEL_DRIVER_XE : 0;
igt_assert(ibb->driver);
/*
* If we don't have full ppgtt the driver can change our addresses,
* so the allocator is useless in this case. Just enforce relocations
* for such gens and don't use the allocator at all.
*/
if (ibb->driver == INTEL_DRIVER_I915) {
ibb->uses_full_ppgtt = gem_uses_full_ppgtt(fd);
ibb->alignment = gem_detect_safe_alignment(fd);
ibb->gtt_size = gem_aperture_size(fd);
ibb->handle = gem_create(fd, size);
if (!ibb->uses_full_ppgtt)
do_relocs = true;
/*
* In softpin mode the allocator has full control over offset allocation,
* so we don't want the kernel to interfere with it.
*/
if (do_relocs) {
ibb->allows_obj_alignment = gem_allows_obj_alignment(fd);
allocator_type = INTEL_ALLOCATOR_NONE;
} else {
/* Use a safe start offset instead of assuming 0x0 is safe */
start = max_t(uint64_t, start, gem_detect_safe_start_offset(fd));
/* if relocs are set we won't use an allocator */
ibb->allocator_handle =
intel_allocator_open_full(fd, ctx, start, end,
allocator_type,
strategy, 0);
}
ibb->vm_id = 0;
} else {
igt_assert(!do_relocs);
ibb->alignment = xe_get_default_alignment(fd);
size = ALIGN(size, ibb->alignment);
ibb->handle = xe_bo_create_flags(fd, 0, size, visible_vram_if_possible(fd, 0));
/* Limit to 48-bit due to MI_* address limitation */
ibb->gtt_size = 1ull << min_t(uint32_t, xe_va_bits(fd), 48);
end = ibb->gtt_size;
if (!vm) {
igt_assert_f(!ctx, "No vm provided for engine");
vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
}
ibb->uses_full_ppgtt = true;
ibb->allocator_handle =
intel_allocator_open_full(fd, vm, start, end,
allocator_type, strategy,
ibb->alignment);
ibb->vm_id = vm;
ibb->last_engine = ~0U;
}
ibb->allocator_type = allocator_type;
ibb->allocator_strategy = strategy;
ibb->allocator_start = start;
ibb->allocator_end = end;
ibb->enforce_relocs = do_relocs;
ibb->size = size;
ibb->batch = calloc(1, size);
igt_assert(ibb->batch);
ibb->ptr = ibb->batch;
ibb->fence = -1;
/* Cache context configuration */
if (cfg) {
ibb->cfg = malloc(sizeof(*cfg));
igt_assert(ibb->cfg);
memcpy(ibb->cfg, cfg, sizeof(*cfg));
}
if ((ibb->gtt_size - 1) >> 32)
ibb->supports_48b_address = true;
object = intel_bb_add_object(ibb, ibb->handle, ibb->size,
INTEL_BUF_INVALID_ADDRESS, ibb->alignment,
false);
ibb->batch_offset = object->offset;
IGT_INIT_LIST_HEAD(&ibb->intel_bufs);
ibb->refcount = 1;
if (intel_bb_do_tracking && ibb->allocator_type != INTEL_ALLOCATOR_NONE) {
pthread_mutex_lock(&intel_bb_list_lock);
igt_list_add(&ibb->link, &intel_bb_list);
pthread_mutex_unlock(&intel_bb_list_lock);
}
return ibb;
}
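/*
* Example: how the two modes described above are usually selected through
* the public constructors (a sketch; 4096 is just an illustrative bb size).
* intel_bb_create_no_relocs() gives allocator (softpin / vm-bind) mode,
* intel_bb_create_with_relocs() enforces relocations (i915 only) and
* intel_bb_create_full() additionally lets the caller constrain the
* allocator VM range and strategy:
*
*	struct intel_bb *pinned = intel_bb_create_no_relocs(fd, 4096);
*	struct intel_bb *relocated = intel_bb_create_with_relocs(fd, 4096);
*/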
/**
* intel_bb_create_full:
* @fd: drm fd - i915 or xe
* @ctx: for i915 context id, for xe engine id
* @vm: for xe vm_id, unused for i915
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
* @start: allocator vm start address
* @end: allocator vm end address
* @allocator_type: allocator type, SIMPLE, RELOC, ...
* @strategy: allocation strategy
*
* Creates a bb with the context passed in @ctx, size in @size and allocator
* type in @allocator_type. Relocations are disabled because the IGT
* allocator is used in that case. The VM range (@start and @end) and the
* allocation @strategy (a suggestion to the allocator about address
* allocation preferences) are passed to the allocator.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create_full(int fd, uint32_t ctx, uint32_t vm,
const intel_ctx_cfg_t *cfg, uint32_t size,
uint64_t start, uint64_t end,
uint8_t allocator_type,
enum allocator_strategy strategy)
{
return __intel_bb_create(fd, ctx, vm, cfg, size, false, start, end,
allocator_type, strategy);
}
/**
* intel_bb_create_with_allocator:
* @fd: drm fd - i915 or xe
* @ctx: for i915 context id, for xe engine id
* @vm: for xe vm_id, unused for i915
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
* @allocator_type: allocator type, SIMPLE, RANDOM, ...
*
* Creates a bb with the context passed in @ctx, size in @size and allocator
* type in @allocator_type. Relocations are disabled because the IGT
* allocator is used in that case.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create_with_allocator(int fd, uint32_t ctx, uint32_t vm,
const intel_ctx_cfg_t *cfg,
uint32_t size,
uint8_t allocator_type)
{
return __intel_bb_create(fd, ctx, vm, cfg, size, false, 0, 0,
allocator_type, ALLOC_STRATEGY_HIGH_TO_LOW);
}
static bool aux_needs_softpin(int fd)
{
return intel_gen(intel_get_drm_devid(fd)) >= 12;
}
static bool has_ctx_cfg(struct intel_bb *ibb)
{
return ibb->cfg && ibb->cfg->num_engines > 0;
}
/**
* intel_bb_create:
* @fd: drm fd - i915 or xe
* @size: size of the batchbuffer
*
* Creates bb with default context.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*
* Notes:
*
* intel_bb must not be created in igt_fixture. The reason is that intel_bb
* "opens" a connection to the allocator and when the test completes it can
* leave the allocator in an unknown state (mostly for failed tests).
* As igt_core is armed to reset the allocator infrastructure, the
* connection kept inside intel_bb is not valid anymore and trying to use
* it leads to catastrophic errors.
*/
struct intel_bb *intel_bb_create(int fd, uint32_t size)
{
bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
return __intel_bb_create(fd, 0, 0, NULL, size,
relocs && !aux_needs_softpin(fd), 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
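/*
* Example: a minimal sketch of the typical intel-bb lifecycle (assumes the
* intel_bb_out()/intel_bb_emit_bbe()/intel_bb_exec() helpers declared in
* intel_batchbuffer.h and a bb created outside igt_fixture as the note
* above explains):
*
*	struct intel_bb *ibb = intel_bb_create(fd, 4096);
*
*	intel_bb_out(ibb, MI_NOOP);
*	intel_bb_emit_bbe(ibb);
*	intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, true);
*	intel_bb_destroy(ibb);
*/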
/**
* intel_bb_create_with_context:
* @fd: drm fd - i915 or xe
* @ctx: for i915 context id, for xe engine id
* @vm: for xe vm_id, unused for i915
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
*
* Creates a bb with the context passed in @ctx and the @cfg configuration
* (when working with a custom engine layout).
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *
intel_bb_create_with_context(int fd, uint32_t ctx, uint32_t vm,
const intel_ctx_cfg_t *cfg, uint32_t size)
{
bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
return __intel_bb_create(fd, ctx, vm, cfg, size,
relocs && !aux_needs_softpin(fd), 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
/**
* intel_bb_create_with_relocs:
* @fd: drm fd - i915
* @size: size of the batchbuffer
*
* Creates a bb with address passing disabled.
* This will lead to relocations when objects are not previously pinned.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size)
{
igt_require(is_i915_device(fd) && gem_has_relocations(fd));
return __intel_bb_create(fd, 0, 0, NULL, size, true, 0, 0,
INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
}
/**
* intel_bb_create_with_relocs_and_context:
* @fd: drm fd - i915
* @ctx: context
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
*
* Creates a bb with the context passed in @ctx and with address passing
* disabled. This will lead to relocations when objects are not previously
* pinned.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *
intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg,
uint32_t size)
{
igt_require(is_i915_device(fd) && gem_has_relocations(fd));
return __intel_bb_create(fd, ctx, 0, cfg, size, true, 0, 0,
INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
}
/**
* intel_bb_create_no_relocs:
* @fd: drm fd
* @size: size of the batchbuffer
*
* Creates a bb with relocations disabled.
* This enables passing addresses and requires objects to be pinned.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size)
{
igt_require(gem_uses_full_ppgtt(fd));
return __intel_bb_create(fd, 0, 0, NULL, size, false, 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
static void __intel_bb_destroy_relocations(struct intel_bb *ibb)
{
uint32_t i;
/* Free relocations */
for (i = 0; i < ibb->num_objects; i++) {
free(from_user_pointer(ibb->objects[i]->relocs_ptr));
ibb->objects[i]->relocs_ptr = to_user_pointer(NULL);
ibb->objects[i]->relocation_count = 0;
}
ibb->relocs = NULL;
ibb->num_relocs = 0;
ibb->allocated_relocs = 0;
}
static void __intel_bb_destroy_objects(struct intel_bb *ibb)
{
free(ibb->objects);
ibb->objects = NULL;
tdestroy(ibb->current, free);
ibb->current = NULL;
ibb->num_objects = 0;
ibb->allocated_objects = 0;
}
static void __intel_bb_destroy_cache(struct intel_bb *ibb)
{
tdestroy(ibb->root, free);
ibb->root = NULL;
}
static void __intel_bb_remove_intel_bufs(struct intel_bb *ibb)
{
struct intel_buf *entry, *tmp;
igt_list_for_each_entry_safe(entry, tmp, &ibb->intel_bufs, link)
intel_bb_remove_intel_buf(ibb, entry);
}
/**
* intel_bb_destroy:
* @ibb: pointer to intel_bb
*
* Frees all relocations / objects allocated during filling the batch.
*/
void intel_bb_destroy(struct intel_bb *ibb)
{
igt_assert(ibb);
ibb->refcount--;
igt_assert_f(ibb->refcount == 0, "Trying to destroy referenced bb!");
__intel_bb_remove_intel_bufs(ibb);
__intel_bb_destroy_relocations(ibb);
__intel_bb_destroy_objects(ibb);
__intel_bb_destroy_cache(ibb);
if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) {
if (intel_bb_do_tracking) {
pthread_mutex_lock(&intel_bb_list_lock);
igt_list_del(&ibb->link);
pthread_mutex_unlock(&intel_bb_list_lock);
}
intel_allocator_free(ibb->allocator_handle, ibb->handle);
intel_allocator_close(ibb->allocator_handle);
}
gem_close(ibb->fd, ibb->handle);
if (ibb->fence >= 0)
close(ibb->fence);
if (ibb->engine_syncobj)
syncobj_destroy(ibb->fd, ibb->engine_syncobj);
if (ibb->vm_id && !ibb->ctx)
xe_vm_destroy(ibb->fd, ibb->vm_id);
free(ibb->batch);
free(ibb->cfg);
free(ibb);
}
static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct intel_bb *ibb,
uint32_t op, uint32_t region)
{
struct drm_i915_gem_exec_object2 **objects = ibb->objects;
struct drm_xe_vm_bind_op *bind_ops, *ops;
bool set_obj = (op & 0xffff) == XE_VM_BIND_OP_MAP;
bind_ops = calloc(ibb->num_objects, sizeof(*bind_ops));
igt_assert(bind_ops);
igt_debug("bind_ops: %s\n", set_obj ? "MAP" : "UNMAP");
for (int i = 0; i < ibb->num_objects; i++) {
ops = &bind_ops[i];
if (set_obj)
ops->obj = objects[i]->handle;
ops->op = op;
ops->obj_offset = 0;
ops->addr = objects[i]->offset;
ops->range = objects[i]->rsvd1;
ops->region = region;
igt_debug(" [%d]: handle: %u, offset: %llx, size: %llx\n",
i, ops->obj, (long long)ops->addr, (long long)ops->range);
}
return bind_ops;
}
static void __unbind_xe_objects(struct intel_bb *ibb)
{
struct drm_xe_sync syncs[2] = {
{ .flags = DRM_XE_SYNC_SYNCOBJ },
{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
};
int ret;
syncs[0].handle = ibb->engine_syncobj;
syncs[1].handle = syncobj_create(ibb->fd, 0);
if (ibb->num_objects > 1) {
struct drm_xe_vm_bind_op *bind_ops;
uint32_t op = XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC;
bind_ops = xe_alloc_bind_ops(ibb, op, 0);
xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
ibb->num_objects, syncs, 2);
free(bind_ops);
} else {
igt_debug("bind: UNMAP\n");
igt_debug(" offset: %llx, size: %llx\n",
(long long)ibb->batch_offset, (long long)ibb->size);
xe_vm_unbind_async(ibb->fd, ibb->vm_id, 0, 0,
ibb->batch_offset, ibb->size, syncs, 2);
}
ret = syncobj_wait_err(ibb->fd, &syncs[1].handle, 1, INT64_MAX, 0);
igt_assert_eq(ret, 0);
syncobj_destroy(ibb->fd, syncs[1].handle);
ibb->xe_bound = false;
}
/*
* intel_bb_reset:
* @ibb: pointer to intel_bb
* @purge_objects_cache: if true destroy internal execobj and relocs + cache
*
* Recreate batch bo when there's no additional reference.
*
* When @purge_objects_cache == true we destroy the cache as well as remove
* intel_bufs from the intel-bb tracking list. Removing intel_bufs releases
* their addresses in the allocator.
*/
void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
{
uint32_t i;
if (purge_objects_cache && ibb->refcount > 1)
igt_warn("Cannot purge objects cache on bb, refcount > 1!");
/* Someone keeps reference, just exit */
if (ibb->refcount > 1)
return;
/*
* To avoid relocations, objects previously pinned to high virtual
* addresses should keep the 48bit flag. Ensure we don't clear it
* in the reset path.
*/
for (i = 0; i < ibb->num_objects; i++)
ibb->objects[i]->flags &= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
if (ibb->driver == INTEL_DRIVER_XE && ibb->xe_bound)
__unbind_xe_objects(ibb);
__intel_bb_destroy_relocations(ibb);
__intel_bb_destroy_objects(ibb);
__reallocate_objects(ibb);
if (purge_objects_cache) {
__intel_bb_remove_intel_bufs(ibb);
__intel_bb_destroy_cache(ibb);
}
/*
* When we use allocators we're in no-reloc mode, so we have to free
* and reacquire the offset (ibb->handle can change in a multiprocess
* environment). We also have to remove the object and add it again to
* the objects array and the cache tree.
*/
if (ibb->allocator_type != INTEL_ALLOCATOR_NONE && !purge_objects_cache)
intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset,
ibb->size);
gem_close(ibb->fd, ibb->handle);
if (ibb->driver == INTEL_DRIVER_I915)
ibb->handle = gem_create(ibb->fd, ibb->size);
else
ibb->handle = xe_bo_create_flags(ibb->fd, 0, ibb->size,
visible_vram_if_possible(ibb->fd, 0));
/* Reacquire offset for RELOC and SIMPLE */
if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE ||
ibb->allocator_type == INTEL_ALLOCATOR_RELOC)
ibb->batch_offset = __intel_bb_get_offset(ibb,
ibb->handle,
ibb->size,
ibb->alignment);
intel_bb_add_object(ibb, ibb->handle, ibb->size,
ibb->batch_offset,
ibb->alignment, false);
ibb->ptr = ibb->batch;
memset(ibb->batch, 0, ibb->size);
}
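/*
* Example: a sketch of reusing one intel-bb across several execs - after
* the previous submission completes the bb can be reset and refilled;
* passing true additionally drops the object cache and releases intel_buf
* addresses back to the allocator:
*
*	intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, false);
*	intel_bb_sync(ibb);
*	intel_bb_reset(ibb, false);
*	... emit the next batch ...
*/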
/*
* intel_bb_sync:
* @ibb: pointer to intel_bb
*
* Waits for bb completion. Returns 0 on success, otherwise errno.
*/
int intel_bb_sync(struct intel_bb *ibb)
{
int ret;
if (ibb->fence < 0 && !ibb->engine_syncobj)
return 0;
if (ibb->fence >= 0) {
ret = sync_fence_wait(ibb->fence, -1);
if (ret == 0) {
close(ibb->fence);
ibb->fence = -1;
}
} else {
igt_assert_neq(ibb->engine_syncobj, 0);
ret = syncobj_wait_err(ibb->fd, &ibb->engine_syncobj,
1, INT64_MAX, 0);
}
return ret;
}
/*
* intel_bb_print:
* @ibb: pointer to intel_bb
*
* Prints batch to stdout.
*/
void intel_bb_print(struct intel_bb *ibb)
{
igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n",
ibb->fd, ibb->gen, ibb->devid, ibb->debug);
igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n",
ibb->handle, ibb->size, ibb->batch, ibb->ptr);
igt_info("gtt_size: %" PRIu64 ", supports 48bit: %d\n",
ibb->gtt_size, ibb->supports_48b_address);
igt_info("ctx: %u\n", ibb->ctx);
igt_info("root: %p\n", ibb->root);
igt_info("objects: %p, num_objects: %u, allocated obj: %u\n",
ibb->objects, ibb->num_objects, ibb->allocated_objects);
igt_info("relocs: %p, num_relocs: %u, allocated_relocs: %u\n----\n",
ibb->relocs, ibb->num_relocs, ibb->allocated_relocs);
}
/*
* intel_bb_dump:
* @ibb: pointer to intel_bb
* @filename: name to which write bb
*
* Dump batch bo to file.
*/
void intel_bb_dump(struct intel_bb *ibb, const char *filename)
{
FILE *out;
void *ptr;
ptr = gem_mmap__device_coherent(ibb->fd, ibb->handle, 0, ibb->size,
PROT_READ);
out = fopen(filename, "wb");
igt_assert(out);
fwrite(ptr, ibb->size, 1, out);
fclose(out);
munmap(ptr, ibb->size);
}
/**
* intel_bb_set_debug:
* @ibb: pointer to intel_bb
* @debug: true / false
*
* Sets debug to true / false. Execbuf is then called synchronously and
* object/reloc arrays are printed after execution.
*/
void intel_bb_set_debug(struct intel_bb *ibb, bool debug)
{
ibb->debug = debug;
}
/**
* intel_bb_set_dump_base64:
* @ibb: pointer to intel_bb
* @dump: true / false
*
* Do bb dump as base64 string before execbuf call.
*/
void intel_bb_set_dump_base64(struct intel_bb *ibb, bool dump)
{
ibb->dump_base64 = dump;
}
static int __compare_objects(const void *p1, const void *p2)
{
const struct drm_i915_gem_exec_object2 *o1 = p1, *o2 = p2;
return (int) ((int64_t) o1->handle - (int64_t) o2->handle);
}
static struct drm_i915_gem_exec_object2 *
__add_to_cache(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 **found, *object;
object = malloc(sizeof(*object));
igt_assert(object);
object->handle = handle;
object->alignment = 0;
found = tsearch((void *) object, &ibb->root, __compare_objects);
if (*found == object) {
memset(object, 0, sizeof(*object));
object->handle = handle;
object->offset = INTEL_BUF_INVALID_ADDRESS;
} else {
free(object);
object = *found;
}
return object;
}
static bool __remove_from_cache(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 **found, *object;
object = intel_bb_find_object(ibb, handle);
if (!object) {
igt_warn("Object: handle: %u not found\n", handle);
return false;
}
found = tdelete((void *) object, &ibb->root, __compare_objects);
if (!found)
return false;
free(object);
return true;
}
static int __compare_handles(const void *p1, const void *p2)
{
return (int) (*(int32_t *) p1 - *(int32_t *) p2);
}
static void __add_to_objects(struct intel_bb *ibb,
struct drm_i915_gem_exec_object2 *object)
{
uint32_t **found, *handle;
handle = malloc(sizeof(*handle));
igt_assert(handle);
*handle = object->handle;
found = tsearch((void *) handle, &ibb->current, __compare_handles);
if (*found == handle) {
__reallocate_objects(ibb);
igt_assert(ibb->num_objects < ibb->allocated_objects);
ibb->objects[ibb->num_objects++] = object;
} else {
free(handle);
}
}
static void __remove_from_objects(struct intel_bb *ibb,
struct drm_i915_gem_exec_object2 *object)
{
uint32_t i, **handle, *to_free;
bool found = false;
for (i = 0; i < ibb->num_objects; i++) {
if (ibb->objects[i] == object) {
found = true;
break;
}
}
/*
* When we reset the bb (without purging) we have:
* 1. the cache, which contains all cached objects
* 2. the objects array, which contains only the bb object (cleared in the
*    reset path with the bb object added back at the end)
* So !found is a normal situation and no warning is emitted here.
*/
if (!found)
return;
ibb->num_objects--;
if (i < ibb->num_objects)
memmove(&ibb->objects[i], &ibb->objects[i + 1],
sizeof(object) * (ibb->num_objects - i));
handle = tfind((void *) &object->handle,
&ibb->current, __compare_handles);
if (!handle) {
igt_warn("Object %u doesn't exist in the tree, can't remove",
object->handle);
return;
}
to_free = *handle;
tdelete((void *) &object->handle, &ibb->current, __compare_handles);
free(to_free);
}
/**
* __intel_bb_add_object:
* @ibb: pointer to intel_bb
* @handle: which handle to add to objects array
* @size: object size
* @offset: presumed offset of the object when no relocation is enforced
* @alignment: alignment of the object, if 0 it will be set to page size
* @write: whether the handle is a render target
*
* The function adds or updates an execobj slot in the bb objects array and
* in the object tree. When the object is a render target it has to
* be marked with the EXEC_OBJECT_WRITE flag.
*/
static struct drm_i915_gem_exec_object2 *
__intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
uint64_t offset, uint64_t alignment, bool write)
{
struct drm_i915_gem_exec_object2 *object;
igt_assert(INVALID_ADDR(offset) || alignment == 0
|| ALIGN(offset, alignment) == offset);
igt_assert(is_power_of_two(alignment));
if (ibb->driver == INTEL_DRIVER_I915)
alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->fd));
else
alignment = max_t(uint64_t, ibb->alignment, alignment);
object = __add_to_cache(ibb, handle);
__add_to_objects(ibb, object);
/*
* If object->offset == INVALID_ADDRESS we have freshly added the object to
* the cache. In that case we have two choices:
* a) get a new offset (the passed offset was invalid)
* b) use the offset passed in the call (it is valid)
*/
if (INVALID_ADDR(object->offset)) {
if (INVALID_ADDR(offset)) {
offset = __intel_bb_get_offset(ibb, handle, size,
alignment);
} else {
offset = offset & (ibb->gtt_size - 1);
/*
* For simple allocator check entry consistency
* - reserve if it is not already allocated.
*/
if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) {
bool allocated, reserved;
reserved = intel_allocator_reserve_if_not_allocated(ibb->allocator_handle,
handle, size, offset,
&allocated);
igt_assert_f(allocated || reserved,
"Can't get offset, allocated: %d, reserved: %d\n",
allocated, reserved);
}
}
} else {
/*
* This assertion makes sense only when we have to be consistent
* with the underlying allocator. For relocations and when !ppgtt,
* addresses passed by the user may be moved by the driver.
*/
if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE)
igt_assert_f(object->offset == offset,
"(pid: %ld) handle: %u, offset not match: %" PRIx64 " <> %" PRIx64 "\n",
(long) getpid(), handle,
(uint64_t) object->offset,
offset);
}
object->offset = offset;
if (write)
object->flags |= EXEC_OBJECT_WRITE;
if (ibb->supports_48b_address)
object->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
if (ibb->uses_full_ppgtt && !ibb->enforce_relocs)
object->flags |= EXEC_OBJECT_PINNED;
if (ibb->allows_obj_alignment)
object->alignment = alignment;
if (ibb->driver == INTEL_DRIVER_XE) {
object->alignment = alignment;
object->rsvd1 = size;
}
return object;
}
struct drm_i915_gem_exec_object2 *
intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
uint64_t offset, uint64_t alignment, bool write)
{
struct drm_i915_gem_exec_object2 *obj = NULL;
obj = __intel_bb_add_object(ibb, handle, size, offset,
alignment, write);
igt_assert(obj);
return obj;
}
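/*
* Example: a sketch of adding a raw gem handle to the bb - passing
* INTEL_BUF_INVALID_ADDRESS lets intel-bb pick an offset from the allocator
* (or leave it to relocations) and alignment 0 means "use the safe
* default":
*
*	uint32_t handle = gem_create(fd, 4096);
*
*	intel_bb_add_object(ibb, handle, 4096,
*			    INTEL_BUF_INVALID_ADDRESS, 0, false);
*/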
bool intel_bb_remove_object(struct intel_bb *ibb, uint32_t handle,
uint64_t offset, uint64_t size)
{
struct drm_i915_gem_exec_object2 *object;
bool is_reserved;
object = intel_bb_find_object(ibb, handle);
if (!object)
return false;
if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) {
intel_allocator_free(ibb->allocator_handle, handle);
is_reserved = intel_allocator_is_reserved(ibb->allocator_handle,
size, offset);
if (is_reserved)
intel_allocator_unreserve(ibb->allocator_handle, handle,
size, offset);
}
__remove_from_objects(ibb, object);
__remove_from_cache(ibb, handle);
return true;
}
static struct drm_i915_gem_exec_object2 *
__intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf,
uint64_t alignment, bool write)
{
struct drm_i915_gem_exec_object2 *obj;
igt_assert(ibb);
igt_assert(buf);
igt_assert(!buf->ibb || buf->ibb == ibb);
igt_assert(ALIGN(alignment, 4096) == alignment);
if (!alignment) {
alignment = 0x1000;
/*
* TODO:
* Find out why MTL needs special alignment; the spec says 32k
* is enough for MTL.
*/
if (ibb->gen >= 12 && buf->compression)
alignment = IS_METEORLAKE(ibb->devid) ? 0x100000 : 0x10000;
/* For gen3 ensure tiled buffers are aligned to power of two size */
if (ibb->gen == 3 && buf->tiling) {
alignment = 1024 * 1024;
while (alignment < buf->surface[0].size)
alignment <<= 1;
}
}
obj = intel_bb_add_object(ibb, buf->handle, intel_buf_bo_size(buf),
buf->addr.offset, alignment, write);
buf->addr.offset = obj->offset;
if (igt_list_empty(&buf->link)) {
igt_list_add_tail(&buf->link, &ibb->intel_bufs);
buf->ibb = ibb;
} else {
igt_assert(buf->ibb == ibb);
}
return obj;
}
struct drm_i915_gem_exec_object2 *
intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf, bool write)
{
return __intel_bb_add_intel_buf(ibb, buf, 0, write);
}
struct drm_i915_gem_exec_object2 *
intel_bb_add_intel_buf_with_alignment(struct intel_bb *ibb, struct intel_buf *buf,
uint64_t alignment, bool write)
{
return __intel_bb_add_intel_buf(ibb, buf, alignment, write);
}
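/*
* Example: a sketch of adding an intel_buf as a render target (assumes a
* struct buf_ops created with buf_ops_create(fd); width/height/bpp values
* are illustrative):
*
*	struct intel_buf *dst = intel_buf_create(bops, 512, 512, 32, 0,
*						 I915_TILING_NONE,
*						 I915_COMPRESSION_NONE);
*
*	intel_bb_add_intel_buf(ibb, dst, true);
*/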
bool intel_bb_remove_intel_buf(struct intel_bb *ibb, struct intel_buf *buf)
{
bool removed;
igt_assert(ibb);
igt_assert(buf);
igt_assert(!buf->ibb || buf->ibb == ibb);
if (igt_list_empty(&buf->link))
return false;
removed = intel_bb_remove_object(ibb, buf->handle,
buf->addr.offset,
intel_buf_bo_size(buf));
if (removed) {
buf->addr.offset = INTEL_BUF_INVALID_ADDRESS;
buf->ibb = NULL;
igt_list_del_init(&buf->link);
}
return removed;
}
void intel_bb_print_intel_bufs(struct intel_bb *ibb)
{
struct intel_buf *entry;
igt_list_for_each_entry(entry, &ibb->intel_bufs, link) {
igt_info("handle: %u, ibb: %p, offset: %lx\n",
entry->handle, entry->ibb,
(long) entry->addr.offset);
}
}
struct drm_i915_gem_exec_object2 *
intel_bb_find_object(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
found = tfind((void *) &object, &ibb->root, __compare_objects);
if (!found)
return NULL;
return *found;
}
bool
intel_bb_object_set_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
igt_assert_f(ibb->root, "Trying to search in null tree\n");
found = tfind((void *) &object, &ibb->root, __compare_objects);
if (!found) {
igt_warn("Trying to set fence on not found handle: %u\n",
handle);
return false;
}
(*found)->flags |= flag;
return true;
}
bool
intel_bb_object_clear_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
found = tfind((void *) &object, &ibb->root, __compare_objects);
if (!found) {
igt_warn("Trying to set fence on not found handle: %u\n",
handle);
return false;
}
(*found)->flags &= ~flag;
return true;
}
/*
* intel_bb_add_reloc:
* @ibb: pointer to intel_bb
* @to_handle: object handle in which to apply the relocation
* @handle: object handle whose address will be used to patch @to_handle
* @read_domains: gem domain bits for the relocation
* @write_domain: gem domain bit for the relocation
* @delta: delta value to add to the object's gpu address
* @offset: offset within the bb to be patched
*
* When relocations are requested the function allocates an additional
* relocation slot in the reloc array for the handle.
* The object must have been previously added to the bb.
*/
static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
uint32_t to_handle,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t offset,
uint64_t presumed_offset)
{
struct drm_i915_gem_relocation_entry *relocs;
struct drm_i915_gem_exec_object2 *object, *to_object;
uint32_t i;
object = intel_bb_find_object(ibb, handle);
igt_assert(object);
/* In no-reloc mode we just return the previously assigned address */
if (!ibb->enforce_relocs)
goto out;
/* For ibb we have relocs allocated in chunks */
if (to_handle == ibb->handle) {
relocs = ibb->relocs;
if (ibb->num_relocs == ibb->allocated_relocs) {
ibb->allocated_relocs += 4096 / sizeof(*relocs);
relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs);
igt_assert(relocs);
ibb->relocs = relocs;
}
i = ibb->num_relocs++;
} else {
to_object = intel_bb_find_object(ibb, to_handle);
igt_assert_f(to_object, "object has to be added to ibb first!\n");
i = to_object->relocation_count++;
relocs = from_user_pointer(to_object->relocs_ptr);
relocs = realloc(relocs, sizeof(*relocs) * to_object->relocation_count);
to_object->relocs_ptr = to_user_pointer(relocs);
igt_assert(relocs);
}
memset(&relocs[i], 0, sizeof(*relocs));
relocs[i].target_handle = handle;
relocs[i].read_domains = read_domains;
relocs[i].write_domain = write_domain;
relocs[i].delta = delta;
relocs[i].offset = offset;
if (ibb->enforce_relocs)
relocs[i].presumed_offset = -1;
else
relocs[i].presumed_offset = object->offset;
igt_debug("add reloc: to_handle: %u, handle: %u, r/w: 0x%x/0x%x, "
"delta: 0x%" PRIx64 ", "
"offset: 0x%" PRIx64 ", "
"poffset: %p\n",
to_handle, handle, read_domains, write_domain,
delta, offset,
from_user_pointer(relocs[i].presumed_offset));
out:
return object->offset;
}
static uint64_t __intel_bb_emit_reloc(struct intel_bb *ibb,
uint32_t to_handle,
uint32_t to_offset,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
{
uint64_t address;
igt_assert(ibb);
address = intel_bb_add_reloc(ibb, to_handle, handle,
read_domains, write_domain,
delta, to_offset,
presumed_offset);
intel_bb_out(ibb, delta + address);
if (ibb->gen >= 8)
intel_bb_out(ibb, (delta + address) >> 32);
return address;
}
/**
* intel_bb_emit_reloc:
* @ibb: pointer to intel_bb
* @handle: object handle whose address will be used to patch the bb
* @read_domains: gem domain bits for the relocation
* @write_domain: gem domain bit for the relocation
* @delta: delta value to add to the object's gpu address
* @presumed_offset: address of the object in the address space. If -1 is
* passed then the final offset of the object will be randomized (for a
* no-reloc bb) or 0 (for a reloc bb, in which case reloc.presumed_offset
* will be -1). If the address is known it should be passed in
* @presumed_offset (for no-reloc).
*
* The function prepares the relocation (execobj if required + reloc) and
* emits the offset in the bb. For I915_EXEC_NO_RELOC, @presumed_offset is a
* hint that the object is already in a valid place and the relocation step
* can be skipped in this case.
*
* Note: @delta is a value added to the address, mostly used when an
* instruction requires a modify bit set to apply a change. Which delta is
* valid depends on the instruction (see the instruction specification).
*/
uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
{
igt_assert(ibb);
return __intel_bb_emit_reloc(ibb, ibb->handle, intel_bb_offset(ibb),
handle, read_domains, write_domain,
delta, presumed_offset);
}
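/*
* Example: a sketch of emitting an instruction operand that needs a gpu
* address - cmd_dword0 stands for whatever instruction dword precedes the
* address and target_handle for a bo already known to intel-bb; on gen8+
* the address is emitted as two dwords:
*
*	intel_bb_out(ibb, cmd_dword0);
*	intel_bb_emit_reloc(ibb, target_handle,
*			    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
*			    0, INTEL_BUF_INVALID_ADDRESS);
*
* intel_bb_offset_reloc() below does the same bookkeeping but patches a
* caller-supplied offset in the bb instead of emitting dwords.
*/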
uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
{
uint64_t address;
address = intel_bb_emit_reloc(ibb, handle, read_domains, write_domain,
delta, presumed_offset);
intel_bb_object_set_flag(ibb, handle, EXEC_OBJECT_NEEDS_FENCE);
return address;
}
/**
* intel_bb_offset_reloc:
* @ibb: pointer to intel_bb
* @handle: object handle whose address will be used to patch the bb
* @read_domains: gem domain bits for the relocation
* @write_domain: gem domain bit for the relocation
* @offset: offset within the bb to be patched
* @presumed_offset: address of the object in the address space. If -1 is
* passed then the final offset of the object will be randomized (for a
* no-reloc bb) or 0 (for a reloc bb, in which case reloc.presumed_offset
* will be -1). If the address is known it should be passed in
* @presumed_offset (for no-reloc).
*
* The function prepares the relocation (execobj if required + reloc). It is
* used when editing a batchbuffer by modifying structures: when preparing a
* batchbuffer it is sometimes more descriptive to edit a structure than to
* emit dwords, but some fields then need to point to the relocation. For
* that case @offset is passed by the user and points to the offset in the
* bb where the relocation will be applied.
*/
uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset,
uint64_t presumed_offset)
{
igt_assert(ibb);
return intel_bb_add_reloc(ibb, ibb->handle, handle,
read_domains, write_domain,
0, offset, presumed_offset);
}
uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint32_t delta,
uint32_t offset,
uint64_t presumed_offset)
{
igt_assert(ibb);
return intel_bb_add_reloc(ibb, ibb->handle, handle,
read_domains, write_domain,
delta, offset, presumed_offset);
}
uint64_t intel_bb_offset_reloc_to_object(struct intel_bb *ibb,
uint32_t to_handle,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint32_t delta,
uint32_t offset,
uint64_t presumed_offset)
{
igt_assert(ibb);
return intel_bb_add_reloc(ibb, to_handle, handle,
read_domains, write_domain,
delta, offset, presumed_offset);
}
/*
* @intel_bb_set_pxp:
* @ibb: pointer to intel_bb
* @new_state: enable or disable pxp session
* @apptype: pxp session input identifies what type of session to enable
* @appid: pxp session input provides which appid to use
*
* This function merely stores the pxp state and session information to
* be retrieved and programmed later by supporting libraries such as
* gen12_render_copy that must program the HW within the same dispatch
*/
void intel_bb_set_pxp(struct intel_bb *ibb, bool new_state,
uint32_t apptype, uint32_t appid)
{
igt_assert(ibb);
ibb->pxp.enabled = new_state;
ibb->pxp.apptype = new_state ? apptype : 0;
ibb->pxp.appid = new_state ? appid : 0;
}
static void intel_bb_dump_execbuf(struct intel_bb *ibb,
struct drm_i915_gem_execbuffer2 *execbuf)
{
struct drm_i915_gem_exec_object2 *objects;
struct drm_i915_gem_relocation_entry *relocs, *reloc;
int i, j;
uint64_t address;
igt_debug("execbuf [pid: %ld, fd: %d, ctx: %u]\n",
(long) getpid(), ibb->fd, ibb->ctx);
igt_debug("execbuf batch len: %u, start offset: 0x%x, "
"DR1: 0x%x, DR4: 0x%x, "
"num clip: %u, clipptr: 0x%llx, "
"flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n",
execbuf->batch_len, execbuf->batch_start_offset,
execbuf->DR1, execbuf->DR4,
execbuf->num_cliprects, execbuf->cliprects_ptr,
execbuf->flags, execbuf->rsvd1, execbuf->rsvd2);
igt_debug("execbuf buffer_count: %d\n", execbuf->buffer_count);
for (i = 0; i < execbuf->buffer_count; i++) {
objects = &((struct drm_i915_gem_exec_object2 *)
from_user_pointer(execbuf->buffers_ptr))[i];
relocs = from_user_pointer(objects->relocs_ptr);
address = objects->offset;
igt_debug(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, "
"align: 0x%llx, offset: 0x%" PRIx64 ", flags: 0x%llx, "
"rsvd1: 0x%llx, rsvd2: 0x%llx\n",
i, objects->handle, objects->relocation_count,
relocs,
objects->alignment,
address,
objects->flags,
objects->rsvd1, objects->rsvd2);
if (objects->relocation_count) {
igt_debug("\texecbuf relocs:\n");
for (j = 0; j < objects->relocation_count; j++) {
reloc = &relocs[j];
address = reloc->presumed_offset;
igt_debug("\t [%d] target handle: %u, "
"offset: 0x%llx, delta: 0x%x, "
"presumed_offset: 0x%" PRIx64 ", "
"read_domains: 0x%x, "
"write_domain: 0x%x\n",
j, reloc->target_handle,
reloc->offset, reloc->delta,
address,
reloc->read_domains,
reloc->write_domain);
}
}
}
}
static void intel_bb_dump_base64(struct intel_bb *ibb, int linelen)
{
int outsize;
gchar *str, *pos;
igt_info("--- bb ---\n");
pos = str = g_base64_encode((const guchar *) ibb->batch, ibb->size);
outsize = strlen(str);
while (outsize > 0) {
igt_info("%.*s\n", min(outsize, linelen), pos);
pos += linelen;
outsize -= linelen;
}
g_free(str);
}
static void print_node(const void *node, VISIT which, int depth)
{
const struct drm_i915_gem_exec_object2 *object =
*(const struct drm_i915_gem_exec_object2 **) node;
(void) depth;
switch (which) {
case preorder:
case endorder:
break;
case postorder:
case leaf:
igt_info("\t handle: %u, offset: 0x%" PRIx64 "\n",
object->handle, (uint64_t) object->offset);
break;
}
}
void intel_bb_dump_cache(struct intel_bb *ibb)
{
igt_info("[pid: %ld] dump cache\n", (long) getpid());
twalk(ibb->root, print_node);
}
static struct drm_i915_gem_exec_object2 *
create_objects_array(struct intel_bb *ibb)
{
struct drm_i915_gem_exec_object2 *objects;
uint32_t i;
objects = malloc(sizeof(*objects) * ibb->num_objects);
igt_assert(objects);
for (i = 0; i < ibb->num_objects; i++) {
objects[i] = *(ibb->objects[i]);
objects[i].offset = CANONICAL(objects[i].offset);
}
return objects;
}
static void update_offsets(struct intel_bb *ibb,
struct drm_i915_gem_exec_object2 *objects)
{
struct drm_i915_gem_exec_object2 *object;
struct intel_buf *entry;
uint32_t i;
for (i = 0; i < ibb->num_objects; i++) {
object = intel_bb_find_object(ibb, objects[i].handle);
igt_assert(object);
object->offset = DECANONICAL(objects[i].offset);
if (i == 0)
ibb->batch_offset = object->offset;
}
igt_list_for_each_entry(entry, &ibb->intel_bufs, link) {
object = intel_bb_find_object(ibb, entry->handle);
igt_assert(object);
if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE)
igt_assert(object->offset == entry->addr.offset);
else
entry->addr.offset = object->offset;
entry->addr.ctx = ibb->ctx;
}
}
#define LINELEN 76
static int
__xe_bb_exec(struct intel_bb *ibb, uint64_t flags, bool sync)
{
uint32_t engine = flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK);
uint32_t engine_id;
struct drm_xe_sync syncs[2] = {
{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
};
struct drm_xe_vm_bind_op *bind_ops;
void *map;
igt_assert_eq(ibb->num_relocs, 0);
igt_assert_eq(ibb->xe_bound, false);
if (ibb->ctx) {
engine_id = ibb->ctx;
} else if (ibb->last_engine != engine) {
struct drm_xe_engine_class_instance inst = { };
inst.engine_instance =
(flags & I915_EXEC_BSD_MASK) >> I915_EXEC_BSD_SHIFT;
switch (flags & I915_EXEC_RING_MASK) {
case I915_EXEC_DEFAULT:
case I915_EXEC_BLT:
inst.engine_class = DRM_XE_ENGINE_CLASS_COPY;
break;
case I915_EXEC_BSD:
inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
break;
case I915_EXEC_RENDER:
if (IS_PONTEVECCHIO(xe_dev_id(ibb->fd)))
inst.engine_class = DRM_XE_ENGINE_CLASS_COMPUTE;
else
inst.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
break;
case I915_EXEC_VEBOX:
inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
break;
default:
igt_assert_f(false, "Unknown engine: %x", (uint32_t) flags);
}
igt_debug("Run on %s\n", xe_engine_class_string(inst.engine_class));
if (ibb->engine_id)
xe_exec_queue_destroy(ibb->fd, ibb->engine_id);
ibb->engine_id = engine_id =
xe_exec_queue_create(ibb->fd, ibb->vm_id, &inst, 0);
} else {
engine_id = ibb->engine_id;
}
ibb->last_engine = engine;
map = xe_bo_map(ibb->fd, ibb->handle, ibb->size);
memcpy(map, ibb->batch, ibb->size);
gem_munmap(map, ibb->size);
syncs[0].handle = syncobj_create(ibb->fd, 0);
if (ibb->num_objects > 1) {
bind_ops = xe_alloc_bind_ops(ibb, XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC, 0);
xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
ibb->num_objects, syncs, 1);
free(bind_ops);
} else {
igt_debug("bind: MAP\n");
igt_debug(" handle: %u, offset: %llx, size: %llx\n",
ibb->handle, (long long)ibb->batch_offset,
(long long)ibb->size);
xe_vm_bind_async(ibb->fd, ibb->vm_id, 0, ibb->handle, 0,
ibb->batch_offset, ibb->size, syncs, 1);
}
ibb->xe_bound = true;
syncs[0].flags &= ~DRM_XE_SYNC_SIGNAL;
ibb->engine_syncobj = syncobj_create(ibb->fd, 0);
syncs[1].handle = ibb->engine_syncobj;
xe_exec_sync(ibb->fd, engine_id, ibb->batch_offset, syncs, 2);
if (sync)
intel_bb_sync(ibb);
return 0;
}
/*
* __intel_bb_exec:
* @ibb: pointer to intel_bb
* @end_offset: offset of the last instruction in the bb
* @flags: flags passed directly to execbuf
* @sync: if true, wait for execbuf completion; otherwise the caller is
* responsible for waiting for completion
*
* Returns: 0 on success, otherwise errno.
*
* Note: in this step the execobj for the bb is allocated and inserted into the
* objects array.
*/
int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
uint64_t flags, bool sync)
{
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 *objects;
int ret, fence, new_fence;
ibb->objects[0]->relocs_ptr = to_user_pointer(ibb->relocs);
ibb->objects[0]->relocation_count = ibb->num_relocs;
ibb->objects[0]->handle = ibb->handle;
ibb->objects[0]->offset = ibb->batch_offset;
gem_write(ibb->fd, ibb->handle, 0, ibb->batch, ibb->size);
memset(&execbuf, 0, sizeof(execbuf));
objects = create_objects_array(ibb);
execbuf.buffers_ptr = to_user_pointer(objects);
execbuf.buffer_count = ibb->num_objects;
execbuf.batch_len = end_offset;
execbuf.rsvd1 = ibb->ctx;
execbuf.flags = flags | I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_OUT;
if (ibb->enforce_relocs)
execbuf.flags &= ~I915_EXEC_NO_RELOC;
execbuf.rsvd2 = 0;
if (ibb->dump_base64)
intel_bb_dump_base64(ibb, LINELEN);
/* For debugging on CI, remove in final series */
intel_bb_dump_execbuf(ibb, &execbuf);
ret = __gem_execbuf_wr(ibb->fd, &execbuf);
if (ret) {
intel_bb_dump_execbuf(ibb, &execbuf);
free(objects);
return ret;
}
/* Update addresses in the cache */
update_offsets(ibb, objects);
/* Save/merge fences */
fence = execbuf.rsvd2 >> 32;
if (ibb->fence < 0) {
ibb->fence = fence;
} else {
new_fence = sync_fence_merge(ibb->fence, fence);
close(ibb->fence);
close(fence);
ibb->fence = new_fence;
}
if (sync || ibb->debug)
igt_assert(intel_bb_sync(ibb) == 0);
if (ibb->debug) {
intel_bb_dump_execbuf(ibb, &execbuf);
if (intel_bb_debug_tree) {
igt_info("\nTree:\n");
twalk(ibb->root, print_node);
}
}
free(objects);
return 0;
}
/**
* intel_bb_exec:
* @ibb: pointer to intel_bb
* @end_offset: offset of the last instruction in the bb (for i915)
* @flags: flags passed directly to execbuf
* @sync: if true, wait for execbuf completion; otherwise the caller is
* responsible for waiting for completion
*
* Do execbuf on context selected during bb creation. Asserts on failure.
*/
void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
uint64_t flags, bool sync)
{
if (ibb->dump_base64)
intel_bb_dump_base64(ibb, LINELEN);
if (ibb->driver == INTEL_DRIVER_I915)
igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
else
igt_assert_eq(__xe_bb_exec(ibb, flags, sync), 0);
}
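/*
* Example flow (illustrative sketch; @ibb is assumed to come from one of the
* intel_bb create helpers): a test emits its commands, terminates the batch
* and executes it synchronously on the default engine without relocations.
*
*	intel_bb_out(ibb, MI_NOOP);
*	intel_bb_emit_bbe(ibb);
*	intel_bb_exec(ibb, intel_bb_offset(ibb),
*		      I915_EXEC_DEFAULT | I915_EXEC_NO_RELOC, true);
*/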
/**
* intel_bb_get_object_offset:
* @ibb: pointer to intel_bb
* @handle: object handle
*
* When object addresses were pinned previously and we don't want to relocate,
* we need to acquire them from the previous execbuf. The function returns the
* previous object offset for @handle, or INTEL_BUF_INVALID_ADDRESS if the
* object is not found.
*/
uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
igt_assert(ibb);
found = tfind((void *)&object, &ibb->root, __compare_objects);
if (!found)
return INTEL_BUF_INVALID_ADDRESS;
return (*found)->offset;
}
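/*
* Example (illustrative sketch; @buf is a hypothetical intel_buf that was
* added to @ibb before execution): after an execbuf the pinned address of a
* buffer can be queried back, e.g. to reuse it in a later batch.
*
*	uint64_t offset = intel_bb_get_object_offset(ibb, buf->handle);
*
*	igt_assert(offset != INTEL_BUF_INVALID_ADDRESS);
*/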
/*
* intel_bb_emit_bbe:
* @ibb: batchbuffer
*
* Outputs MI_BATCH_BUFFER_END and ensures batch is properly aligned.
*/
uint32_t intel_bb_emit_bbe(struct intel_bb *ibb)
{
/* Mark the end of the buffer. */
intel_bb_out(ibb, MI_BATCH_BUFFER_END);
intel_bb_ptr_align(ibb, 8);
return intel_bb_offset(ibb);
}
/*
* intel_bb_emit_flush_common:
* @ibb: batchbuffer
*
* Emits the instructions which complete the batch buffer.
*
* Returns: offset in the batch buffer at the end of the emitted instructions,
* or 0 if the batch was empty.
*/
uint32_t intel_bb_emit_flush_common(struct intel_bb *ibb)
{
if (intel_bb_offset(ibb) == 0)
return 0;
if (ibb->gen == 5) {
/*
* emit gen5 w/a without batch space checks - we reserve that
* already.
*/
intel_bb_out(ibb, CMD_POLY_STIPPLE_OFFSET << 16);
intel_bb_out(ibb, 0);
}
/* Round batchbuffer usage to 2 DWORDs. */
if ((intel_bb_offset(ibb) & 4) == 0)
intel_bb_out(ibb, 0);
intel_bb_emit_bbe(ibb);
return intel_bb_offset(ibb);
}
static void intel_bb_exec_with_ring(struct intel_bb *ibb, uint32_t ring)
{
intel_bb_exec(ibb, intel_bb_offset(ibb),
ring | I915_EXEC_NO_RELOC, false);
intel_bb_reset(ibb, false);
}
/*
* intel_bb_flush:
* @ibb: batchbuffer
* @ring: ring
*
* If the batch is not empty, emit the batch buffer end, execute it on @ring
* and then reset the batch.
*/
void intel_bb_flush(struct intel_bb *ibb, uint32_t ring)
{
if (intel_bb_emit_flush_common(ibb) == 0)
return;
intel_bb_exec_with_ring(ibb, ring);
}
/*
* intel_bb_flush_render:
* @ibb: batchbuffer
*
* If the batch is not empty, emit the batch buffer end, find the render
* engine id, execute on that ring and reset the batch. The context used to
* execute is the batch context.
*/
void intel_bb_flush_render(struct intel_bb *ibb)
{
uint32_t ring;
if (intel_bb_emit_flush_common(ibb) == 0)
return;
if (has_ctx_cfg(ibb))
ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_RENDER);
else
ring = I915_EXEC_RENDER;
intel_bb_exec_with_ring(ibb, ring);
}
/*
* intel_bb_flush_blit:
* @ibb: batchbuffer
*
* If the batch is not empty, emit the batch buffer end, find a suitable ring
* (depending on gen and context configuration), execute on it and reset the
* batch. The context used to execute is the batch context.
*/
void intel_bb_flush_blit(struct intel_bb *ibb)
{
uint32_t ring;
if (intel_bb_emit_flush_common(ibb) == 0)
return;
if (has_ctx_cfg(ibb))
ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_COPY);
else
ring = HAS_BLT_RING(ibb->devid) ? I915_EXEC_BLT : I915_EXEC_DEFAULT;
intel_bb_exec_with_ring(ibb, ring);
}
/*
* intel_bb_copy_data:
* @ibb: batchbuffer
* @data: pointer to the data which should be copied into the batch
* @bytes: number of bytes to copy, must be a multiple of a dword (4 bytes)
* @align: alignment of the copied data within the batch
*
* Copies @bytes of data pointed to by @data into the batch buffer.
*
* Returns: offset in the batch buffer where the data was copied.
*/
uint32_t intel_bb_copy_data(struct intel_bb *ibb,
const void *data, unsigned int bytes,
uint32_t align)
{
uint32_t *subdata, offset;
igt_assert((bytes & 3) == 0);
intel_bb_ptr_align(ibb, align);
offset = intel_bb_offset(ibb);
igt_assert(offset + bytes < ibb->size);
subdata = intel_bb_ptr(ibb);
memcpy(subdata, data, bytes);
intel_bb_ptr_add(ibb, bytes);
return offset;
}
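/*
* Example (illustrative sketch; the payload is hypothetical): copying a small
* constant block into the batch and remembering where it landed so a later
* instruction can reference it.
*
*	static const uint32_t consts[4] = { 0x1, 0x2, 0x3, 0x4 };
*	uint32_t consts_offset;
*
*	consts_offset = intel_bb_copy_data(ibb, consts, sizeof(consts), 64);
*/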
/*
* intel_bb_blit_start:
* @ibb: batchbuffer
* @flags: flags to blit command
*
* Emits the XY_SRC_COPY_BLT instruction (or XY_FAST_COPY_BLT where the former
* is not available) with a length appropriate for the gen.
*/
void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags)
{
if (blt_has_xy_src_copy(ibb->fd))
intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
flags |
(6 + 2 * (ibb->gen >= 8)));
else if (blt_has_fast_copy(ibb->fd))
intel_bb_out(ibb, XY_FAST_COPY_BLT | flags);
else
igt_assert_f(0, "No supported blit command found\n");
}
/*
* intel_bb_emit_blt_copy:
* @ibb: batchbuffer
* @src: source buffer (intel_buf)
* @src_x1: source x1 position
* @src_y1: source y1 position
* @src_pitch: source pitch
* @dst: destination buffer (intel_buf)
* @dst_x1: destination x1 position
* @dst_y1: destination y1 position
* @dst_pitch: destination pitch
* @width: width of data to copy
* @height: height of data to copy
* @bpp: bits per pixel
*
* Emits a complete blit command.
*/
void intel_bb_emit_blt_copy(struct intel_bb *ibb,
struct intel_buf *src,
int src_x1, int src_y1, int src_pitch,
struct intel_buf *dst,
int dst_x1, int dst_y1, int dst_pitch,
int width, int height, int bpp)
{
const unsigned int gen = ibb->gen;
uint32_t cmd_bits = 0;
uint32_t br13_bits;
uint32_t mask;
igt_assert(bpp*(src_x1 + width) <= 8*src_pitch);
igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch);
igt_assert(src_pitch * (src_y1 + height) <= src->surface[0].size);
igt_assert(dst_pitch * (dst_y1 + height) <= dst->surface[0].size);
if (gen >= 4 && src->tiling != I915_TILING_NONE) {
src_pitch /= 4;
if (blt_has_xy_src_copy(ibb->fd))
cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
else if (blt_has_fast_copy(ibb->fd))
cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
else
igt_assert_f(0, "No supported blit command found\n");
}
if (gen >= 4 && dst->tiling != I915_TILING_NONE) {
dst_pitch /= 4;
if (blt_has_xy_src_copy(ibb->fd))
cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
else
cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
}
CHECK_RANGE(src_x1); CHECK_RANGE(src_y1);
CHECK_RANGE(dst_x1); CHECK_RANGE(dst_y1);
CHECK_RANGE(width); CHECK_RANGE(height);
CHECK_RANGE(src_x1 + width); CHECK_RANGE(src_y1 + height);
CHECK_RANGE(dst_x1 + width); CHECK_RANGE(dst_y1 + height);
CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
br13_bits = 0;
if (blt_has_xy_src_copy(ibb->fd)) {
switch (bpp) {
case 8:
break;
case 16: /* supporting only RGB565, not ARGB1555 */
br13_bits |= 1 << 24;
break;
case 32:
br13_bits |= 3 << 24;
cmd_bits |= (XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB);
break;
default:
igt_assert_f(0, "Unsupported pixel depth\n");
}
} else {
br13_bits = fast_copy_dword1(ibb->fd, src->tiling, dst->tiling, bpp);
}
if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
intel_bb_out(ibb, BCS_SWCTRL);
mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
if (src->tiling == I915_TILING_Y)
mask |= BCS_SRC_Y;
if (dst->tiling == I915_TILING_Y)
mask |= BCS_DST_Y;
intel_bb_out(ibb, mask);
}
intel_bb_add_intel_buf(ibb, src, false);
intel_bb_add_intel_buf(ibb, dst, true);
intel_bb_blit_start(ibb, cmd_bits);
intel_bb_out(ibb, (br13_bits) |
(0xcc << 16) | /* copy ROP */
dst_pitch);
intel_bb_out(ibb, (dst_y1 << 16) | dst_x1); /* dst x1,y1 */
intel_bb_out(ibb, ((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */
intel_bb_emit_reloc_fenced(ibb, dst->handle,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0, dst->addr.offset);
intel_bb_out(ibb, (src_y1 << 16) | src_x1); /* src x1,y1 */
intel_bb_out(ibb, src_pitch);
intel_bb_emit_reloc_fenced(ibb, src->handle,
I915_GEM_DOMAIN_RENDER, 0,
0, src->addr.offset);
if (gen >= 6 && src->handle == dst->handle) {
intel_bb_out(ibb, XY_SETUP_CLIP_BLT_CMD);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
}
if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
igt_assert(ibb->gen >= 6);
intel_bb_out(ibb, MI_FLUSH_DW_CMD | 2);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
intel_bb_out(ibb, BCS_SWCTRL);
intel_bb_out(ibb, (BCS_SRC_Y | BCS_DST_Y) << 16);
}
}
void intel_bb_blt_copy(struct intel_bb *ibb,
struct intel_buf *src,
int src_x1, int src_y1, int src_pitch,
struct intel_buf *dst,
int dst_x1, int dst_y1, int dst_pitch,
int width, int height, int bpp)
{
intel_bb_emit_blt_copy(ibb, src, src_x1, src_y1, src_pitch,
dst, dst_x1, dst_y1, dst_pitch,
width, height, bpp);
intel_bb_flush_blit(ibb);
}
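/*
* Example (illustrative sketch; @src and @dst are hypothetical linear,
* 32bpp intel_bufs of at least 512x512): a full-surface copy. Pitches are
* given in bytes here; the helper adjusts them for tiled surfaces.
*
*	intel_bb_blt_copy(ibb,
*			  src, 0, 0, 512 * 4,
*			  dst, 0, 0, 512 * 4,
*			  512, 512, 32);
*/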
/**
* intel_bb_copy_intel_buf:
* @ibb: pointer to intel_bb
* @src: source buffer (intel_buf)
* @dst: destination buffer (intel_buf)
* @size: size of the copy range in bytes
*
* Emits a copy operation using blitter commands into the supplied batch.
* A total of @size bytes from the start of @src is copied
* over to @dst. Note that @size must be page-aligned.
*/
void intel_bb_copy_intel_buf(struct intel_bb *ibb,
struct intel_buf *src, struct intel_buf *dst,
long int size)
{
igt_assert(size % 4096 == 0);
intel_bb_blt_copy(ibb,
src, 0, 0, 4096,
dst, 0, 0, 4096,
4096/4, size/4096, 32);
}
/**
* igt_get_huc_copyfunc:
* @devid: pci device id
*
* Returns:
*
* The platform-specific huc copy function pointer for the device specified
* with @devid. Will return NULL when no huc copy function is implemented.
*/
igt_huc_copyfunc_t igt_get_huc_copyfunc(int devid)
{
igt_huc_copyfunc_t copy = NULL;
if (IS_GEN12(devid) || IS_GEN11(devid) || IS_GEN9(devid))
copy = gen9_huc_copyfunc;
return copy;
}
/**
* intel_bb_track:
* @do_tracking: bool
*
* Turn on (true) or off (false) tracking for intel_batchbuffers.
*/
void intel_bb_track(bool do_tracking)
{
if (intel_bb_do_tracking == do_tracking)
return;
if (intel_bb_do_tracking) {
struct intel_bb *entry, *tmp;
pthread_mutex_lock(&intel_bb_list_lock);
igt_list_for_each_entry_safe(entry, tmp, &intel_bb_list, link)
igt_list_del(&entry->link);
pthread_mutex_unlock(&intel_bb_list_lock);
}
intel_bb_do_tracking = do_tracking;
}
static void __intel_bb_reinit_alloc(struct intel_bb *ibb)
{
if (ibb->allocator_type == INTEL_ALLOCATOR_NONE)
return;
ibb->allocator_handle = intel_allocator_open_full(ibb->fd, ibb->ctx,
ibb->allocator_start, ibb->allocator_end,
ibb->allocator_type,
ibb->allocator_strategy,
ibb->alignment);
intel_bb_reset(ibb, true);
}
/**
* intel_bb_reinit_allocator:
*
* Reinit allocator and get offsets in tracked intel_batchbuffers.
*/
void intel_bb_reinit_allocator(void)
{
struct intel_bb *iter;
if (!intel_bb_do_tracking)
return;
pthread_mutex_lock(&intel_bb_list_lock);
igt_list_for_each_entry(iter, &intel_bb_list, link)
__intel_bb_reinit_alloc(iter);
pthread_mutex_unlock(&intel_bb_list_lock);
}
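/*
* Example (illustrative sketch of one possible usage): tests which restart
* the allocator can keep tracked batchbuffers usable by reopening their
* allocator handles afterwards.
*
*	intel_bb_track(true);
*	... create intel_bb objects and use them ...
*	... allocator is reopened/restarted ...
*	intel_bb_reinit_allocator();
*/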