lib/igt_dummyload.c - platform/external/igt-gpu-tools - Git at Google

 /*
  * Copyright © 2016 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  *
  */

 #include <time.h>
 #include <signal.h>
 #include <pthread.h>
 #include <sys/poll.h>

 #include <i915_drm.h>

 #include "igt_core.h"
 #include "drmtest.h"
 #include "igt_device.h"
 #include "igt_dummyload.h"
 #include "igt_gt.h"
 #include "intel_chipset.h"
 #include "intel_reg.h"
 #include "ioctl_wrappers.h"
 #include "sw_sync.h"
 #include "igt_vgem.h"
 #include "i915/gem_engine_topology.h"
 #include "i915/gem_mman.h"

 /**
  * SECTION:igt_dummyload
  * @short_description: Library for submitting GPU workloads
  * @title: Dummyload
  * @include: igt.h
  *
  * A lot of igt testcases need some GPU workload to make sure a race window is
  * big enough. Unfortunately having a fixed amount of workload leads to
  * spurious test failures or overly long runtimes on some fast/slow platforms.
  * This library contains functionality to submit GPU workloads that should
  * consume exactly a specific amount of time.
  */

 /*
  * Local definition of a two-bit execbuf flag field starting at bit 13.
  * NOTE(review): presumably mirrors the upstream I915_EXEC_BSD_* names for
  * builds against headers that lack them - confirm.
  */
 #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
 #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

 /* Every execbuf flag bit that is cleared before a per-engine submit. */
 #define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

 /* Command dword emitted at the loop start so the spinner can be preempted. */
 #define MI_ARB_CHK (0x5 << 23)

 /* Size in bytes of the buffer object that holds the spinning batch. */
 static const int BATCH_SIZE = 4096;
 /* Byte offset inside the batch that the recursive jump targets. */
 static const int LOOP_START_OFFSET = 64;

 /* Spinners are linked here on creation so igt_terminate_spins() can end them. */
 static IGT_LIST(spin_list);
 /* Taken around additions, removals and the igt_terminate_spins() walk. */
 static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

 /*
  * emit_recursive_batch - build and submit the spinning batch for @spin.
  * @spin: spinner whose execbuf, obj[], handle, poll and condition fields are
  *        filled in here
  * @fd: i915 drm file descriptor used for all GEM calls
  * @opts: engine, context, dependency and IGT_SPIN_* flags for this spinner
  *
  * The batch consists of an optional MI_STORE_DWORD_IMM preamble (only with
  * IGT_SPIN_POLL_RUN) followed, at byte LOOP_START_OFFSET, by a loop body that
  * ends in an MI_BATCH_BUFFER_START jumping back to LOOP_START_OFFSET of the
  * same buffer. spin->condition points at the first dword of that loop body.
  * The same execbuf is submitted once per selected engine.
  *
  * Returns the out-fence fd (merged over all submissions) when
  * IGT_SPIN_FENCE_OUT is set in opts->flags, otherwise -1.
  */
 static int
 emit_recursive_batch(igt_spin_t *spin,
 		     int fd, const struct igt_spin_factory *opts)
 {
 #define SCRATCH 0
 #define BATCH IGT_SPIN_BATCH
 	const int gen = intel_gen(intel_get_drm_devid(fd));
 	struct drm_i915_gem_relocation_entry relocs[2], *r;
 	struct drm_i915_gem_execbuffer2 *execbuf;
 	struct drm_i915_gem_exec_object2 *obj;
 	unsigned int flags[GEM_MAX_ENGINES];
 	unsigned int nengine;
 	int fence_fd = -1;
 	uint32_t *cs, *batch;
 	int i;

 	/* Gather the execbuf engine flags of every engine to submit on. */
 	nengine = 0;
 	if (opts->engine == ALL_ENGINES) {
 		struct intel_execution_engine2 *engine;

 		for_each_context_engine(fd, opts->ctx, engine) {
 			/* A polling spinner needs an engine that can store a dword. */
 			if (opts->flags & IGT_SPIN_POLL_RUN &&
 			    !gem_class_can_store_dword(fd, engine->class))
 				continue;

 			flags[nengine++] = engine->flags;
 		}
 	} else {
 		flags[nengine++] = opts->engine;
 	}
 	igt_require(nengine);

 	memset(&spin->execbuf, 0, sizeof(spin->execbuf));
 	execbuf = &spin->execbuf;
 	memset(spin->obj, 0, sizeof(spin->obj));
 	obj = spin->obj;
 	memset(relocs, 0, sizeof(relocs));

 	/* Prefer a WC mapping of the batch; fall back to GTT if that yields NULL. */
 	obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
 	batch = __gem_mmap__wc(fd, obj[BATCH].handle,
 			       0, BATCH_SIZE, PROT_WRITE);
 	if (!batch)
 		batch = gem_mmap__gtt(fd, obj[BATCH].handle,
 				      BATCH_SIZE, PROT_WRITE);

 	gem_set_domain(fd, obj[BATCH].handle,
 		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
 	execbuf->buffer_count++;
 	cs = batch;

 	if (opts->dependency) {
 		/* The dependency and the poll page both use the scratch slot. */
 		igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));

 		r = &relocs[obj[BATCH].relocation_count++];

 		/* dummy write to dependency */
 		obj[SCRATCH].handle = opts->dependency;
 		r->presumed_offset = 0;
 		r->target_handle = obj[SCRATCH].handle;
 		r->offset = sizeof(uint32_t) * 1020;
 		r->delta = 0;
 		r->read_domains = I915_GEM_DOMAIN_RENDER;
 		r->write_domain = I915_GEM_DOMAIN_RENDER;

 		execbuf->buffer_count++;
 	} else if (opts->flags & IGT_SPIN_POLL_RUN) {
 		r = &relocs[obj[BATCH].relocation_count++];

 		igt_assert(!opts->dependency);

 		/*
 		 * gen4/5 submit with I915_EXEC_SECURE and require DRM master.
 		 * NOTE(review): presumably the store needs a privileged batch
 		 * on these gens - confirm.
 		 */
 		if (gen == 4 || gen == 5) {
 			execbuf->flags |= I915_EXEC_SECURE;
 			igt_require(__igt_device_set_master(fd) == 0);
 		}

 		spin->poll_handle = gem_create(fd, 4096);
 		obj[SCRATCH].handle = spin->poll_handle;

 		/* Map the poll page via CPU if caching could be set, else via WC. */
 		if (__gem_set_caching(fd, spin->poll_handle,
 				      I915_CACHING_CACHED) == 0)
 			spin->poll = gem_mmap__cpu(fd, spin->poll_handle,
 						   0, 4096,
 						   PROT_READ | PROT_WRITE);
 		else
 			spin->poll = gem_mmap__wc(fd, spin->poll_handle,
 						  0, 4096,
 						  PROT_READ | PROT_WRITE);

 		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);

 		/* batch is first */
 		r->presumed_offset = 4096;
 		r->target_handle = obj[SCRATCH].handle;
 		r->offset = sizeof(uint32_t) * 1;
 		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;

 		/*
 		 * The command dword; bit 22 is additionally set before gen6.
 		 * NOTE(review): bit 22 presumably selects global GTT
 		 * addressing - confirm against the MI_STORE_DWORD_IMM docs.
 		 */
 		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);

 		/* Where the address dword sits in the command varies with gen. */
 		if (gen >= 8) {
 			*cs++ = r->presumed_offset + r->delta;
 			*cs++ = 0;
 		} else if (gen >= 4) {
 			*cs++ = 0;
 			*cs++ = r->presumed_offset + r->delta;
 			/* The address is one dword later, so move the relocation too. */
 			r->offset += sizeof(uint32_t);
 		} else {
 			/* Decrement the command dword that was just written. */
 			cs[-1]--;
 			*cs++ = r->presumed_offset + r->delta;
 		}

 		/* The value stored at poll[SPIN_POLL_START_IDX]. */
 		*cs++ = 1;

 		execbuf->buffer_count++;
 	}

 	spin->handle = obj[BATCH].handle;

 	/* The preamble has to end before the loop start. */
 	igt_assert_lt(cs - batch, LOOP_START_OFFSET / sizeof(*cs));
 	spin->condition = batch + LOOP_START_OFFSET / sizeof(*cs);
 	cs = spin->condition;

 	/* Allow ourselves to be preempted */
 	if (!(opts->flags & IGT_SPIN_NO_PREEMPTION))
 		*cs++ = MI_ARB_CHK;

 	/* Pad with a few nops so that we do not completely hog the system.
 	 *
 	 * Part of the attraction of using a recursive batch is that it is
 	 * hard on the system (executing the "function" call is apparently
 	 * quite expensive). However, the GPU may hog the entire system for
 	 * a few minutes, preventing even NMI. Quite why this is so is unclear,
 	 * but presumably it relates to the PM_INTRMSK workaround on gen6/gen7.
 	 * If we give the system a break by having the GPU execute a few nops
 	 * between function calls, that appears enough to keep SNB out of
 	 * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262
 	 */
 	if (!(opts->flags & IGT_SPIN_FAST))
 		cs += 1000;

 	/*
 	 * recurse: the relocation covers the dword right after the
 	 * MI_BATCH_BUFFER_START command (hence cs + 1) and points it at
 	 * LOOP_START_OFFSET within this same batch.
 	 */
 	r = &relocs[obj[BATCH].relocation_count++];
 	r->target_handle = obj[BATCH].handle;
 	r->offset = (cs + 1 - batch) * sizeof(*cs);
 	r->read_domains = I915_GEM_DOMAIN_COMMAND;
 	r->delta = LOOP_START_OFFSET;
 	if (gen >= 8) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
 		*cs++ = r->delta;
 		*cs++ = 0;
 	} else if (gen >= 6) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
 		*cs++ = r->delta;
 	} else {
 		*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
 		if (gen < 4)
 			r->delta |= 1;
 		*cs = r->delta;
 		cs++;
 	}
 	obj[BATCH].relocs_ptr = to_user_pointer(relocs);

 	/*
 	 * Start the object list at the batch entry when no scratch object was
 	 * added. NOTE(review): this arithmetic assumes IGT_SPIN_BATCH == 1.
 	 */
 	execbuf->buffers_ptr = to_user_pointer(obj +
 					       (2 - execbuf->buffer_count));
 	execbuf->rsvd1 = opts->ctx;

 	if (opts->flags & IGT_SPIN_FENCE_OUT)
 		execbuf->flags |= I915_EXEC_FENCE_OUT;

 	for (i = 0; i < nengine; i++) {
 		/* Replace the engine selection, keeping every other flag. */
 		execbuf->flags &= ~ENGINE_MASK;
 		execbuf->flags |= flags[i];

 		gem_execbuf_wr(fd, execbuf);

 		if (opts->flags & IGT_SPIN_FENCE_OUT) {
 			/* The out-fence fd is read from the upper 32 bits of rsvd2. */
 			int _fd = execbuf->rsvd2 >> 32;

 			igt_assert(_fd >= 0);
 			if (fence_fd == -1) {
 				fence_fd = _fd;
 			} else {
 				/* Merge into the running fence and close both inputs. */
 				int old_fd = fence_fd;

 				fence_fd = sync_fence_merge(old_fd, _fd);
 				close(old_fd);
 				close(_fd);
 			}
 			igt_assert(fence_fd >= 0);
 		}
 	}

 	/* After-the-fact check that the emitted commands stayed in the batch. */
 	igt_assert_lt(cs - batch, BATCH_SIZE / sizeof(*cs));

 	/* Make it easier for callers to resubmit. */
 	for (i = 0; i < ARRAY_SIZE(spin->obj); i++) {
 		spin->obj[i].relocation_count = 0;
 		spin->obj[i].relocs_ptr = 0;
 		spin->obj[i].flags = EXEC_OBJECT_PINNED;
 	}

 	/* Remember the loop's first dword so igt_spin_reset() can restore it. */
 	spin->cmd_precondition = *spin->condition;

 	return fence_fd;
 }

 /*
  * Allocate a zeroed spinner, submit its recursive batch and link it onto
  * spin_list. Asserts if the allocation fails.
  */
 static igt_spin_t *
 spin_create(int fd, const struct igt_spin_factory *opts)
 {
 	igt_spin_t *spin = calloc(1, sizeof(struct igt_spin));

 	igt_assert(spin);

 	/* Submission hands back the out-fence fd, or -1 when none was created. */
 	spin->out_fence = emit_recursive_batch(spin, fd, opts);

 	/* spin_list is only modified with list_lock held. */
 	pthread_mutex_lock(&list_lock);
 	igt_list_add(&spin->link, &spin_list);
 	pthread_mutex_unlock(&list_lock);

 	return spin;
 }

 /*
  * Create and submit a spinner without the requirement and busyness checks
  * that igt_spin_factory() performs around spin_create().
  */
 igt_spin_t *
 __igt_spin_factory(int fd, const struct igt_spin_factory *opts)
 {
 	igt_spin_t *spin = spin_create(fd, opts);

 	return spin;
 }

 /**
  * igt_spin_factory:
  * @fd: open i915 drm file descriptor
  * @opts: controlling options such as context, engine, dependencies etc
  *
  * Checks the requirements for the requested spinner, starts a recursive
  * batch and asserts that the batch is busy (and, with IGT_SPIN_FENCE_OUT,
  * that its out-fence has not signaled yet). The returned #igt_spin_t holds
  * the batch's handle, which can be waited upon, and must be passed to
  * igt_spin_free() for post-processing.
  *
  * Returns:
  * Structure with helper internal state for igt_spin_free().
  */
 igt_spin_t *
 igt_spin_factory(int fd, const struct igt_spin_factory *opts)
 {
 	igt_spin_t *spin;

 	igt_require_gem(fd);

 	/* ALL_ENGINES is filtered per engine at submission time instead. */
 	if (opts->engine != ALL_ENGINES) {
 		struct intel_execution_engine2 e;
 		int class;

 		if (gem_context_lookup_engine(fd, opts->engine,
 					      opts->ctx, &e)) {
 			/* Non-zero lookup result: treat opts->engine as execbuf flags. */
 			gem_require_ring(fd, opts->engine);
 			class = gem_execbuf_flags_to_engine_class(opts->engine);
 		} else {
 			class = e.class;
 		}

 		if (opts->flags & IGT_SPIN_POLL_RUN)
 			igt_require(gem_class_can_store_dword(fd, class));
 	}

 	spin = spin_create(fd, opts);

 	igt_assert(gem_bo_busy(fd, spin->handle));

 	if (opts->flags & IGT_SPIN_FENCE_OUT) {
 		struct pollfd pfd = {
 			.fd = spin->out_fence,
 			.events = POLLIN,
 		};

 		/* A zero-timeout poll returning 0 means the fence is not ready. */
 		igt_assert(poll(&pfd, 1, 0) == 0);
 	}

 	return spin;
 }

 static void notify(union sigval arg)
 {
 	igt_spin_t *spin = arg.sival_ptr;

 	igt_spin_end(spin);
 }

 /**
  * igt_spin_set_timeout:
  * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
  * @ns: amount of time in nanoseconds the batch continues to execute
  *      before finishing, must be greater than zero
  *
  * Arms a one-shot CLOCK_MONOTONIC timer whose expiry ends the recursive
  * batch associated with @spin. Asserts if @spin already has a timer.
  */
 void igt_spin_set_timeout(igt_spin_t *spin, int64_t ns)
 {
 	struct itimerspec its;
 	struct sigevent sev;
 	timer_t timer;

 	/* Checked before the NULL test, so it also fires for a NULL @spin. */
 	igt_assert(ns > 0);
 	if (spin == NULL)
 		return;

 	igt_assert(!spin->timer);

 	/* On expiry, notify() is run with @spin as its argument. */
 	memset(&sev, 0, sizeof(sev));
 	sev.sigev_value.sival_ptr = spin;
 	sev.sigev_notify_function = notify;
 	sev.sigev_notify = SIGEV_THREAD;
 	igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
 	igt_assert(timer);

 	/* it_interval stays zero, so the timer expires only once. */
 	memset(&its, 0, sizeof(its));
 	its.it_value.tv_nsec = ns % NSEC_PER_SEC;
 	its.it_value.tv_sec = ns / NSEC_PER_SEC;
 	igt_assert(timer_settime(timer, 0, &its, NULL) == 0);

 	spin->timer = timer;
 }

 /**
  * igt_spin_reset:
  * @spin: spin state from igt_spin_new()
  *
  * Restores @spin to its as-created state so that it can be reused: the poll
  * marker (if the spinner has one) is zeroed and the first dword of the loop
  * is put back to the value saved at creation.
  */
 void igt_spin_reset(igt_spin_t *spin)
 {
 	if (igt_spin_has_poll(spin)) {
 		spin->poll[SPIN_POLL_START_IDX] = 0;
 	}

 	/* Undo a previous igt_spin_end() by restoring the saved command. */
 	spin->condition[0] = spin->cmd_precondition;
 	__sync_synchronize();
 }

 /**
  * igt_spin_end:
  * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
  *
  * Manually ends the spinner associated with @spin by overwriting the first
  * dword of its loop with MI_BATCH_BUFFER_END, followed by a full barrier.
  */
 void igt_spin_end(igt_spin_t *spin)
 {
 	if (spin) {
 		spin->condition[0] = MI_BATCH_BUFFER_END;
 		__sync_synchronize();
 	}
 }

 /**
  * igt_spin_free:
  * @fd: open i915 drm file descriptor
  * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
  *
  * Does the post-processing for a spinner started with igt_spin_new(): it is
  * unlinked from the global list, its timer (if any) is deleted, the batch is
  * ended, its mappings, GEM handles and out-fence are released, and finally
  * @spin itself is freed.
  */
 void igt_spin_free(int fd, igt_spin_t *spin)
 {
 	if (spin == NULL)
 		return;

 	pthread_mutex_lock(&list_lock);
 	igt_list_del(&spin->link);
 	pthread_mutex_unlock(&list_lock);

 	if (spin->timer) {
 		timer_delete(spin->timer);
 	}

 	igt_spin_end(spin);

 	/* spin->condition points into the batch mapping; mask back to its page. */
 	gem_munmap((void *)((unsigned long)spin->condition & (~4095UL)),
 		   BATCH_SIZE);

 	/* The poll page only exists for spinners created with polling. */
 	if (spin->poll) {
 		gem_munmap(spin->poll, 4096);
 		gem_close(fd, spin->poll_handle);
 	}

 	gem_close(fd, spin->handle);

 	if (spin->out_fence >= 0) {
 		close(spin->out_fence);
 	}

 	free(spin);
 }

 void igt_terminate_spins(void)
 {
 	struct igt_spin *iter;

 	pthread_mutex_lock(&list_lock);
 	igt_list_for_each(iter, &spin_list, link)
 		igt_spin_end(iter);
 	pthread_mutex_unlock(&list_lock);
 }

 void igt_unshare_spins(void)
 {
 	struct igt_spin *it, *n;

 	/* Disable the automatic termination on inherited spinners */
 	igt_list_for_each_safe(it, n, &spin_list, link)
 		igt_list_init(&it->link);
 	igt_list_init(&spin_list);
 }

 /*
  * Open a vgem device, create a 1x1 bo on it with a write fence attached and
  * import that bo into @fd through a dma-buf. The vgem device fd and fence
  * are kept in @cork; the handle valid on @fd is returned.
  */
 static uint32_t plug_vgem_handle(struct igt_cork *cork, int fd)
 {
 	struct vgem_bo bo;
 	uint32_t handle;
 	int dmabuf;

 	cork->vgem.device = drm_open_driver(DRIVER_VGEM);
 	igt_require(vgem_has_fences(cork->vgem.device));

 	bo.width = 1;
 	bo.height = 1;
 	bo.bpp = 4;
 	vgem_create(cork->vgem.device, &bo);
 	cork->vgem.fence = vgem_fence_attach(cork->vgem.device, &bo,
 					     VGEM_FENCE_WRITE);

 	/* The dma-buf fd is only needed for the hand-over between devices. */
 	dmabuf = prime_handle_to_fd(cork->vgem.device, bo.handle);
 	handle = prime_fd_to_handle(fd, dmabuf);
 	close(dmabuf);

 	return handle;
 }

 /* Signal the vgem fence stored in @cork, then close the vgem device fd. */
 static void unplug_vgem_handle(struct igt_cork *cork)
 {
 	vgem_fence_signal(cork->vgem.device,
 			  cork->vgem.fence);

 	close(cork->vgem.device);
 }

 /*
  * Create a sw_sync timeline, stored in @cork, and a fence on it at sequence
  * number 1. Returns the fence fd.
  */
 static uint32_t plug_sync_fd(struct igt_cork *cork)
 {
 	int fence;

 	igt_require_sw_sync();

 	cork->sw_sync.timeline = sw_sync_timeline_create();

 	fence = sw_sync_timeline_create_fence(cork->sw_sync.timeline, 1);
 	return fence;
 }

 /* Advance the timeline stored in @cork by one step, then close it. */
 static void unplug_sync_fd(struct igt_cork *cork)
 {
 	sw_sync_timeline_inc(cork->sw_sync.timeline, 1);

 	close(cork->sw_sync.timeline);
 }

 /**
  * igt_cork_plug:
  * @cork: cork state; cork->type selects the corking method and the
  *        method-specific fields are filled in for igt_cork_unplug()
  * @fd: open drm file descriptor, only used by the CORK_VGEM_HANDLE method
  *
  * This function provides a mechanism to stall submission. It provides two
  * blocking methods:
  *
  * CORK_VGEM_HANDLE:
  * Imports a vgem bo with a fence attached to it. This bo can be used as a
  * dependency during submission to stall execution until the fence is signaled.
  *
  * CORK_SYNC_FD:
  * Creates a timeline and then a fence on that timeline. The fence can be used
  * as an input fence to a request, the request will be stalled until the fence
  * is signaled.
  *
  * Asserts that cork->fd is -1 on entry and that cork->type is one of the two
  * methods above.
  *
  * Returns:
  * Handle of the imported BO / Sw sync fence FD.
  */
 uint32_t igt_cork_plug(struct igt_cork *cork, int fd)
 {
 	uint32_t handle = 0;

 	igt_assert(cork->fd == -1);

 	switch (cork->type) {
 	case CORK_SYNC_FD:
 		handle = plug_sync_fd(cork);
 		break;

 	case CORK_VGEM_HANDLE:
 		handle = plug_vgem_handle(cork, fd);
 		break;

 	default:
 		igt_assert_f(0, "Invalid cork type!\n");
 		break;
 	}

 	return handle;
 }

 /**
  * igt_cork_unplug:
  * @cork: cork state from igt_cork_plug()
  *
  * This function unblocks the execution by signaling the fence set up by
  * igt_cork_plug() for cork->type, does the necessary post-processing and
  * resets cork->fd to -1. Asserts that cork->fd is not -1 on entry.
  *
  * NOTE: the handle returned by igt_cork_plug is not closed during this phase.
  */
 void igt_cork_unplug(struct igt_cork *cork)
 {
 	igt_assert(cork->fd != -1);

 	if (cork->type == CORK_SYNC_FD) {
 		unplug_sync_fd(cork);
 	} else if (cork->type == CORK_VGEM_HANDLE) {
 		unplug_vgem_handle(cork);
 	} else {
 		igt_assert_f(0, "Invalid cork type!\n");
 	}

 	cork->fd = -1; /* Reset cork */
 }
	/*
	* Copyright © 2016 Intel Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*
	*/

	#include <time.h>
	#include <signal.h>
	#include <pthread.h>
	#include <sys/poll.h>

	#include <i915_drm.h>

	#include "igt_core.h"
	#include "drmtest.h"
	#include "igt_device.h"
	#include "igt_dummyload.h"
	#include "igt_gt.h"
	#include "intel_chipset.h"
	#include "intel_reg.h"
	#include "ioctl_wrappers.h"
	#include "sw_sync.h"
	#include "igt_vgem.h"
	#include "i915/gem_engine_topology.h"
	#include "i915/gem_mman.h"

	/**
	* SECTION:igt_dummyload
	* @short_description: Library for submitting GPU workloads
	* @title: Dummyload
	* @include: igt.h
	*
	* A lot of igt testcases need some GPU workload to make sure a race window is
	* big enough. Unfortunately having a fixed amount of workload leads to
	* spurious test failures or overly long runtimes on some fast/slow platforms.
	* This library contains functionality to submit GPU workloads that should
	* consume exactly a specific amount of time.
	*/

	/*
	 * Local definition of a two-bit execbuf flag field starting at bit 13.
	 * NOTE(review): presumably mirrors the upstream I915_EXEC_BSD_* names for
	 * builds against headers that lack them - confirm.
	 */
	#define LOCAL_I915_EXEC_BSD_SHIFT (13)
	#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)

	/* Every execbuf flag bit that is cleared before a per-engine submit. */
	#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

	/* Command dword emitted at the loop start so the spinner can be preempted. */
	#define MI_ARB_CHK (0x5 << 23)

	/* Size in bytes of the buffer object that holds the spinning batch. */
	static const int BATCH_SIZE = 4096;
	/* Byte offset inside the batch that the recursive jump targets. */
	static const int LOOP_START_OFFSET = 64;

	/* Spinners are linked here on creation so igt_terminate_spins() can end them. */
	static IGT_LIST(spin_list);
	/* Taken around additions, removals and the igt_terminate_spins() walk. */
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	/*
	 * emit_recursive_batch - build and submit the spinning batch for @spin.
	 * @spin: spinner whose execbuf, obj[], handle, poll and condition fields are
	 *        filled in here
	 * @fd: i915 drm file descriptor used for all GEM calls
	 * @opts: engine, context, dependency and IGT_SPIN_* flags for this spinner
	 *
	 * The batch consists of an optional MI_STORE_DWORD_IMM preamble (only with
	 * IGT_SPIN_POLL_RUN) followed, at byte LOOP_START_OFFSET, by a loop body that
	 * ends in an MI_BATCH_BUFFER_START jumping back to LOOP_START_OFFSET of the
	 * same buffer. spin->condition points at the first dword of that loop body.
	 * The same execbuf is submitted once per selected engine.
	 *
	 * Returns the out-fence fd (merged over all submissions) when
	 * IGT_SPIN_FENCE_OUT is set in opts->flags, otherwise -1.
	 */
	static int
	emit_recursive_batch(igt_spin_t *spin,
			     int fd, const struct igt_spin_factory *opts)
	{
	#define SCRATCH 0
	#define BATCH IGT_SPIN_BATCH
		const int gen = intel_gen(intel_get_drm_devid(fd));
		struct drm_i915_gem_relocation_entry relocs[2], *r;
		struct drm_i915_gem_execbuffer2 *execbuf;
		struct drm_i915_gem_exec_object2 *obj;
		unsigned int flags[GEM_MAX_ENGINES];
		unsigned int nengine;
		int fence_fd = -1;
		/* Both are pointers into the mapped batch: write cursor and base. */
		uint32_t *cs, *batch;
		int i;

		/* Gather the execbuf engine flags of every engine to submit on. */
		nengine = 0;
		if (opts->engine == ALL_ENGINES) {
			struct intel_execution_engine2 *engine;

			for_each_context_engine(fd, opts->ctx, engine) {
				/* A polling spinner needs an engine that can store a dword. */
				if (opts->flags & IGT_SPIN_POLL_RUN &&
				    !gem_class_can_store_dword(fd, engine->class))
					continue;

				flags[nengine++] = engine->flags;
			}
		} else {
			flags[nengine++] = opts->engine;
		}
		igt_require(nengine);

		memset(&spin->execbuf, 0, sizeof(spin->execbuf));
		execbuf = &spin->execbuf;
		memset(spin->obj, 0, sizeof(spin->obj));
		obj = spin->obj;
		memset(relocs, 0, sizeof(relocs));

		/* Prefer a WC mapping of the batch; fall back to GTT if that yields NULL. */
		obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
		batch = __gem_mmap__wc(fd, obj[BATCH].handle,
				       0, BATCH_SIZE, PROT_WRITE);
		if (!batch)
			batch = gem_mmap__gtt(fd, obj[BATCH].handle,
					      BATCH_SIZE, PROT_WRITE);

		gem_set_domain(fd, obj[BATCH].handle,
			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
		execbuf->buffer_count++;
		cs = batch;

		if (opts->dependency) {
			/* The dependency and the poll page both use the scratch slot. */
			igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));

			r = &relocs[obj[BATCH].relocation_count++];

			/* dummy write to dependency */
			obj[SCRATCH].handle = opts->dependency;
			r->presumed_offset = 0;
			r->target_handle = obj[SCRATCH].handle;
			r->offset = sizeof(uint32_t) * 1020;
			r->delta = 0;
			r->read_domains = I915_GEM_DOMAIN_RENDER;
			r->write_domain = I915_GEM_DOMAIN_RENDER;

			execbuf->buffer_count++;
		} else if (opts->flags & IGT_SPIN_POLL_RUN) {
			r = &relocs[obj[BATCH].relocation_count++];

			igt_assert(!opts->dependency);

			/*
			 * gen4/5 submit with I915_EXEC_SECURE and require DRM master.
			 * NOTE(review): presumably the store needs a privileged batch
			 * on these gens - confirm.
			 */
			if (gen == 4 || gen == 5) {
				execbuf->flags |= I915_EXEC_SECURE;
				igt_require(__igt_device_set_master(fd) == 0);
			}

			spin->poll_handle = gem_create(fd, 4096);
			obj[SCRATCH].handle = spin->poll_handle;

			/* Map the poll page via CPU if caching could be set, else via WC. */
			if (__gem_set_caching(fd, spin->poll_handle,
					      I915_CACHING_CACHED) == 0)
				spin->poll = gem_mmap__cpu(fd, spin->poll_handle,
							   0, 4096,
							   PROT_READ | PROT_WRITE);
			else
				spin->poll = gem_mmap__wc(fd, spin->poll_handle,
							  0, 4096,
							  PROT_READ | PROT_WRITE);

			igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);

			/* batch is first */
			r->presumed_offset = 4096;
			r->target_handle = obj[SCRATCH].handle;
			r->offset = sizeof(uint32_t) * 1;
			r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;

			/*
			 * The command dword; bit 22 is additionally set before gen6.
			 * NOTE(review): bit 22 presumably selects global GTT
			 * addressing - confirm against the MI_STORE_DWORD_IMM docs.
			 */
			*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);

			/* Where the address dword sits in the command varies with gen. */
			if (gen >= 8) {
				*cs++ = r->presumed_offset + r->delta;
				*cs++ = 0;
			} else if (gen >= 4) {
				*cs++ = 0;
				*cs++ = r->presumed_offset + r->delta;
				/* The address is one dword later, so move the relocation too. */
				r->offset += sizeof(uint32_t);
			} else {
				/* Decrement the command dword that was just written. */
				cs[-1]--;
				*cs++ = r->presumed_offset + r->delta;
			}

			/* The value stored at poll[SPIN_POLL_START_IDX]. */
			*cs++ = 1;

			execbuf->buffer_count++;
		}

		spin->handle = obj[BATCH].handle;

		/* The preamble has to end before the loop start. */
		igt_assert_lt(cs - batch, LOOP_START_OFFSET / sizeof(*cs));
		spin->condition = batch + LOOP_START_OFFSET / sizeof(*cs);
		cs = spin->condition;

		/* Allow ourselves to be preempted */
		if (!(opts->flags & IGT_SPIN_NO_PREEMPTION))
			*cs++ = MI_ARB_CHK;

		/* Pad with a few nops so that we do not completely hog the system.
		 *
		 * Part of the attraction of using a recursive batch is that it is
		 * hard on the system (executing the "function" call is apparently
		 * quite expensive). However, the GPU may hog the entire system for
		 * a few minutes, preventing even NMI. Quite why this is so is unclear,
		 * but presumably it relates to the PM_INTRMSK workaround on gen6/gen7.
		 * If we give the system a break by having the GPU execute a few nops
		 * between function calls, that appears enough to keep SNB out of
		 * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262
		 */
		if (!(opts->flags & IGT_SPIN_FAST))
			cs += 1000;

		/*
		 * recurse: the relocation covers the dword right after the
		 * MI_BATCH_BUFFER_START command (hence cs + 1) and points it at
		 * LOOP_START_OFFSET within this same batch.
		 */
		r = &relocs[obj[BATCH].relocation_count++];
		r->target_handle = obj[BATCH].handle;
		r->offset = (cs + 1 - batch) * sizeof(*cs);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = LOOP_START_OFFSET;
		if (gen >= 8) {
			*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
			*cs++ = r->delta;
			*cs++ = 0;
		} else if (gen >= 6) {
			*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
			*cs++ = r->delta;
		} else {
			*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
			if (gen < 4)
				r->delta |= 1;
			*cs = r->delta;
			cs++;
		}
		obj[BATCH].relocs_ptr = to_user_pointer(relocs);

		/*
		 * Start the object list at the batch entry when no scratch object was
		 * added. NOTE(review): this arithmetic assumes IGT_SPIN_BATCH == 1.
		 */
		execbuf->buffers_ptr = to_user_pointer(obj +
						       (2 - execbuf->buffer_count));
		execbuf->rsvd1 = opts->ctx;

		if (opts->flags & IGT_SPIN_FENCE_OUT)
			execbuf->flags |= I915_EXEC_FENCE_OUT;

		for (i = 0; i < nengine; i++) {
			/* Replace the engine selection, keeping every other flag. */
			execbuf->flags &= ~ENGINE_MASK;
			execbuf->flags |= flags[i];

			gem_execbuf_wr(fd, execbuf);

			if (opts->flags & IGT_SPIN_FENCE_OUT) {
				/* The out-fence fd is read from the upper 32 bits of rsvd2. */
				int _fd = execbuf->rsvd2 >> 32;

				igt_assert(_fd >= 0);
				if (fence_fd == -1) {
					fence_fd = _fd;
				} else {
					/* Merge into the running fence and close both inputs. */
					int old_fd = fence_fd;

					fence_fd = sync_fence_merge(old_fd, _fd);
					close(old_fd);
					close(_fd);
				}
				igt_assert(fence_fd >= 0);
			}
		}

		/* After-the-fact check that the emitted commands stayed in the batch. */
		igt_assert_lt(cs - batch, BATCH_SIZE / sizeof(*cs));

		/* Make it easier for callers to resubmit. */
		for (i = 0; i < ARRAY_SIZE(spin->obj); i++) {
			spin->obj[i].relocation_count = 0;
			spin->obj[i].relocs_ptr = 0;
			spin->obj[i].flags = EXEC_OBJECT_PINNED;
		}

		/* Remember the loop's first dword so igt_spin_reset() can restore it. */
		spin->cmd_precondition = *spin->condition;

		return fence_fd;
	}

	/*
	 * Allocate a zeroed spinner, submit its recursive batch and link it onto
	 * spin_list. Asserts if the allocation fails.
	 */
	static igt_spin_t *
	spin_create(int fd, const struct igt_spin_factory *opts)
	{
		igt_spin_t *spin = calloc(1, sizeof(struct igt_spin));

		igt_assert(spin);

		/* Submission hands back the out-fence fd, or -1 when none was created. */
		spin->out_fence = emit_recursive_batch(spin, fd, opts);

		/* spin_list is only modified with list_lock held. */
		pthread_mutex_lock(&list_lock);
		igt_list_add(&spin->link, &spin_list);
		pthread_mutex_unlock(&list_lock);

		return spin;
	}

	/*
	 * Create and submit a spinner without the requirement and busyness checks
	 * that igt_spin_factory() performs around spin_create().
	 */
	igt_spin_t *
	__igt_spin_factory(int fd, const struct igt_spin_factory *opts)
	{
		igt_spin_t *spin = spin_create(fd, opts);

		return spin;
	}

	/**
	 * igt_spin_factory:
	 * @fd: open i915 drm file descriptor
	 * @opts: controlling options such as context, engine, dependencies etc
	 *
	 * Checks the requirements for the requested spinner, starts a recursive
	 * batch and asserts that the batch is busy (and, with IGT_SPIN_FENCE_OUT,
	 * that its out-fence has not signaled yet). The returned #igt_spin_t holds
	 * the batch's handle, which can be waited upon, and must be passed to
	 * igt_spin_free() for post-processing.
	 *
	 * Returns:
	 * Structure with helper internal state for igt_spin_free().
	 */
	igt_spin_t *
	igt_spin_factory(int fd, const struct igt_spin_factory *opts)
	{
		igt_spin_t *spin;

		igt_require_gem(fd);

		/* ALL_ENGINES is filtered per engine at submission time instead. */
		if (opts->engine != ALL_ENGINES) {
			struct intel_execution_engine2 e;
			int class;

			if (gem_context_lookup_engine(fd, opts->engine,
						      opts->ctx, &e)) {
				/* Non-zero lookup result: treat opts->engine as execbuf flags. */
				gem_require_ring(fd, opts->engine);
				class = gem_execbuf_flags_to_engine_class(opts->engine);
			} else {
				class = e.class;
			}

			if (opts->flags & IGT_SPIN_POLL_RUN)
				igt_require(gem_class_can_store_dword(fd, class));
		}

		spin = spin_create(fd, opts);

		igt_assert(gem_bo_busy(fd, spin->handle));

		if (opts->flags & IGT_SPIN_FENCE_OUT) {
			struct pollfd pfd = {
				.fd = spin->out_fence,
				.events = POLLIN,
			};

			/* A zero-timeout poll returning 0 means the fence is not ready. */
			igt_assert(poll(&pfd, 1, 0) == 0);
		}

		return spin;
	}

	static void notify(union sigval arg)
	{
	igt_spin_t *spin = arg.sival_ptr;

	igt_spin_end(spin);
	}

	/**
	 * igt_spin_set_timeout:
	 * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
	 * @ns: amount of time in nanoseconds the batch continues to execute
	 *      before finishing, must be greater than zero
	 *
	 * Arms a one-shot CLOCK_MONOTONIC timer whose expiry ends the recursive
	 * batch associated with @spin. Asserts if @spin already has a timer.
	 */
	void igt_spin_set_timeout(igt_spin_t *spin, int64_t ns)
	{
		struct itimerspec its;
		struct sigevent sev;
		timer_t timer;

		/* Checked before the NULL test, so it also fires for a NULL @spin. */
		igt_assert(ns > 0);
		if (spin == NULL)
			return;

		igt_assert(!spin->timer);

		/* On expiry, notify() is run with @spin as its argument. */
		memset(&sev, 0, sizeof(sev));
		sev.sigev_value.sival_ptr = spin;
		sev.sigev_notify_function = notify;
		sev.sigev_notify = SIGEV_THREAD;
		igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
		igt_assert(timer);

		/* it_interval stays zero, so the timer expires only once. */
		memset(&its, 0, sizeof(its));
		its.it_value.tv_nsec = ns % NSEC_PER_SEC;
		its.it_value.tv_sec = ns / NSEC_PER_SEC;
		igt_assert(timer_settime(timer, 0, &its, NULL) == 0);

		spin->timer = timer;
	}

	/**
	 * igt_spin_reset:
	 * @spin: spin state from igt_spin_new()
	 *
	 * Restores @spin to its as-created state so that it can be reused: the poll
	 * marker (if the spinner has one) is zeroed and the first dword of the loop
	 * is put back to the value saved at creation.
	 */
	void igt_spin_reset(igt_spin_t *spin)
	{
		if (igt_spin_has_poll(spin)) {
			spin->poll[SPIN_POLL_START_IDX] = 0;
		}

		/* Undo a previous igt_spin_end() by restoring the saved command. */
		spin->condition[0] = spin->cmd_precondition;
		__sync_synchronize();
	}

	/**
	 * igt_spin_end:
	 * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
	 *
	 * Manually ends the spinner associated with @spin by overwriting the first
	 * dword of its loop with MI_BATCH_BUFFER_END, followed by a full barrier.
	 */
	void igt_spin_end(igt_spin_t *spin)
	{
		if (spin) {
			spin->condition[0] = MI_BATCH_BUFFER_END;
			__sync_synchronize();
		}
	}

	/**
	 * igt_spin_free:
	 * @fd: open i915 drm file descriptor
	 * @spin: spin state from igt_spin_new(); a NULL @spin is ignored
	 *
	 * Does the post-processing for a spinner started with igt_spin_new(): it is
	 * unlinked from the global list, its timer (if any) is deleted, the batch is
	 * ended, its mappings, GEM handles and out-fence are released, and finally
	 * @spin itself is freed.
	 */
	void igt_spin_free(int fd, igt_spin_t *spin)
	{
		if (spin == NULL)
			return;

		pthread_mutex_lock(&list_lock);
		igt_list_del(&spin->link);
		pthread_mutex_unlock(&list_lock);

		if (spin->timer) {
			timer_delete(spin->timer);
		}

		igt_spin_end(spin);

		/* spin->condition points into the batch mapping; mask back to its page. */
		gem_munmap((void *)((unsigned long)spin->condition & (~4095UL)),
			   BATCH_SIZE);

		/* The poll page only exists for spinners created with polling. */
		if (spin->poll) {
			gem_munmap(spin->poll, 4096);
			gem_close(fd, spin->poll_handle);
		}

		gem_close(fd, spin->handle);

		if (spin->out_fence >= 0) {
			close(spin->out_fence);
		}

		free(spin);
	}

	void igt_terminate_spins(void)
	{
	struct igt_spin *iter;

	pthread_mutex_lock(&list_lock);
	igt_list_for_each(iter, &spin_list, link)
	igt_spin_end(iter);
	pthread_mutex_unlock(&list_lock);
	}

	/*
	 * Detach every spinner from spin_list and empty the list, so that
	 * igt_terminate_spins() no longer reaches spinners that were inherited.
	 * Note that list_lock is not taken here.
	 */
	void igt_unshare_spins(void)
	{
		/* Iteration cursors must be pointers: the body dereferences it->link. */
		struct igt_spin *it, *n;

		/* Disable the automatic termination on inherited spinners */
		igt_list_for_each_safe(it, n, &spin_list, link)
			igt_list_init(&it->link);
		igt_list_init(&spin_list);
	}

	/*
	 * Open a vgem device, create a 1x1 bo on it with a write fence attached and
	 * import that bo into @fd through a dma-buf. The vgem device fd and fence
	 * are kept in @cork; the handle valid on @fd is returned.
	 */
	static uint32_t plug_vgem_handle(struct igt_cork *cork, int fd)
	{
		struct vgem_bo bo;
		uint32_t handle;
		int dmabuf;

		cork->vgem.device = drm_open_driver(DRIVER_VGEM);
		igt_require(vgem_has_fences(cork->vgem.device));

		bo.width = 1;
		bo.height = 1;
		bo.bpp = 4;
		vgem_create(cork->vgem.device, &bo);
		cork->vgem.fence = vgem_fence_attach(cork->vgem.device, &bo,
						     VGEM_FENCE_WRITE);

		/* The dma-buf fd is only needed for the hand-over between devices. */
		dmabuf = prime_handle_to_fd(cork->vgem.device, bo.handle);
		handle = prime_fd_to_handle(fd, dmabuf);
		close(dmabuf);

		return handle;
	}

	/* Signal the vgem fence stored in @cork, then close the vgem device fd. */
	static void unplug_vgem_handle(struct igt_cork *cork)
	{
		vgem_fence_signal(cork->vgem.device,
				  cork->vgem.fence);

		close(cork->vgem.device);
	}

	/*
	 * Create a sw_sync timeline, stored in @cork, and a fence on it at sequence
	 * number 1. Returns the fence fd.
	 */
	static uint32_t plug_sync_fd(struct igt_cork *cork)
	{
		int fence;

		igt_require_sw_sync();

		cork->sw_sync.timeline = sw_sync_timeline_create();

		fence = sw_sync_timeline_create_fence(cork->sw_sync.timeline, 1);
		return fence;
	}

	/* Advance the timeline stored in @cork by one step, then close it. */
	static void unplug_sync_fd(struct igt_cork *cork)
	{
		sw_sync_timeline_inc(cork->sw_sync.timeline, 1);

		close(cork->sw_sync.timeline);
	}

	/**
	 * igt_cork_plug:
	 * @cork: cork state; cork->type selects the corking method and the
	 *        method-specific fields are filled in for igt_cork_unplug()
	 * @fd: open drm file descriptor, only used by the CORK_VGEM_HANDLE method
	 *
	 * This function provides a mechanism to stall submission. It provides two
	 * blocking methods:
	 *
	 * CORK_VGEM_HANDLE:
	 * Imports a vgem bo with a fence attached to it. This bo can be used as a
	 * dependency during submission to stall execution until the fence is signaled.
	 *
	 * CORK_SYNC_FD:
	 * Creates a timeline and then a fence on that timeline. The fence can be used
	 * as an input fence to a request, the request will be stalled until the fence
	 * is signaled.
	 *
	 * Asserts that cork->fd is -1 on entry and that cork->type is one of the two
	 * methods above.
	 *
	 * Returns:
	 * Handle of the imported BO / Sw sync fence FD.
	 */
	uint32_t igt_cork_plug(struct igt_cork *cork, int fd)
	{
		uint32_t handle = 0;

		igt_assert(cork->fd == -1);

		switch (cork->type) {
		case CORK_SYNC_FD:
			handle = plug_sync_fd(cork);
			break;

		case CORK_VGEM_HANDLE:
			handle = plug_vgem_handle(cork, fd);
			break;

		default:
			igt_assert_f(0, "Invalid cork type!\n");
			break;
		}

		return handle;
	}

	/**
	 * igt_cork_unplug:
	 * @cork: cork state from igt_cork_plug()
	 *
	 * This function unblocks the execution by signaling the fence set up by
	 * igt_cork_plug() for cork->type, does the necessary post-processing and
	 * resets cork->fd to -1. Asserts that cork->fd is not -1 on entry.
	 *
	 * NOTE: the handle returned by igt_cork_plug is not closed during this phase.
	 */
	void igt_cork_unplug(struct igt_cork *cork)
	{
		igt_assert(cork->fd != -1);

		if (cork->type == CORK_SYNC_FD) {
			unplug_sync_fd(cork);
		} else if (cork->type == CORK_VGEM_HANDLE) {
			unplug_vgem_handle(cork);
		} else {
			igt_assert_f(0, "Invalid cork type!\n");
		}

		cork->fd = -1; /* Reset cork */
	}