radeonsi: implement GL_INTEL_blackhole_render
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7031>
diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index 112a189..e2c9ff1 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -1,4 +1,5 @@
GL 4.5 on llvmpipe
+GL_INTEL_blackhole_render on radeonsi
GL_NV_copy_depth_to_color for NIR
GL_NV_half_float
GL_NV_shader_atomic_int64 on radeonsi
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 3919e9f..5cb238c 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -28,6 +28,9 @@
/* The public winsys interface header for the radeon driver. */
+/* cs_flush flag: skip command submission for this flush. Same as RADEON_NOOP=1. */
+#define RADEON_FLUSH_NOOP (1u << 30)
+
/* Whether the next IB can start immediately and not wait for draws and
* dispatches from the current IB to finish. */
#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c
index db9b5deb..98f37f2 100644
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -300,6 +300,9 @@
if (check_vm)
si_save_cs(ctx->ws, cs, &saved, true);
+ if (ctx->is_noop)
+ flags |= RADEON_FLUSH_NOOP;
+
ctx->ws->cs_flush(cs, flags, &ctx->last_sdma_fence);
if (fence)
ctx->ws->fence_reference(fence, ctx->last_sdma_fence);
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 958d06b..d162e06 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -162,6 +162,7 @@
case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
case PIPE_CAP_NO_CLIP_ON_COPY_TEX:
case PIPE_CAP_SHADER_ATOMIC_INT64:
+ case PIPE_CAP_FRONTEND_NOOP:
return 1;
case PIPE_CAP_GLSL_ZERO_INIT:
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 81d9368..4d49079 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -227,6 +227,9 @@
}
}
+ if (ctx->is_noop)
+ flags |= RADEON_FLUSH_NOOP;
+
/* Flush the CS. */
ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
if (fence)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 9676894..59e55da 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -423,6 +423,14 @@
}
}
+static void si_set_frontend_noop(struct pipe_context *ctx, bool enable)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ /* Flush first: the flush is issued while the previous is_noop value is still set. */
+ ctx->flush(ctx, NULL, PIPE_FLUSH_ASYNC);
+ sctx->is_noop = enable; /* when set, the CS flush paths add RADEON_FLUSH_NOOP to their flags */
+}
+
static struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags)
{
struct si_screen *sscreen = (struct si_screen *)screen;
@@ -556,6 +564,7 @@
sctx->b.set_context_param = si_set_context_param;
sctx->b.get_device_reset_status = si_get_reset_status;
sctx->b.set_device_reset_callback = si_set_device_reset_callback;
+ sctx->b.set_frontend_noop = si_set_frontend_noop;
si_init_all_descriptors(sctx);
si_init_buffer_functions(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8854af6..e5c6900 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -954,6 +954,7 @@
unsigned wait_mem_number;
uint16_t prefetch_L2_mask;
+ bool is_noop;
bool has_graphics;
bool gfx_flush_in_progress : 1;
bool gfx_last_ib_is_busy : 1;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index e07d2c4..a5cbdf8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1796,7 +1796,8 @@
/* If the CS is not empty or overflowed.... */
if (likely(radeon_emitted(&cs->main.base, 0) &&
cs->main.base.current.cdw <= cs->main.base.current.max_dw &&
- !debug_get_option_noop())) {
+ !debug_get_option_noop() &&
+ !(flags & RADEON_FLUSH_NOOP))) {
struct amdgpu_cs_context *cur = cs->csc;
/* Set IB sizes. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 403ade2..7ea79c1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -638,7 +638,8 @@
cs->cst = tmp;
/* If the CS is not empty or overflowed, emit it in a separate thread. */
- if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
+ if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw &&
+ !debug_get_option_noop() && !(flags & RADEON_FLUSH_NOOP)) {
unsigned i, num_relocs;
num_relocs = cs->cst->num_relocs;