zink: use VK_EXT_multisampled_render_to_single_sampled for EXT_multisample_render_to_texture
this extension was added for the purpose of emulating the GL ext,
and using it is reasonably straightforward.
the only (somewhat) invasive part is modifying the renderpass/dynamic-rendering
state hashes to have samplecounts in the key, but this is also not too much work.
now only fbfetch requires real renderpasses, and everything else uses dynamic rendering.
fixes #7559
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20883>
diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c
index adea166..000e3f3 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -2378,7 +2378,10 @@
find_rp_state(struct zink_context *ctx)
{
bool found = false;
- struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache, &ctx->gfx_pipeline_state.rendering_info, &found);
+ /* calc the state idx using the samples to account for msrtss */
+ unsigned idx = zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ?
+ util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples + 1) : 0;
+ struct set_entry *he = _mesa_set_search_or_add(&ctx->rendering_state_cache[idx], &ctx->gfx_pipeline_state.rendering_info, &found);
struct zink_rendering_info *info;
if (found) {
info = (void*)he->key;
@@ -2386,7 +2389,7 @@
}
info = ralloc(ctx, struct zink_rendering_info);
memcpy(info, &ctx->gfx_pipeline_state.rendering_info, sizeof(VkPipelineRenderingCreateInfo));
- info->id = ctx->rendering_state_cache.entries;
+ info->id = ctx->rendering_state_cache[idx].entries;
he->key = info;
return info->id;
}
@@ -2569,6 +2572,15 @@
ctx->gfx_pipeline_state.dirty |= rp_changed;
ctx->gfx_pipeline_state.rp_state = rp_state;
+ VkMultisampledRenderToSingleSampledInfoEXT msrtss = {
+ VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
+ NULL,
+ VK_TRUE,
+ ctx->gfx_pipeline_state.rast_samples + 1,
+ };
+
+ ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? &msrtss : NULL;
+ assert(!ctx->transient_attachments || msrtss.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT);
VKCTX(CmdBeginRendering)(ctx->batch.state->cmdbuf, &ctx->dynamic_fb.info);
ctx->batch.in_rp = true;
return clear_buffers;
@@ -2609,7 +2621,8 @@
* - msrtss is TODO
* - dynamic rendering doesn't have input attachments
*/
- if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering || ctx->transient_attachments || ctx->fbfetch_outputs)
+ if (!zink_screen(ctx->base.screen)->info.have_KHR_dynamic_rendering ||
+ (ctx->transient_attachments && !zink_screen(ctx->base.screen)->info.have_EXT_multisampled_render_to_single_sampled) || ctx->fbfetch_outputs)
clear_buffers = zink_begin_render_pass(ctx);
else
clear_buffers = begin_rendering(ctx);
@@ -3114,10 +3127,10 @@
struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
if (psurf) {
struct zink_surface *transient = zink_transient_surface(psurf);
- if (transient)
+ if (transient || psurf->nr_samples)
ctx->transient_attachments |= BITFIELD_BIT(i);
if (!samples)
- samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, 1);
+ samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1);
struct zink_resource *res = zink_resource(psurf->texture);
if (zink_csurface(psurf)->info.layerCount > layers)
ctx->fb_layer_mismatch |= BITFIELD_BIT(i);
@@ -3148,10 +3161,10 @@
if (ctx->fb_state.zsbuf) {
struct pipe_surface *psurf = ctx->fb_state.zsbuf;
struct zink_surface *transient = zink_transient_surface(psurf);
- if (transient)
+ if (transient || psurf->nr_samples)
ctx->transient_attachments |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
if (!samples)
- samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, 1);
+ samples = MAX3(transient ? transient->base.nr_samples : 1, psurf->texture->nr_samples, psurf->nr_samples ? psurf->nr_samples : 1);
if (zink_csurface(psurf)->info.layerCount > layers)
ctx->fb_layer_mismatch |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
zink_resource(psurf->texture)->fb_bind_count++;
@@ -4981,7 +4994,8 @@
_mesa_hash_table_init(&ctx->framebuffer_cache, ctx, hash_framebuffer_imageless, equals_framebuffer_imageless);
if (!zink_init_render_pass(ctx))
goto fail;
- _mesa_set_init(&ctx->rendering_state_cache, ctx, hash_rendering_state, equals_rendering_state);
+ for (unsigned i = 0; i < ARRAY_SIZE(ctx->rendering_state_cache); i++)
+ _mesa_set_init(&ctx->rendering_state_cache[i], ctx, hash_rendering_state, equals_rendering_state);
ctx->dynamic_fb.info.pColorAttachments = ctx->dynamic_fb.attachments;
ctx->dynamic_fb.info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO;
for (unsigned i = 0; i < ARRAY_SIZE(ctx->dynamic_fb.attachments); i++) {
diff --git a/src/gallium/drivers/zink/zink_render_pass.c b/src/gallium/drivers/zink/zink_render_pass.c
index 1f9a400..80bae85 100644
--- a/src/gallium/drivers/zink/zink_render_pass.c
+++ b/src/gallium/drivers/zink/zink_render_pass.c
@@ -78,6 +78,7 @@
pstate->num_cresolves = state->num_cresolves;
pstate->num_zsresolves = state->num_zsresolves;
pstate->fbfetch = 0;
+ pstate->msaa_samples = state->msaa_samples;
for (int i = 0; i < state->num_cbufs; i++) {
struct zink_rt_attrib *rt = state->rts + i;
attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
@@ -215,6 +216,15 @@
} else
subpass.pNext = NULL;
+ VkMultisampledRenderToSingleSampledInfoEXT msrtss = {
+ VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
+ subpass.pNext, /* chain onto whatever the subpass already carries (may be NULL), not the address of the pNext field itself */
+ VK_TRUE,
+ state->msaa_samples,
+ };
+ if (state->msaa_samples)
+ subpass.pNext = &msrtss;
+
VkRenderPassCreateInfo2 rpci = {0};
rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2;
rpci.attachmentCount = num_attachments + state->num_cresolves + state->num_zsresolves;
@@ -466,6 +476,8 @@
}
state.num_rts++;
}
+ state.msaa_samples = screen->info.have_EXT_multisampled_render_to_single_sampled && ctx->transient_attachments ?
+ ctx->gfx_pipeline_state.rast_samples + 1 : 0;
state.num_cbufs = fb->nr_cbufs;
assert(!state.num_cresolves || state.num_cbufs == state.num_cresolves);
@@ -751,7 +763,7 @@
setup_framebuffer(ctx);
if (ctx->batch.in_rp)
return 0;
- /* TODO: use VK_EXT_multisampled_render_to_single_sampled */
+
if (ctx->framebuffer->rp->state.msaa_expand_mask) {
uint32_t rp_state = ctx->gfx_pipeline_state.rp_state;
struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass;
@@ -796,7 +808,7 @@
{
if (ctx->batch.in_rp) {
VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf);
- /* TODO: use VK_EXT_multisampled_render_to_single_sampled */
+
for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) {
struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i];
if (csurf)
diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c
index b99c7c4..b1c812e 100644
--- a/src/gallium/drivers/zink/zink_resource.c
+++ b/src/gallium/drivers/zink/zink_resource.c
@@ -520,6 +520,11 @@
ici->tiling = screen->info.have_EXT_image_drm_format_modifier && modifiers_count ?
VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
bind & (PIPE_BIND_LINEAR | ZINK_BIND_DMABUF) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ /* XXX: does this have perf implications anywhere? hopefully not */
+ if (ici->samples == VK_SAMPLE_COUNT_1_BIT &&
+ screen->info.have_EXT_multisampled_render_to_single_sampled &&
+ ici->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ ici->flags |= VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT;
ici->sharingMode = VK_SHARING_MODE_EXCLUSIVE;
ici->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c
index d0afa8e..c1602a2 100644
--- a/src/gallium/drivers/zink/zink_surface.c
+++ b/src/gallium/drivers/zink/zink_surface.c
@@ -203,7 +203,7 @@
/* create a new surface */
struct zink_surface *surface = create_surface(pctx, pres, templ, ivci, actually);
/* only transient surfaces have nr_samples set */
- surface->base.nr_samples = 0;
+ surface->base.nr_samples = zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled ? templ->nr_samples : 0;
surface->hash = hash;
surface->ivci = *ivci;
return surface;
@@ -292,8 +292,7 @@
struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)wrap_surface(pctx, psurf);
- /* TODO: use VK_EXT_multisampled_render_to_single_sampled and skip this entirely */
- if (templ->nr_samples) {
+ if (templ->nr_samples && !zink_screen(pctx->screen)->info.have_EXT_multisampled_render_to_single_sampled) {
/* transient fb attachment: not cached */
struct pipe_resource rtempl = *pres;
rtempl.nr_samples = templ->nr_samples;
@@ -319,7 +318,7 @@
{
struct zink_surface *surface = zink_surface(psurface);
struct zink_resource *res = zink_resource(psurface->texture);
- if (!psurface->nr_samples && !surface->is_swapchain) {
+ if ((!psurface->nr_samples || screen->info.have_EXT_multisampled_render_to_single_sampled) && !surface->is_swapchain) {
simple_mtx_lock(&res->surface_mtx);
if (psurface->reference.count) {
/* a different context got a cache hit during deletion: this surface is alive again */
diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h
index 3584277..1e0208c 100644
--- a/src/gallium/drivers/zink/zink_types.h
+++ b/src/gallium/drivers/zink/zink_types.h
@@ -1037,6 +1037,7 @@
unsigned num_rts;
uint32_t clears; //for extra verification and update flagging
uint16_t msaa_expand_mask;
+ uint16_t msaa_samples; //used with VK_EXT_multisampled_render_to_single_sampled
};
struct zink_pipeline_rt {
@@ -1045,7 +1046,8 @@
};
struct zink_render_pass_pipeline_state {
- uint32_t num_attachments:22;
+ uint32_t num_attachments:14;
+ uint32_t msaa_samples : 8;
uint32_t fbfetch:1;
uint32_t color_read:1;
uint32_t depth_read:1;
@@ -1397,7 +1399,6 @@
struct zink_ctx_surface {
struct pipe_surface base;
struct zink_surface *surf; //the actual surface
- /* TODO: use VK_EXT_multisampled_render_to_single_sampled */
struct zink_ctx_surface *transient; //for use with EXT_multisample_render_to_texture
bool transient_init; //whether the transient surface has data
};
@@ -1619,7 +1620,7 @@
} dynamic_fb;
uint32_t fb_layer_mismatch; //bitmask
unsigned depth_bias_scale_factor;
- struct set rendering_state_cache;
+ struct set rendering_state_cache[6]; //[util_logbase2_ceil(msrtss samplecount)]
struct set render_pass_state_cache;
struct hash_table *render_pass_cache;
VkExtent2D swapchain_size;