zink: more correctly handle PIPE_QUERY_PRIMITIVES_GENERATED queries

in normal operation we want to use INPUT_ASSEMBLY_PRIMITIVES_BIT,
but when a geometry shader is bound we actually want to use
GEOMETRY_SHADER_PRIMITIVES_BIT, which means we need to track, for
each draw, whether a geometry shader is active while a query is running

to do this, we keep a list of all active queries of this type and
iterate over it on every draw to flag the gs state of each query that's
being drawn to. this works because our ring buffer of batches will
always wait on a fence after a full cycle, meaning there can only ever
be 4 queries with outstanding results

Fixes: e40a77ea5d0 ("zink: use right vulkan type for GL_PRIMITIVES_GENERATED queries")

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7195>
diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h
index a901bfb..593c022 100644
--- a/src/gallium/drivers/zink/zink_context.h
+++ b/src/gallium/drivers/zink/zink_context.h
@@ -129,6 +129,7 @@
    struct pipe_stencil_ref stencil_ref;
 
    struct list_head suspended_queries;
+   struct list_head primitives_generated_queries;
    bool queries_disabled;
 
    struct pipe_resource *dummy_buffer;
diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c
index b8a1fad..cedba3f 100644
--- a/src/gallium/drivers/zink/zink_draw.c
+++ b/src/gallium/drivers/zink/zink_draw.c
@@ -1,6 +1,7 @@
 #include "zink_compiler.h"
 #include "zink_context.h"
 #include "zink_program.h"
+#include "zink_query.h"
 #include "zink_resource.h"
 #include "zink_screen.h"
 #include "zink_state.h"
@@ -472,6 +473,8 @@
                            gfx_program->layout, 0, 1, &desc_set, 0, NULL);
    zink_bind_vertex_buffers(batch, ctx);
 
+   zink_query_update_gs_states(ctx);
+
    if (ctx->num_so_targets) {
       for (unsigned i = 0; i < ctx->num_so_targets; i++) {
          struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
diff --git a/src/gallium/drivers/zink/zink_query.c b/src/gallium/drivers/zink/zink_query.c
index 7fdd532..5526a01 100644
--- a/src/gallium/drivers/zink/zink_query.c
+++ b/src/gallium/drivers/zink/zink_query.c
@@ -32,6 +32,9 @@
    unsigned fences;
    struct list_head active_list;
 
+   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
+   bool have_gs[4]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT; sized by ctx->batches[] array size */
+
    union pipe_query_result accumulated_result;
 };
 
@@ -110,7 +113,8 @@
    pool_create.queryType = query->vkqtype;
    pool_create.queryCount = query->num_queries;
    if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
-     pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
+     pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
+                                      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
 
    VkResult status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->query_pool);
    if (status != VK_SUCCESS) {
@@ -196,10 +200,11 @@
    int num_results = query->curr_query - query->last_checked_query;
    int result_size = 1;
       /* these query types emit 2 values */
-   if (query->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
+   if (query->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
+       query->vkqtype == VK_QUERY_TYPE_PIPELINE_STATISTICS)
       result_size = 2;
 
-   /* verify that we have an in-bounds number of results pending */
+   /* verify that we have the expected number of results pending */
    assert(query->curr_query <= ARRAY_SIZE(results) / result_size);
    VkResult status = vkGetQueryPoolResults(screen->dev, query->query_pool,
                                            query->last_checked_query, num_results,
@@ -234,7 +239,8 @@
          result->u64 += results[i];
          break;
       case PIPE_QUERY_PRIMITIVES_GENERATED:
-         result->u32 += results[i];
+         /* if a given draw had a geometry shader, we need to use the second result */
+         result->u32 += ((uint32_t*)results)[i + query->have_gs[i / 2]];
          break;
       case PIPE_QUERY_PRIMITIVES_EMITTED:
          /* A query pool created with this type will capture 2 integers -
@@ -252,6 +258,8 @@
       }
    }
    query->last_checked_query = query->curr_query;
+   for (unsigned i = 0; i < num_results; i++)
+      query->have_gs[i] = false;
 
    if (is_time_query(query))
       timestamp_to_nanoseconds(screen, &result->u64);
@@ -303,6 +311,8 @@
    if (!batch->active_queries)
       batch->active_queries = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    assert(batch->active_queries);
+   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
    p_atomic_inc(&q->fences);
    _mesa_set_add(batch->active_queries, q);
 }
@@ -312,11 +322,12 @@
                  struct pipe_query *q)
 {
    struct zink_query *query = (struct zink_query *)q;
-   struct zink_batch *batch = zink_curr_batch(zink_context(pctx));
+   struct zink_context *ctx = zink_context(pctx);
+   struct zink_batch *batch = zink_curr_batch(ctx);
 
    util_query_clear_result(&query->accumulated_result, query->type);
 
-   begin_query(zink_context(pctx), batch, query);
+   begin_query(ctx, batch, query);
 
    return true;
 }
@@ -333,6 +344,8 @@
       screen->vk_CmdEndQueryIndexedEXT(batch->cmdbuf, q->query_pool, q->curr_query, q->index);
    else
       vkCmdEndQuery(batch->cmdbuf, q->query_pool, q->curr_query);
+   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+      list_delinit(&q->stats_list);
    if (++q->curr_query == q->num_queries) {
       /* can't do zink_batch_no_rp here because we might already be inside a zink_batch_no_rp */
       if (batch->rp)
@@ -350,6 +363,8 @@
    struct zink_query *query = (struct zink_query *)q;
    struct zink_batch *batch = zink_curr_batch(ctx);
 
+   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
+      list_delinit(&query->stats_list);
    if (query->active)
       end_query(ctx, batch, query);
 
@@ -397,6 +412,16 @@
    }
 }
 
+void
+zink_query_update_gs_states(struct zink_context *ctx) /* flags, for each query on ctx->primitives_generated_queries, whether a geometry shader stage is set in ctx */
+{
+   struct zink_query *query;
+   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) { /* queries are linked into this list through their stats_list member */
+      assert(query->curr_query - query->last_checked_query < ARRAY_SIZE(query->have_gs)); /* the slot index used below must stay inside have_gs[] */
+      query->have_gs[query->curr_query - query->last_checked_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; /* slot = curr_query - last_checked_query; true when gfx_stages[PIPE_SHADER_GEOMETRY] is non-zero */
+   }
+}
+
 static void
 zink_set_active_query_state(struct pipe_context *pctx, bool enable)
 {
@@ -486,6 +511,7 @@
 {
    struct zink_context *ctx = zink_context(pctx);
    list_inithead(&ctx->suspended_queries);
+   list_inithead(&ctx->primitives_generated_queries);
 
    pctx->create_query = zink_create_query;
    pctx->destroy_query = zink_destroy_query;
diff --git a/src/gallium/drivers/zink/zink_query.h b/src/gallium/drivers/zink/zink_query.h
index dea3562..d9606c2 100644
--- a/src/gallium/drivers/zink/zink_query.h
+++ b/src/gallium/drivers/zink/zink_query.h
@@ -38,4 +38,7 @@
 void
 zink_prune_queries(struct zink_screen *screen, struct zink_fence *fence);
 
+void
+zink_query_update_gs_states(struct zink_context *ctx);
+
 #endif