panfrost: XMLify primitive information

This is grouped as the latter part of the prefix. There are some kludges
around a magic field for compute jobs that we'll deal with later. (I hope.)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6476>
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index fb27e21..bb5d018 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -76,13 +76,13 @@
 
 void
 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
-                                  struct mali_vertex_tiler_prefix *prefix,
+                                  bool points,
                                   union midgard_primitive_size *primitive_size)
 {
         struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
 
         if (!panfrost_writes_point_size(ctx)) {
-                float val = (prefix->draw_mode == MALI_DRAW_MODE_POINTS) ?
+                float val = points ?
                               rasterizer->base.point_size :
                               rasterizer->base.line_width;
 
@@ -90,24 +90,6 @@
         }
 }
 
-unsigned
-panfrost_translate_index_size(unsigned size)
-{
-        switch (size) {
-        case 1:
-                return MALI_DRAW_INDEXED_UINT8;
-
-        case 2:
-                return MALI_DRAW_INDEXED_UINT16;
-
-        case 4:
-                return MALI_DRAW_INDEXED_UINT32;
-
-        default:
-                unreachable("Invalid index size");
-        }
-}
-
 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
  * good for the duration of the draw (transient), could last longer. Also get
  * the bounds on the index buffer for the range accessed by the draw. We do
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h
index f722faf..735b9a4 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.h
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.h
@@ -68,9 +68,6 @@
 mali_ptr
 panfrost_vt_emit_shared_memory(struct panfrost_batch *batch);
 
-unsigned
-panfrost_translate_index_size(unsigned size);
-
 mali_ptr
 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
                                   const struct pipe_draw_info *info,
@@ -95,7 +92,7 @@
 
 void
 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
-                                  struct mali_vertex_tiler_prefix *prefix,
+                                  bool points,
                                   union midgard_primitive_size *primitive_size);
 
 mali_ptr
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 6fdb5f0..53a23bc 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -128,6 +128,13 @@
                                 PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
         }
 
+        unsigned magic =
+                util_logbase2_ceil(info->block[0] + 1) +
+                util_logbase2_ceil(info->block[1] + 1) +
+                util_logbase2_ceil(info->block[2] + 1);
+
+        payload.prefix.primitive.opaque[0] = (magic) << 26; /* XXX: occupies bits 26-31 of word 0, the "Unknown 3" field of the Primitive XML */
+
         memcpy(&payload.postfix, &postfix, sizeof(postfix));
 
         /* Invoke according to the grid info */
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index e6ebfdb..4da307d 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -262,6 +262,17 @@
         d->samplers = panfrost_emit_sampler_descriptors(batch, st);
 }
 
+static enum mali_index_type
+panfrost_translate_index_size(unsigned size)
+{
+        switch (size) {
+        case 1: return MALI_INDEX_TYPE_UINT8;
+        case 2: return MALI_INDEX_TYPE_UINT16;
+        case 4: return MALI_INDEX_TYPE_UINT32;
+        default: unreachable("Invalid index size");
+        }
+}
+
 static void
 panfrost_draw_vbo(
         struct pipe_context *pipe,
@@ -315,48 +326,43 @@
 
         struct mali_vertex_tiler_prefix vertex_prefix = { 0 }, tiler_prefix = { 0 };
         struct mali_draw_packed vertex_postfix, tiler_postfix;
+        struct mali_primitive_packed primitive;
         union midgard_primitive_size primitive_size;
-        unsigned vertex_count;
+        unsigned vertex_count = ctx->vertex_count;
 
         mali_ptr shared_mem = (device->quirks & IS_BIFROST) ?
                 panfrost_vt_emit_shared_memory(batch) :
                 panfrost_batch_reserve_framebuffer(batch);
 
         struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
-        SET_BIT(tiler_prefix.unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
-                rast->flatshade_first);
+        unsigned min_index = 0, max_index = 0;
 
-        tiler_prefix.draw_mode = pan_draw_mode(mode);
+        pan_pack(&primitive, PRIMITIVE, cfg) {
+                cfg.draw_mode = pan_draw_mode(mode);
+                cfg.point_size_array = panfrost_writes_point_size(ctx);
+                cfg.first_provoking_vertex = rast->flatshade_first;
+                cfg.primitive_restart = info->primitive_restart;
+                cfg.unknown_3 = 6;
 
-        unsigned draw_flags = 0x3000;
+                if (info->index_size) {
+                        cfg.index_type = panfrost_translate_index_size(info->index_size);
+                        cfg.indices = panfrost_get_index_buffer_bounded(ctx, info,
+                                        &min_index, &max_index);
 
-        if (panfrost_writes_point_size(ctx))
-                draw_flags |= MALI_DRAW_VARYING_SIZE;
+                        /* Use the corresponding values */
+                        vertex_count = max_index - min_index + 1;
+                        ctx->offset_start = min_index + info->index_bias;
 
-        if (info->primitive_restart)
-                draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
-
-        if (info->index_size) {
-                unsigned min_index = 0, max_index = 0;
-
-                tiler_prefix.indices = panfrost_get_index_buffer_bounded(ctx,
-                                                                       info,
-                                                                       &min_index,
-                                                                       &max_index);
-
-                /* Use the corresponding values */
-                vertex_count = max_index - min_index + 1;
-                ctx->offset_start = min_index + info->index_bias;
-                tiler_prefix.offset_bias_correction = -min_index;
-                tiler_prefix.index_count = MALI_POSITIVE(info->count);
-                draw_flags |= panfrost_translate_index_size(info->index_size);
-        } else {
-                vertex_count = ctx->vertex_count;
-                ctx->offset_start = info->start;
-                tiler_prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
+                        cfg.base_vertex_offset = -min_index;
+                        cfg.index_count = info->count;
+                } else {
+                        ctx->offset_start = info->start;
+                        cfg.index_count = ctx->vertex_count;
+                }
         }
 
-        tiler_prefix.unknown_draw = draw_flags;
+        vertex_prefix.primitive.opaque[0] = (5) << 26; /* XXX: "Unknown 3" (bits 26-31) is 5 for the vertex job; the tiler job packs 6 */
+        memcpy(&tiler_prefix.primitive, &primitive, sizeof(primitive));
 
         /* Encode the padded vertex count */
 
@@ -415,7 +421,7 @@
         }
 
         primitive_size.pointer = psiz;
-        panfrost_vt_update_primitive_size(ctx, &tiler_prefix, &primitive_size);
+        panfrost_vt_update_primitive_size(ctx, info->mode == PIPE_PRIM_POINTS, &primitive_size);
 
         /* Fire off the draw itself */
         panfrost_emit_vertex_tiler_jobs(batch, &vertex_prefix, &vertex_postfix,
diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c
index 453d9de..8b46569 100644
--- a/src/panfrost/bifrost/test/bi_submit.c
+++ b/src/panfrost/bifrost/test/bi_submit.c
@@ -182,6 +182,9 @@
 
         struct bifrost_payload_vertex payload = {
                 .prefix = {
+                        .primitive = {
+                                .opaque = { (5) << 26 }
+                        }
                 },
                 .postfix = {
                         .gl_enables = 0x2,
@@ -201,8 +204,6 @@
                         1, 1, 1,
                         true);
 
-        payload.prefix.workgroups_x_shift_3 = 5;
-
         struct panfrost_bo *bos[] = {
                 scratchpad, shmem, shader, shader_desc, ubo, var, attr
         };
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index ce5aefd..8cb30ed 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -420,23 +420,6 @@
  * fused payloads.
  */
 
-/* Applies to unknown_draw */
-
-#define MALI_DRAW_INDEXED_UINT8  (0x10)
-#define MALI_DRAW_INDEXED_UINT16 (0x20)
-#define MALI_DRAW_INDEXED_UINT32 (0x30)
-#define MALI_DRAW_INDEXED_SIZE   (0x30)
-#define MALI_DRAW_INDEXED_SHIFT  (4)
-
-#define MALI_DRAW_VARYING_SIZE   (0x100)
-
-/* Set to use first vertex as the provoking vertex for flatshading. Clear to
- * use the last vertex. This is the default in DX and VK, but not in GL. */
-
-#define MALI_DRAW_FLATSHADE_FIRST (0x800)
-
-#define MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX (0x10000)
-
 struct mali_vertex_tiler_prefix {
         /* This is a dynamic bitfield containing the following things in this order:
          *
@@ -474,52 +457,7 @@
          */
         u32 invocation_shifts;
 
-        u32 draw_mode : 4;
-        u32 unknown_draw : 22;
-
-        /* This is the the same as workgroups_x_shift_2 in compute shaders, but
-         * always 5 for vertex jobs and 6 for tiler jobs. I suspect this has
-         * something to do with how many quads get put in the same execution
-         * engine, which is a balance (you don't want to starve the engine, but
-         * you also want to distribute work evenly).
-         */
-        u32 workgroups_x_shift_3 : 6;
-
-
-        /* Negative of min_index. This is used to compute
-         * the unbiased index in tiler/fragment shader runs.
-         * 
-         * The hardware adds offset_bias_correction in each run,
-         * so that absent an index bias, the first vertex processed is
-         * genuinely the first vertex (0). But with an index bias,
-         * the first vertex process is numbered the same as the bias.
-         *
-         * To represent this more conviniently:
-         * unbiased_index = lower_bound_index +
-         *                  index_bias +
-         *                  offset_bias_correction
-         *
-         * This is done since the hardware doesn't accept a index_bias
-         * and this allows it to recover the unbiased index.
-         */
-        int32_t offset_bias_correction;
-        u32 zero1;
-
-        /* Like many other strictly nonzero quantities, index_count is
-         * subtracted by one. For an indexed cube, this is equal to 35 = 6
-         * faces * 2 triangles/per face * 3 vertices/per triangle - 1. That is,
-         * for an indexed draw, index_count is the number of actual vertices
-         * rendered whereas invocation_count is the number of unique vertices
-         * rendered (the number of times the vertex shader must be invoked).
-         * For non-indexed draws, this is just equal to invocation_count. */
-
-        u32 index_count;
-
-        /* No hidden structure; literally just a pointer to an array of uint
-         * indices (width depends on flags). Thanks, guys, for not making my
-         * life insane for once! NULL for non-indexed draws. */
-
-        u64 indices;
+        struct mali_primitive_packed primitive;
 } __attribute__((packed));
 
 /* Point size / line width can either be specified as a 32-bit float (for
diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c
index 3fa3da0..78b0a54 100644
--- a/src/panfrost/lib/decode.c
+++ b/src/panfrost/lib/decode.c
@@ -1299,49 +1299,30 @@
                         size_x, size_y, size_z,
                         groups_x, groups_y, groups_z);
 
-        /* TODO: Decode */
-        if (p->unknown_draw)
-                pandecode_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
-
-        pandecode_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
-
-        if (p->draw_mode != MALI_DRAW_MODE_NONE)
-                pandecode_prop("draw_mode = %s", mali_draw_mode_as_str(p->draw_mode));
-
-        /* Index count only exists for tiler jobs anyway */
-
-        if (p->index_count)
-                pandecode_prop("index_count = MALI_POSITIVE(%" PRId32 ")", p->index_count + 1);
-
-
-        unsigned index_raw_size = (p->unknown_draw & MALI_DRAW_INDEXED_SIZE);
-        index_raw_size >>= MALI_DRAW_INDEXED_SHIFT;
+        fprintf(pandecode_dump_stream, "Primitive\n");
+        struct MALI_PRIMITIVE primitive;
+        struct mali_primitive_packed prim_packed = p->primitive;
+        MALI_PRIMITIVE_unpack((const uint8_t *) &prim_packed, &primitive);
+        MALI_PRIMITIVE_print(pandecode_dump_stream, &primitive, 1 * 2);
 
         /* Validate an index buffer is present if we need one. TODO: verify
          * relationship between invocation_count and index_count */
 
-        if (p->indices) {
-                unsigned count = p->index_count;
-
+        if (primitive.indices) {
                 /* Grab the size */
-                unsigned size = (index_raw_size == 0x3) ? 4 : index_raw_size;
+                unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ?
+                        sizeof(uint32_t) : primitive.index_type;
 
                 /* Ensure we got a size, and if so, validate the index buffer
                  * is large enough to hold a full set of indices of the given
                  * size */
 
-                if (!index_raw_size)
+                if (!size)
                         pandecode_msg("XXX: index size missing\n");
                 else
-                        pandecode_validate_buffer(p->indices, count * size);
-        } else if (index_raw_size)
-                pandecode_msg("XXX: unexpected index size %u\n", index_raw_size);
-
-        if (p->offset_bias_correction)
-                pandecode_prop("offset_bias_correction = %d", p->offset_bias_correction);
-
-        /* TODO: Figure out what this is. It's not zero */
-        pandecode_prop("zero1 = 0x%" PRIx32, p->zero1);
+                        pandecode_validate_buffer(primitive.indices, primitive.index_count * size);
+        } else if (primitive.index_type)
+                pandecode_msg("XXX: unexpected index size\n");
 
         pandecode_indent--;
         pandecode_log("},\n");
@@ -2092,8 +2073,11 @@
         pandecode_vertex_tiler_prefix(&v->prefix, job_no, is_graphics);
         pandecode_vertex_tiler_postfix(&v->postfix, job_no, false);
 
-        bool has_primitive_pointer = v->prefix.unknown_draw & MALI_DRAW_VARYING_SIZE;
-        pandecode_primitive_size(v->primitive_size, !has_primitive_pointer);
+        struct MALI_PRIMITIVE primitive;
+        struct mali_primitive_packed prim_packed = v->prefix.primitive;
+        MALI_PRIMITIVE_unpack((const uint8_t *) &prim_packed, &primitive);
+
+        pandecode_primitive_size(v->primitive_size, primitive.point_size_array == 0);
 
         pandecode_indent--;
         pandecode_log("};\n");
diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml
index af20957..ee887e7 100644
--- a/src/panfrost/lib/midgard.xml
+++ b/src/panfrost/lib/midgard.xml
@@ -201,6 +201,13 @@
     <value name="Layered" value="3"/>
   </enum>
 
+  <enum name="Index Type">
+    <value name="None" value="0"/>
+    <value name="UINT8" value="1"/>
+    <value name="UINT16" value="2"/>
+    <value name="UINT32" value="3"/>
+  </enum>
+
   <enum name="Occlusion Mode">
     <value name="Disabled" value="0"/>
     <value name="Predicate" value="1"/>
@@ -290,6 +297,23 @@
     <field name="Shader" size="64" start="2:0" type="address"/>
   </struct>
 
+  <struct name="Primitive">
+    <field name="Draw mode" size="4" start="0:0" type="Draw Mode" default="None"/>
+    <field name="Index type" size="2" start="0:8" type="Index Type" default="None"/>
+    <field name="Point size array" size="1" start="0:12" type="bool"/>
+    <!-- TODO: bits 13-14 -->
+    <field name="First provoking vertex" size="1" start="0:15" type="bool" default="true"/>
+    <field name="Unknown 1" size="1" start="0:16" type="bool" default="true"/>
+    <field name="Unknown 2" size="1" start="0:17" type="bool" default="true"/>
+    <!-- TODO: bits 18-19 -->
+    <field name="Primitive restart" size="1" start="0:20" type="bool"/>
+    <field name="Unknown 3" size="6" start="0:26" type="uint"/>
+    <field name="Base vertex offset" size="32" start="1:0" type="uint"/>
+    <!-- TODO: word 2 -->
+    <field name="Index count" size="32" start="3:0" type="uint" modifier="minus(1)"/>
+    <field name="Indices" size="64" start="4:0" type="address"/>
+  </struct>
+
   <struct name="Draw" size="30">
     <field name="Unknown 1" size="3" start="0:0" type="uint"/>
     <field name="Occlusion query" size="2" start="0:3" type="Occlusion Mode" default="Disabled"/>
diff --git a/src/panfrost/lib/pan_blit.c b/src/panfrost/lib/pan_blit.c
index c1b704b..f9ca87e 100644
--- a/src/panfrost/lib/pan_blit.c
+++ b/src/panfrost/lib/pan_blit.c
@@ -343,14 +343,8 @@
                 }
         }
 
-        struct midgard_payload_vertex_tiler payload = {
-                .prefix = {
-                        .draw_mode = MALI_DRAW_MODE_TRIANGLES,
-                        .unknown_draw = 0x3000,
-                        .index_count = MALI_POSITIVE(vertex_count)
-                },
-        };
-
+        struct midgard_payload_vertex_tiler payload = {};
+        struct mali_primitive_packed primitive;
         struct mali_draw_packed draw;
 
         pan_pack(&draw, DRAW, cfg) {
@@ -365,10 +359,16 @@
                 cfg.shared = fbd;
         }
 
+        pan_pack(&primitive, PRIMITIVE, cfg) {
+                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES;
+                cfg.index_count = vertex_count;
+                cfg.unknown_3 = 6;
+        }
+
+        memcpy(&payload.prefix.primitive, &primitive, sizeof(primitive)); /* size of the packed primitive, not MALI_DRAW_LENGTH */
         memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
 
         panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
-        payload.prefix.workgroups_x_shift_3 = 6;
 
         panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
 }
diff --git a/src/panfrost/lib/pan_invocation.c b/src/panfrost/lib/pan_invocation.c
index d86b16a..cfb5bec 100644
--- a/src/panfrost/lib/pan_invocation.c
+++ b/src/panfrost/lib/pan_invocation.c
@@ -106,9 +106,6 @@
         /* Upload the packed bitfields */
         out->invocation_count = packed;
         out->invocation_shifts = packed_shifts;
-
-        /* TODO: Compute workgroups_x_shift_3 */
-        out->workgroups_x_shift_3 = shift_2;
 }
 
 /* Packs vertex/tiler descriptors simultaneously */
@@ -128,9 +125,5 @@
         /* Copy results over */
         tiler->invocation_count = vertex->invocation_count;
         tiler->invocation_shifts = vertex->invocation_shifts;
-
-        /* Set special fields for each */
-        vertex->workgroups_x_shift_3 = 5;
-        tiler->workgroups_x_shift_3 = 6;
 }