panfrost: XMLify Bifrost preload

There's a lot of code here since the meaning of this field changes
depending on shader stage. The good news is that our careful handling
allows preload registers to be decoded now, which pandecode could not
previously do. Likewise, the cmdstream code to emit this is now much
more obvious.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6440>
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 8f9771b..6204251 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -323,16 +323,22 @@
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
+                struct mali_preload_vertex_packed preload;
 
                 pan_pack(&prop, BIFROST_PROPERTIES, cfg) {
                         cfg.unknown = 0x800000; /* XXX */
                         cfg.uniform_buffer_count = panfrost_ubo_count(ctx, st);
                 }
 
-                memcpy(&meta->bifrost_props, &prop, sizeof(prop));
+                /* TODO: True compute shaders */
+                pan_pack(&preload, PRELOAD_VERTEX, cfg) {
+                        cfg.uniform_count = ss->uniform_count;
+                        cfg.vertex_id = true;
+                        cfg.instance_id = true;
+                }
 
-                meta->bifrost2.preload_regs = 0xC0;
-                meta->bifrost2.uniform_count = ss->uniform_count;
+                memcpy(&meta->bifrost_props, &prop, sizeof(prop));
+                memcpy(&meta->bifrost_preload, &preload, sizeof(preload));
         } else {
                 struct mali_midgard_properties_packed prop;
 
@@ -572,6 +578,7 @@
 
         if (dev->quirks & IS_BIFROST) {
                 struct mali_bifrost_properties_packed prop;
+                struct mali_preload_fragment_packed preload;
 
                 bool no_blend = true;
 
@@ -584,12 +591,13 @@
                         cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend;
                 }
 
+                pan_pack(&preload, PRELOAD_FRAGMENT, cfg) {
+                        cfg.uniform_count = fs->uniform_count;
+                        cfg.fragment_position = fs->reads_frag_coord;
+                }
+
                 memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop));
-
-                fragmeta->bifrost2.preload_regs = 0x1;
-                SET_BIT(fragmeta->bifrost2.preload_regs, 0x10, fs->reads_frag_coord);
-
-                fragmeta->bifrost2.uniform_count = fs->uniform_count;
+                memcpy(&fragmeta->bifrost_preload, &preload, sizeof(preload));
         } else {
                 struct mali_midgard_properties_packed prop;
 
diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c
index 3688e2b..542ed72 100644
--- a/src/panfrost/bifrost/test/bi_submit.c
+++ b/src/panfrost/bifrost/test/bi_submit.c
@@ -176,12 +176,7 @@
                 .attribute_count = 1,
                 .varying_count = 1,
                 .bifrost_props = { .opaque = { 0x80020001 } },
-                .bifrost2 = {
-                        .unk3 = 0x0,
-                        .preload_regs = 0xc0,
-                        .uniform_count = sz_ubo / 16,
-                        .unk4 = 0x0,
-                },
+                .bifrost_preload = { .opaque = { (sz_ubo / 16) << 15 } },
         };
 
         memcpy(shader_desc->cpu, &meta, sizeof(meta));
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index af70f56..e226706 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -326,52 +326,7 @@
         struct mali_stencil_packed stencil_back;
 
         union {
-                struct {
-                        u32 unk3 : 7;
-                        /* On Bifrost, some system values are preloaded in
-                         * registers R55-R62 by the thread dispatcher prior to
-                         * the start of shader execution. This is a bitfield
-                         * with one entry for each register saying which
-                         * registers need to be preloaded. Right now, the known
-                         * values are:
-                         *
-                         * Vertex/compute:
-                         * - R55 : gl_LocalInvocationID.xy
-                         * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
-                         * - R57 : gl_WorkGroupID.x
-                         * - R58 : gl_WorkGroupID.y
-                         * - R59 : gl_WorkGroupID.z
-                         * - R60 : gl_GlobalInvocationID.x
-                         * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
-                         * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
-                         *
-                         * Fragment:
-                         * - R55 : unknown, never seen (but the bit for this is
-                         *   always set?)
-                         * - R56 : unknown (bit always unset)
-                         * - R57 : gl_PrimitiveID
-                         * - R58 : gl_FrontFacing in low bit, potentially other stuff
-                         * - R59 : u16 fragment coordinates (used to compute
-                         *   gl_FragCoord.xy, together with sample positions)
-                         * - R60 : gl_SampleMask (used in epilog, so pretty
-                         *   much always used, but the bit is always 0 -- is
-                         *   this just always pushed?)
-                         * - R61 : gl_SampleMaskIn and gl_SampleID, used by
-                         *   varying interpolation.
-                         * - R62 : unknown (bit always unset).
-                         *
-                         * Later GPUs (starting with Mali-G52?) support
-                         * preloading float varyings into r0-r7. This is
-                         * indicated by setting 0x40. There is no distinction
-                         * here between 1 varying and 2.
-                         */
-                        u32 preload_regs : 8;
-                        /* In units of 8 bytes or 64 bits, since the
-                         * uniform/const port loads 64 bits at a time.
-                         */
-                        u32 uniform_count : 7;
-                        u32 unk4 : 10; // = 2
-                } bifrost2;
+                struct mali_preload_packed bifrost_preload;
                 struct {
                         u32 unknown2_7;
                 } midgard2;
diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c
index 6b2e71a..fcfc590 100644
--- a/src/panfrost/lib/decode.c
+++ b/src/panfrost/lib/decode.c
@@ -1731,6 +1731,7 @@
 
                 struct MALI_MIDGARD_PROPERTIES midg_props;
                 struct MALI_BIFROST_PROPERTIES bi_props;
+                struct MALI_PRELOAD bi_preload;
 
                 pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix);
                 pandecode_indent++;
@@ -1745,7 +1746,10 @@
                         uint32_t opaque = s->bifrost_props.opaque[0];
                         MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &opaque, &bi_props);
 
-                        uniform_count = s->bifrost2.uniform_count;
+                        opaque = s->bifrost_preload.opaque[0];
+                        MALI_PRELOAD_unpack((const uint8_t *) &opaque, &bi_preload);
+
+                        uniform_count = bi_preload.uniform_count;
                         uniform_buffer_count = bi_props.uniform_buffer_count;
                 } else {
                         uint32_t opaque = s->midgard_props.opaque[0];
@@ -1767,6 +1771,24 @@
                 else
                         MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2);
 
+                if (is_bifrost) {
+                        uint32_t opaque = s->bifrost_preload.opaque[0];
+                        switch (job_type) {
+                        case MALI_JOB_TYPE_VERTEX:
+                                DUMP_CL("Preload", PRELOAD_VERTEX, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_TILER:
+                                DUMP_CL("Preload", PRELOAD_FRAGMENT, &opaque, 2);
+                                break;
+                        case MALI_JOB_TYPE_COMPUTE:
+                                DUMP_CL("Preload", PRELOAD_COMPUTE, &opaque, 2);
+                                break;
+                        default:
+                                DUMP_CL("Preload", PRELOAD, &opaque, 2);
+                                break;
+                        }
+                }
+
                 if (s->depth_units || s->depth_factor) {
                         pandecode_prop("depth_factor = %f", s->depth_factor);
                         pandecode_prop("depth_units = %f", s->depth_units);
@@ -1809,18 +1831,7 @@
                 DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1);
                 DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1);
 
-                if (is_bifrost) {
-                        pandecode_log(".bifrost2 = {\n");
-                        pandecode_indent++;
-
-                        pandecode_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
-                        pandecode_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs);
-                        pandecode_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count);
-                        pandecode_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4);
-
-                        pandecode_indent--;
-                        pandecode_log("},\n");
-                } else if (s->midgard2.unknown2_7) {
+                if (!is_bifrost && s->midgard2.unknown2_7) {
                         pandecode_log(".midgard2 = {\n");
                         pandecode_indent++;
 
diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml
index 352398b..d6fa7e4 100644
--- a/src/panfrost/lib/midgard.xml
+++ b/src/panfrost/lib/midgard.xml
@@ -363,6 +363,38 @@
     <field name="Unknown" size="32" start="0" type="uint"/>
   </struct>
 
+  <struct name="Preload" size="1">
+    <field name="Untyped" size="15" start="0" type="uint"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Compute" size="1">
+    <field name="Local Invocation XY" size="1" start="7" type="bool"/>
+    <field name="Local Invocation Z" size="1" start="8" type="bool"/>
+    <field name="Work group X" size="1" start="9" type="bool"/>
+    <field name="Work group Y" size="1" start="10" type="bool"/>
+    <field name="Work group Z" size="1" start="11" type="bool"/>
+    <field name="Global Invocation X" size="1" start="12" type="bool"/>
+    <field name="Global Invocation Y" size="1" start="13" type="bool"/>
+    <field name="Global Invocation Z" size="1" start="14" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Vertex" size="1">
+    <field name="Vertex ID" size="1" start="13" type="bool"/>
+    <field name="Instance ID" size="1" start="14" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
+  <struct name="Preload Fragment" size="1">
+    <field name="Unknown" size="1" start="7" type="bool" default="true"/>
+    <field name="Primitive ID" size="1" start="9" type="bool"/>
+    <field name="Front facing" size="1" start="10" type="bool"/>
+    <field name="Fragment position" size="1" start="11" type="bool"/>
+    <field name="Sample mask/ID" size="1" start="12" type="bool"/>
+    <field name="Uniform count" size="7" start="15" type="uint"/>
+  </struct>
+
   <struct name="Stencil">
     <field name="Reference Value" size="8" start="0" type="uint"/>
     <field name="Mask" size="8" start="8" type="uint" default="0xFF"/>