v3d/compiler: implement nir_intrinsic_load_base_instance

Vulkan lowers gl_InstanceIndex to load_base_instance +
load_instance_id, so we need to implement loading the base instance in
the compiler.

The base instance is set by the BASE_VERTEX_BASE_INSTANCE command
right before the instanced draw call and it is included in the VPM
payload together with the InstanceID and VertexID if this is requested
by the shader record.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 475f5cd..12dcfe9 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1560,10 +1560,14 @@
         bool uses_iid = c->s->info.system_values_read &
                 (1ull << SYSTEM_VALUE_INSTANCE_ID |
                  1ull << SYSTEM_VALUE_INSTANCE_INDEX);
+        bool uses_biid = c->s->info.system_values_read &
+                (1ull << SYSTEM_VALUE_BASE_INSTANCE);
         bool uses_vid = c->s->info.system_values_read &
                 (1ull << SYSTEM_VALUE_VERTEX_ID |
                  1ull << SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+
         num_components += uses_iid;
+        num_components += uses_biid;
         num_components += uses_vid;
 
         for (int i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
@@ -1574,6 +1578,11 @@
                                            &num_components, ~0);
         }
 
+        if (uses_biid) {
+                c->biid = ntq_emit_vpm_read(c, &vpm_components_queued,
+                                            &num_components, ~0);
+        }
+
         if (uses_vid) {
                 c->vid = ntq_emit_vpm_read(c, &vpm_components_queued,
                                            &num_components, ~0);
@@ -2003,6 +2012,10 @@
                       index++;
                }
                if (c->s->info.system_values_read &
+                   (1ull << SYSTEM_VALUE_BASE_INSTANCE)) {
+                      index++;
+               }
+               if (c->s->info.system_values_read &
                    (1ull << SYSTEM_VALUE_VERTEX_ID)) {
                       index++;
                }
@@ -2252,6 +2265,10 @@
                                        vir_REVF(c)));
                 break;
 
+        case nir_intrinsic_load_base_instance:
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->biid));
+                break;
+
         case nir_intrinsic_load_instance_id:
                 ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->iid));
                 break;
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index b829173..e17aa4c 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -585,9 +585,15 @@
         struct qreg iid;
 
         /**
-         * Vertex ID, which comes in before the vertex attribute payload
+         * Base Instance ID, which comes in before the vertex attribute payload
          * (after Instance ID) if the shader record requests it.
          */
+        struct qreg biid;
+
+        /**
+         * Vertex ID, which comes in before the vertex attribute payload
+         * (after Base Instance) if the shader record requests it.
+         */
         struct qreg vid;
 
         /* Fragment shader payload regs. */
@@ -714,7 +720,7 @@
 struct v3d_vs_prog_data {
         struct v3d_prog_data base;
 
-        bool uses_iid, uses_vid;
+        bool uses_iid, uses_biid, uses_vid;
 
         /* Number of components read from each vertex attribute. */
         uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 0c837fd..39cae23 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -639,12 +639,17 @@
                                (1ull << SYSTEM_VALUE_VERTEX_ID |
                                 1ull << SYSTEM_VALUE_VERTEX_ID_ZERO_BASE));
 
+        prog_data->uses_biid = (c->s->info.system_values_read &
+                                (1ull << SYSTEM_VALUE_BASE_INSTANCE));
+
         prog_data->uses_iid = (c->s->info.system_values_read &
                                (1ull << SYSTEM_VALUE_INSTANCE_ID |
                                 1ull << SYSTEM_VALUE_INSTANCE_INDEX));
 
         if (prog_data->uses_vid)
                 prog_data->vpm_input_size++;
+        if (prog_data->uses_biid)
+                prog_data->vpm_input_size++;
         if (prog_data->uses_iid)
                 prog_data->vpm_input_size++;