v3dv: vertex input support

This includes:

   * Implementation for CmdBindVertexBuffers

   * Gather vertex input info during CreateGraphicsPipelines
     (pipeline_init) and SHADER_STATE_ATTRIBUTE_RECORD prepacking

   * Final emission of such packet during CmdDraw
     (cmd_buffer_emit_graphics_pipeline)

Default attribute values will be handled in a follow-up patch.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index cc29ba8..442cbf7 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -1526,18 +1526,8 @@
    job->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
    job->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
 
-   /* FIXME: fake vtx->num_elements, that is the vertex state that includes
-    * data from the buffers used on the vertex. Such info is still not
-    * supported or filled in any place. On Gallium that is filled by
-    * st_update_array, that eventually calls v3d_vertex_state_create
-    *
-    * We area handling it mostly to the GFXH-930 workaround mentioned below,
-    * as it would provide more context of why it is needed and to the code.
-    */
-   uint32_t vtx_num_elements = 0;
-
    /* See GFXH-930 workaround below */
-   uint32_t num_elements_to_emit = MAX2(vtx_num_elements, 1);
+   uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
 
    uint32_t shader_rec_offset =
       v3dv_cl_ensure_space(&job->indirect,
@@ -1570,16 +1560,53 @@
       shader.vertex_shader_uniforms_address = vs_uniforms;
       shader.fragment_shader_uniforms_address = fs_uniforms;
 
-      /* FIXME: I understand that the following is needed only if
-       * vtx_num_elements > 0
-       */
-/*       shader.address_of_default_attribute_values = */
+      /* FIXME: pending */
+      /* shader.address_of_default_attribute_values = */
    }
 
    /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
+   bool cs_loaded_any = false;
+   const uint32_t packet_length =
+      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
 
-   /* FIXME: vertex elements not supported yet (vtx_num_elements == 0) */
-   if (vtx_num_elements == 0) {
+   for (uint32_t i = 0; i < pipeline->va_count; i++) {
+      uint32_t binding = pipeline->va[i].binding;
+      uint32_t location = pipeline->va[i].driver_location;
+
+      struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
+
+      cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
+                             &pipeline->vertex_attrs[i * packet_length], attr) {
+         /* Fill in the fields that were not prepacked at pipeline creation */
+         assert(c_vb->buffer->mem->bo);
+         attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
+                                        c_vb->buffer->mem_offset +
+                                        pipeline->va[i].offset +
+                                        c_vb->offset);
+
+         attr.number_of_values_read_by_coordinate_shader =
+            pipeline->vs_bin->prog_data.vs->vattr_sizes[location];
+         attr.number_of_values_read_by_vertex_shader =
+            pipeline->vs->prog_data.vs->vattr_sizes[location];
+
+         /* GFXH-930: At least one attribute must be enabled and read by CS
+          * and VS.  If no attribute so far was consumed by the CS, set up a
+          * dummy load on the last attribute record (last loop index, which
+          * is unrelated to the binding number) into the CS's VPM inputs.
+          * (CS is just dead-code-eliminated VS, so CS can't load without VS).
+          */
+         if (pipeline->vs_bin->prog_data.vs->vattr_sizes[location])
+            cs_loaded_any = true;
+
+         if (i == pipeline->va_count - 1 && !cs_loaded_any) {
+            attr.number_of_values_read_by_coordinate_shader = 1;
+         }
+
+         attr.maximum_index = 0xffffff;
+      }
+   }
+
+   if (pipeline->va_count == 0) {
       /* GFXH-930: At least one attribute must be enabled and read
        * by CS and VS.  If we have no attributes being consumed by
        * the shader, set up a dummy to be loaded into the VPM.
@@ -1673,7 +1700,8 @@
    uint32_t states = cmd_buffer->state.dirty;
    struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
 
-   if (states & (V3DV_CMD_DIRTY_PIPELINE)) {
+   /* vertex buffers info are emitted as part of the shader_state_record */
+   if (states & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_VERTEX_BUFFER)) {
       cmd_buffer_emit_graphics_pipeline(cmd_buffer);
    }
    /* Emit(flush) dynamic state */
@@ -1731,3 +1759,26 @@
 
    v3dv_cmd_buffer_finish_job(cmd_buffer);
 }
+
+void
+v3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+                          uint32_t firstBinding,
+                          uint32_t bindingCount,
+                          const VkBuffer *pBuffers,
+                          const VkDeviceSize *pOffsets)
+{
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   /* Just record the buffer/offset pairs; the attribute records also need
+    * the pipeline's stride, so they are emitted when the dirty bit is seen.
+    */
+   assert(firstBinding + bindingCount <= MAX_VBS);
+   for (uint32_t n = 0; n < bindingCount; n++) {
+      struct v3dv_vertex_binding *slot =
+         &cmd_buffer->state.vertex_bindings[firstBinding + n];
+      slot->buffer = v3dv_buffer_from_handle(pBuffers[n]);
+      slot->offset = pOffsets[n];
+   }
+
+   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VERTEX_BUFFER;
+}
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 07f3d52..ff400d4 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -500,7 +500,6 @@
    const uint32_t max_sampled_images = 16;
    const uint32_t max_storage_images = 4;
 
-   const uint32_t max_vertex_attributes = 16;
    const uint32_t max_varying_components = 16 * 4;
    const uint32_t max_render_targets = 4;
 
@@ -547,8 +546,8 @@
       .maxDescriptorSetInputAttachments         = 4,
 
       /* Vertex limits */
-      .maxVertexInputAttributes                 = max_vertex_attributes,
-      .maxVertexInputBindings                   = max_vertex_attributes,
+      .maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
+      .maxVertexInputBindings                   = MAX_VBS,
       .maxVertexInputAttributeOffset            = 0xffffffff,
       .maxVertexInputBindingStride              = 0xffffffff,
       .maxVertexOutputComponents                = max_varying_components,
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 18fea2a..98bb6c5 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -983,6 +983,87 @@
    }
 }
 
+/* Returns the GL_SHADER_STATE_ATTRIBUTE_RECORD type matching the size and
+ * type of the first channel of the format. Formats that have no matching
+ * type are reported on stderr and abort the process.
+ */
+static uint8_t
+get_attr_type(const struct util_format_description *desc)
+{
+   const uint32_t bits = desc->channel[0].size;
+
+   switch (desc->channel[0].type) {
+   case UTIL_FORMAT_TYPE_FLOAT:
+      /* Anything that is not a 32-bit float is asserted to be 16-bit */
+      if (bits == 32)
+         return ATTRIBUTE_FLOAT;
+
+      assert(bits == 16);
+      return ATTRIBUTE_HALF_FLOAT;
+
+   case UTIL_FORMAT_TYPE_SIGNED:
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      /* Signed and unsigned share the same types; only the width matters */
+      switch (bits) {
+      case 32:
+         return ATTRIBUTE_INT;
+
+      case 16:
+         return ATTRIBUTE_SHORT;
+
+      case 10:
+         return ATTRIBUTE_INT2_10_10_10;
+
+      case 8:
+         return ATTRIBUTE_BYTE;
+
+      default:
+         /* Unsupported width: reported below */
+         break;
+      }
+      break;
+
+   default:
+      /* Unsupported channel type: reported below */
+      break;
+   }
+
+   fprintf(stderr,
+           "format %s unsupported\n",
+           desc->name);
+   abort();
+}
+
+/* Prepacks the pipeline-dependent fields of attribute record 'index'; the
+ * remaining fields are filled in when the record is emitted.
+ */
+static void
+pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
+                                   uint32_t index,
+                                   const VkVertexInputAttributeDescription *vi_desc)
+{
+   const struct util_format_description *fmt =
+      vk_format_description(vi_desc->format);
+   const struct v3dv_pipeline_vertex_binding *vb =
+      &pipeline->vb[vi_desc->binding];
+   const uint32_t first_byte =
+      index * cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+
+   v3dv_pack(&pipeline->vertex_attrs[first_byte],
+             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+      /* Taken from the binding description */
+      attr.stride = vb->stride;
+      attr.instance_divisor = MIN2(vb->instance_divisor, 0xffff);
+
+      /* Taken from the format; a vec_size of 0 means 4 components */
+      attr.vec_size = fmt->nr_channels & 3;
+      attr.signed_int_type = (fmt->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
+      attr.normalized_int_type = fmt->channel[0].normalized;
+      attr.read_as_int_uint = fmt->channel[0].pure_integer;
+      attr.type = get_attr_type(fmt);
+   }
+}
+
 static VkResult
 pipeline_init(struct v3dv_pipeline *pipeline,
               struct v3dv_device *device,
@@ -1028,6 +1109,41 @@
    pack_shader_state_record(pipeline);
    pack_vcm_cache_size(pipeline);
 
+   const VkPipelineVertexInputStateCreateInfo *vi_info =
+      pCreateInfo->pVertexInputState;
+
+   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
+   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *desc =
+         &vi_info->pVertexBindingDescriptions[i];
+
+      pipeline->vb[desc->binding].stride = desc->stride;
+      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
+   }
+
+   pipeline->va_count = 0;
+   nir_shader *shader = pipeline->vs->nir;
+
+   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *desc =
+         &vi_info->pVertexAttributeDescriptions[i];
+      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
+
+      nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_in, location);
+
+      if (var != NULL) {
+         unsigned driver_location = var->data.driver_location;
+
+         pipeline->va[pipeline->va_count].offset = desc->offset;
+         pipeline->va[pipeline->va_count].binding = desc->binding;
+         pipeline->va[pipeline->va_count].driver_location = driver_location;
+
+         pack_shader_state_attribute_record(pipeline, pipeline->va_count, desc);
+
+         pipeline->va_count++;
+      }
+   }
+
    return result;
 }
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index d183c12..7c66b4b 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -110,6 +110,9 @@
 #define MAX_VIEWPORTS 1
 #define MAX_SCISSORS  1
 
+#define MAX_VBS 16
+#define MAX_VERTEX_ATTRIBS 16
+
 struct v3dv_instance;
 
 #ifdef USE_V3D_SIMULATOR
@@ -434,6 +437,7 @@
    V3DV_CMD_DIRTY_DYNAMIC_SCISSOR                   = 1 << 1,
    V3DV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 2) - 1,
    V3DV_CMD_DIRTY_PIPELINE                          = 1 << 2,
+   V3DV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 3,
 };
 
 
@@ -478,6 +482,11 @@
 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
 void v3dv_job_emit_binning_flush(struct v3dv_job *job);
 
+struct v3dv_vertex_binding {
+   struct v3dv_buffer *buffer; /* buffer set by v3dv_CmdBindVertexBuffers */
+   VkDeviceSize offset;        /* matching pOffsets[] entry from the bind */
+};
+
 struct v3dv_cmd_buffer_state {
    const struct v3dv_render_pass *pass;
    const struct v3dv_framebuffer *framebuffer;
@@ -495,6 +504,8 @@
 
    uint32_t attachment_count;
    struct v3dv_cmd_buffer_attachment_state *attachments;
+
+   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
 };
 
 struct v3dv_cmd_buffer {
@@ -634,6 +645,32 @@
 
    struct v3dv_dynamic_state dynamic_state;
 
+   /* Accessed by binding. So vb[binding].stride is the stride of the vertex
+    * array with such binding
+    */
+   struct v3dv_pipeline_vertex_binding {
+      uint32_t stride;
+      uint32_t instance_divisor;
+   } vb[MAX_VBS];
+   uint32_t vb_count;
+
+   /* Note that a lot of info from VkVertexInputAttributeDescription is
+    * already prepacked, so we store here only the fields that need to be
+    * rechecked later.
+    *
+    * Note that they are not indexed by location or nir driver location,
+    * as we only define here the inputs that the shader really uses.
+    */
+   struct v3dv_pipeline_vertex_attrib {
+      uint32_t binding;
+      uint32_t offset;
+      /* We store driver_location instead of location because most v3d structs
+       * are indexed by driver_location
+       */
+      uint32_t driver_location;
+   } va[MAX_VERTEX_ATTRIBS];
+   uint32_t va_count;
+
    struct vpm_config vpm_cfg;
    struct vpm_config vpm_cfg_bin;
    /* Packets prepacked during pipeline creation
@@ -641,6 +678,8 @@
    uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
    uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)];
    uint8_t vcm_cache_size[cl_packet_length(VCM_CACHE_SIZE)];
+   uint8_t vertex_attrs[cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD) *
+                        MAX_VERTEX_ATTRIBS]; /* one record per va[] entry */
 };
 
 uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);