v3dv: partial prepack of the gl_shader_state_record

We can't prepack the whole record, as addresses need the job, and
uniforms depend on dynamic values.

Also, because both cl_emit_with_prepacked and v3dv_pack assert on
correct values, we need to set two values twice, which led to moving
vpm_config to the pipeline. In any case, the latter will be useful
when we start to prepack more stuff.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_cl.h b/src/broadcom/vulkan/v3dv_cl.h
index 1784f8e..c0b0e38 100644
--- a/src/broadcom/vulkan/v3dv_cl.h
+++ b/src/broadcom/vulkan/v3dv_cl.h
@@ -155,6 +155,23 @@
                 _loop_terminate = NULL;                          \
         }))                                                      \
 
+#define cl_emit_with_prepacked(cl, packet, prepacked, name)      \
+        for (struct cl_packet_struct(packet) name = {            \
+                cl_packet_header(packet)                         \
+        },                                                       \
+        *_loop_terminate = &name;                                \
+        __builtin_expect(_loop_terminate != NULL, 1);            \
+        ({                                                       \
+                struct v3dv_cl_out *cl_out = cl_start(cl);        \
+                uint8_t packed[cl_packet_length(packet)];         \
+                cl_packet_pack(packet)(cl, packed, &name);       \
+                for (int _i = 0; _i < cl_packet_length(packet); _i++) \
+                        ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \
+                cl_advance(&cl_out, cl_packet_length(packet));   \
+                cl_end(cl, cl_out);                              \
+                _loop_terminate = NULL;                          \
+        }))                                                      \
+
 /**
  * Helper function called by the XML-generated pack functions for filling in
  * an address field in shader records.
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index fd0984f..81131d8 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -1501,20 +1501,6 @@
    }
 }
 
-/* FIXME: in fact this is not really required at this point, as we don't plan
- * to initially support GS, but it is more readable and serves as a
- * placeholder, to have the struct and fill it with default values.
- */
-struct vpm_config {
-   uint32_t As;
-   uint32_t Vc;
-   uint32_t Gs;
-   uint32_t Gd;
-   uint32_t Gv;
-   uint32_t Ve;
-   uint32_t gs_width;
-};
-
 static void
 cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer)
 {
@@ -1560,54 +1546,18 @@
                            cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
                            32);
 
-   struct vpm_config vpm_cfg_bin, vpm_cfg;
+   cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
+                          pipeline->shader_state_record, shader) {
 
-   /* FIXME: values below are default when non-GS is available. Would need to
-    * provide real values if GS gets supported
-    */
-   vpm_cfg_bin.As = 1;
-   vpm_cfg_bin.Ve = 0;
-   vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size;
-
-   vpm_cfg.As = 1;
-   vpm_cfg.Ve = 0;
-   vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
-
-   cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
-      shader.enable_clipping = true;
-
-      shader.point_size_in_shaded_vertex_data =
-         pipeline->vs->key.vs.per_vertex_point_size;
-
-      /* Must be set if the shader modifies Z, discards, or modifies
-       * the sample mask.  For any of these cases, the fragment
-       * shader needs to write the Z value (even just discards).
+      /* FIXME: we are setting these values here and during the
+       * prepacking. This is because both cl_emit_with_prepacked and v3dv_pack
+       * assert on minimum values for these. It would be good to get
+       * v3dv_pack to assert on the final value if possible
        */
-      shader.fragment_shader_does_z_writes =
-         pipeline->fs->prog_data.fs->writes_z;
-      /* Set if the EZ test must be disabled (due to shader side
-       * effects and the early_z flag not being present in the
-       * shader).
-       */
-      shader.turn_off_early_z_test =
-         pipeline->fs->prog_data.fs->disable_ez;
-
-      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
-         pipeline->fs->prog_data.fs->uses_center_w;
-
-      shader.any_shader_reads_hardware_written_primitive_id = false;
-
-      shader.do_scoreboard_wait_on_first_thread_switch =
-         pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
-      shader.disable_implicit_point_line_varyings =
-         !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings;
-
-      shader.number_of_varyings_in_fragment_shader =
-         pipeline->fs->prog_data.fs->num_inputs;
-
-      shader.coordinate_shader_propagate_nans = true;
-      shader.vertex_shader_propagate_nans = true;
-      shader.fragment_shader_propagate_nans = true;
+      shader.min_coord_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg_bin.As;
+      shader.min_vertex_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg.As;
 
       shader.coordinate_shader_code_address =
          v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0);
@@ -1616,63 +1566,10 @@
       shader.fragment_shader_code_address =
          v3dv_cl_address(pipeline->fs->assembly_bo, 0);
 
-      /* FIXME: Use combined input/output size flag in the common case (also
-       * on v3d, see v3dx_draw).
-       */
-      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
-         pipeline->vs_bin->prog_data.vs->separate_segments;
-      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-         pipeline->vs->prog_data.vs->separate_segments;
-
-      shader.coordinate_shader_input_vpm_segment_size =
-         pipeline->vs_bin->prog_data.vs->separate_segments ?
-         pipeline->vs_bin->prog_data.vs->vpm_input_size : 1;
-      shader.vertex_shader_input_vpm_segment_size =
-         pipeline->vs->prog_data.vs->separate_segments ?
-         pipeline->vs->prog_data.vs->vpm_input_size : 1;
-
-      shader.coordinate_shader_output_vpm_segment_size =
-         pipeline->vs_bin->prog_data.vs->vpm_output_size;
-      shader.vertex_shader_output_vpm_segment_size =
-         pipeline->vs->prog_data.vs->vpm_output_size;
-
       shader.coordinate_shader_uniforms_address = vs_bin_uniforms;
       shader.vertex_shader_uniforms_address = vs_uniforms;
       shader.fragment_shader_uniforms_address = fs_uniforms;
 
-      shader.min_coord_shader_input_segments_required_in_play =
-         vpm_cfg_bin.As;
-      shader.min_vertex_shader_input_segments_required_in_play =
-         vpm_cfg.As;
-
-      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         vpm_cfg_bin.Ve;
-      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         vpm_cfg.Ve;
-
-      shader.coordinate_shader_4_way_threadable =
-         pipeline->vs_bin->prog_data.vs->base.threads == 4;
-      shader.vertex_shader_4_way_threadable =
-         pipeline->vs->prog_data.vs->base.threads == 4;
-      shader.fragment_shader_4_way_threadable =
-         pipeline->fs->prog_data.fs->base.threads == 4;
-
-      shader.coordinate_shader_start_in_final_thread_section =
-         pipeline->vs_bin->prog_data.vs->base.single_seg;
-      shader.vertex_shader_start_in_final_thread_section =
-         pipeline->vs->prog_data.vs->base.single_seg;
-      shader.fragment_shader_start_in_final_thread_section =
-         pipeline->fs->prog_data.fs->base.single_seg;
-
-      shader.vertex_id_read_by_coordinate_shader =
-         pipeline->vs_bin->prog_data.vs->uses_vid;
-      shader.instance_id_read_by_coordinate_shader =
-         pipeline->vs_bin->prog_data.vs->uses_iid;
-      shader.vertex_id_read_by_vertex_shader =
-         pipeline->vs->prog_data.vs->uses_vid;
-      shader.instance_id_read_by_vertex_shader =
-         pipeline->vs->prog_data.vs->uses_iid;
-
       /* FIXME: I understand that the following is needed only if
        * vtx_num_elements > 0
        */
@@ -1701,8 +1598,8 @@
    }
 
    cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
-      vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc;
-      vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
+      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
+      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
    }
 
    cl_emit(&job->bcl, GL_SHADER_STATE, state) {
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 75b67b4..338af12 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -683,6 +683,17 @@
       }
    }
 
+   /* FIXME: values below are the defaults when GS is not available. We would
+    * need to provide real values if GS gets supported
+    */
+   pipeline->vpm_cfg_bin.As = 1;
+   pipeline->vpm_cfg_bin.Ve = 0;
+   pipeline->vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size;
+
+   pipeline->vpm_cfg.As = 1;
+   pipeline->vpm_cfg.Ve = 0;
+   pipeline->vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
+
    return VK_SUCCESS;
 }
 
@@ -814,6 +825,122 @@
    };
 }
 
+static void
+pack_shader_state_record(struct v3dv_pipeline *pipeline)
+{
+   assert(sizeof(pipeline->shader_state_record) ==
+          cl_packet_length(GL_SHADER_STATE_RECORD));
+
+   /* Note: we are not packing addresses, as we need the job (see
+    * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
+    * point as they depend on dynamic info that can be set after creating the
+    * pipeline (like the viewport). They need to be filled in later, so we are
+    * doing a partial prepacking.
+    */
+   v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
+      shader.enable_clipping = true;
+
+      shader.point_size_in_shaded_vertex_data =
+         pipeline->vs->key.vs.per_vertex_point_size;
+
+      /* Must be set if the shader modifies Z, discards, or modifies
+       * the sample mask.  For any of these cases, the fragment
+       * shader needs to write the Z value (even just discards).
+       */
+      shader.fragment_shader_does_z_writes =
+         pipeline->fs->prog_data.fs->writes_z;
+      /* Set if the EZ test must be disabled (due to shader side
+       * effects and the early_z flag not being present in the
+       * shader).
+       */
+      shader.turn_off_early_z_test =
+         pipeline->fs->prog_data.fs->disable_ez;
+
+      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+         pipeline->fs->prog_data.fs->uses_center_w;
+
+      shader.any_shader_reads_hardware_written_primitive_id = false;
+
+      shader.do_scoreboard_wait_on_first_thread_switch =
+         pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
+      shader.disable_implicit_point_line_varyings =
+         !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings;
+
+      shader.number_of_varyings_in_fragment_shader =
+         pipeline->fs->prog_data.fs->num_inputs;
+
+      shader.coordinate_shader_propagate_nans = true;
+      shader.vertex_shader_propagate_nans = true;
+      shader.fragment_shader_propagate_nans = true;
+
+      /* Note: see previous note about addresses */
+      /* shader.coordinate_shader_code_address */
+      /* shader.vertex_shader_code_address */
+      /* shader.fragment_shader_code_address */
+
+      /* FIXME: Use combined input/output size flag in the common case (also
+       * on v3d, see v3dx_draw).
+       */
+      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+         pipeline->vs_bin->prog_data.vs->separate_segments;
+      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+         pipeline->vs->prog_data.vs->separate_segments;
+
+      shader.coordinate_shader_input_vpm_segment_size =
+         pipeline->vs_bin->prog_data.vs->separate_segments ?
+         pipeline->vs_bin->prog_data.vs->vpm_input_size : 1;
+      shader.vertex_shader_input_vpm_segment_size =
+         pipeline->vs->prog_data.vs->separate_segments ?
+         pipeline->vs->prog_data.vs->vpm_input_size : 1;
+
+      shader.coordinate_shader_output_vpm_segment_size =
+         pipeline->vs_bin->prog_data.vs->vpm_output_size;
+      shader.vertex_shader_output_vpm_segment_size =
+         pipeline->vs->prog_data.vs->vpm_output_size;
+
+      /* Note: see previous note about addresses */
+      /* shader.coordinate_shader_uniforms_address */
+      /* shader.vertex_shader_uniforms_address */
+      /* shader.fragment_shader_uniforms_address */
+
+      shader.min_coord_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg_bin.As;
+      shader.min_vertex_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg.As;
+
+      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg_bin.Ve;
+      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg.Ve;
+
+      shader.coordinate_shader_4_way_threadable =
+         pipeline->vs_bin->prog_data.vs->base.threads == 4;
+      shader.vertex_shader_4_way_threadable =
+         pipeline->vs->prog_data.vs->base.threads == 4;
+      shader.fragment_shader_4_way_threadable =
+         pipeline->fs->prog_data.fs->base.threads == 4;
+
+      shader.coordinate_shader_start_in_final_thread_section =
+         pipeline->vs_bin->prog_data.vs->base.single_seg;
+      shader.vertex_shader_start_in_final_thread_section =
+         pipeline->vs->prog_data.vs->base.single_seg;
+      shader.fragment_shader_start_in_final_thread_section =
+         pipeline->fs->prog_data.fs->base.single_seg;
+
+      shader.vertex_id_read_by_coordinate_shader =
+         pipeline->vs_bin->prog_data.vs->uses_vid;
+      shader.instance_id_read_by_coordinate_shader =
+         pipeline->vs_bin->prog_data.vs->uses_iid;
+      shader.vertex_id_read_by_vertex_shader =
+         pipeline->vs->prog_data.vs->uses_vid;
+      shader.instance_id_read_by_vertex_shader =
+         pipeline->vs->prog_data.vs->uses_iid;
+
+      /* Note: see previous note about addresses */
+      /* shader.address_of_default_attribute_values */
+   }
+}
+
 static VkResult
 pipeline_init(struct v3dv_pipeline *pipeline,
               struct v3dv_device *device,
@@ -856,6 +983,8 @@
       return result;
    }
 
+   pack_shader_state_record(pipeline);
+
    return result;
 }
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 8f617f2..3087df2 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -583,6 +583,20 @@
    struct v3dv_bo *assembly_bo;
 };
 
+/* FIXME: although the full vpm_config is not required at this point, as we
+ * don't plan to initially support GS, it is more readable and serves as a
+ * placeholder, to have the struct and fill it with default values.
+ */
+struct vpm_config {
+   uint32_t As;
+   uint32_t Vc;
+   uint32_t Gs;
+   uint32_t Gd;
+   uint32_t Gv;
+   uint32_t Ve;
+   uint32_t gs_width;
+};
+
 struct v3dv_pipeline {
    struct v3dv_device *device;
 
@@ -599,9 +613,12 @@
 
    struct v3dv_dynamic_state dynamic_state;
 
+   struct vpm_config vpm_cfg;
+   struct vpm_config vpm_cfg_bin;
    /* Packets prepacked during pipeline creation
     */
    uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
+   uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)];
 };
 
 uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);