v3dv: implement color blending

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 9fbb4b6..0eff6ae 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -1651,6 +1651,8 @@
       }
    }
 
+   /* FIXME: handle VK_DYNAMIC_STATE_BLEND_CONSTANTS */
+
    cmd_buffer->state.dynamic.mask = dynamic_mask;
    cmd_buffer->state.dirty |= dirty;
 }
@@ -1994,6 +1996,43 @@
 }
 
 static void
+emit_blend(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job);
+
+   struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   assert(pipeline);
+
+   const uint32_t blend_packets_size =
+      cl_packet_length(BLEND_ENABLES) +
+      cl_packet_length(BLEND_CONSTANT_COLOR) +
+      cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS +
+      cl_packet_length(COLOR_WRITE_MASKS);
+
+   v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
+
+   if (pipeline->blend.enables) {
+      cl_emit(&job->bcl, BLEND_ENABLES, enables) {
+         enables.mask = pipeline->blend.enables;
+      }
+   }
+
+   /* FIXME: this can be dynamic state! */
+   if (pipeline->blend.needs_color_constants)
+      cl_emit_prepacked(&job->bcl, &pipeline->blend.constant_color);
+
+   for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
+      if (pipeline->blend.enables & (1 << i))
+         cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
+   }
+
+   cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
+      mask.mask = pipeline->blend.color_write_masks;
+   }
+}
+
+static void
 emit_flat_shade_flags(struct v3dv_job *job,
                       int varying_offset,
                       uint32_t varyings,
@@ -2322,6 +2361,8 @@
    if (*dirty & dynamic_stencil_dirty_flags)
       emit_stencil(cmd_buffer);
 
+   emit_blend(cmd_buffer);
+
    cmd_buffer->state.dirty &= ~(*dirty);
 }
 
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 0198b16..6fd0262 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -1260,28 +1260,137 @@
    pipeline->dynamic_state.mask = dynamic_states;
 }
 
+/* Translates a Vulkan blend factor into the value written to BLEND_CFG.
+ * Sets *needs_constants (it is never cleared here) when the factor reads the
+ * blend constants. If dst_alpha_one is set, destination alpha factors are
+ * folded to ONE / ZERO.
+ */
+static uint8_t
+blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
+{
+   switch (factor) {
+   case VK_BLEND_FACTOR_ZERO:
+   case VK_BLEND_FACTOR_ONE:
+   case VK_BLEND_FACTOR_SRC_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+   case VK_BLEND_FACTOR_DST_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+   case VK_BLEND_FACTOR_SRC_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+      return factor; /* NOTE(review): assumes VK and V3D values match */
+   case VK_BLEND_FACTOR_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+      *needs_constants = true;
+      return factor;
+   case VK_BLEND_FACTOR_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
+                             V3D_BLEND_FACTOR_DST_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
+                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
+   case VK_BLEND_FACTOR_SRC1_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+   case VK_BLEND_FACTOR_SRC1_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+      unreachable("Invalid blend factor: dual source blending not supported.");
+   default:
+      unreachable("Unknown blend factor.");
+   }
+}
+
+/* Prepacks blend state for the pipeline from cb_info (which may be NULL).
+ * By default, we are not enabling blending and all color channel writes are
+ * enabled. Color write enables are independent of whether blending is
+ * enabled or not.
+ *
+ * Vulkan specifies color write masks so that bits set correspond to
+ * enabled channels. Our hardware does it the other way around.
+ */
+static void
+pack_blend(struct v3dv_pipeline *pipeline,
+           const VkPipelineColorBlendStateCreateInfo *cb_info)
+{
+   /* Reset every default before the early returns; emit_blend() reads them. */
+   pipeline->blend.enables = 0;
+   pipeline->blend.needs_color_constants = false;
+   pipeline->blend.color_write_masks = 0; /* All channels enabled */
+
+   if (!cb_info)
+      return;
+
+   assert(pipeline->subpass);
+   if (pipeline->subpass->color_count == 0)
+      return;
+
+   uint32_t color_write_masks = 0;
+   for (uint32_t i = 0; i < cb_info->attachmentCount; i++) {
+      const VkPipelineColorBlendAttachmentState *b_state =
+         &cb_info->pAttachments[i];
+      assert(i < pipeline->subpass->color_count);
+
+      uint32_t attachment_idx =
+         pipeline->subpass->color_attachments[i].attachment;
+      if (attachment_idx == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
+      if (!b_state->blendEnable)
+         continue;
+
+      VkAttachmentDescription *desc =
+         &pipeline->pass->attachments[attachment_idx].desc;
+      const struct v3dv_format *format = v3dv_get_format(desc->format);
+      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
+
+      uint8_t rt_mask = 1 << i;
+      pipeline->blend.enables |= rt_mask;
+
+      v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
+         config.render_target_mask = rt_mask;
+
+         config.color_blend_mode = b_state->colorBlendOp;
+         config.color_blend_dst_factor =
+            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.color_blend_src_factor =
+            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+
+         config.alpha_blend_mode = b_state->alphaBlendOp;
+         config.alpha_blend_dst_factor =
+            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.alpha_blend_src_factor =
+            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+      }
+   }
+
+   if (pipeline->blend.needs_color_constants) {
+      v3dv_pack(pipeline->blend.constant_color, BLEND_CONSTANT_COLOR, color) {
+         color.red_f16 = _mesa_float_to_half(cb_info->blendConstants[0]);
+         color.green_f16 = _mesa_float_to_half(cb_info->blendConstants[1]);
+         color.blue_f16 = _mesa_float_to_half(cb_info->blendConstants[2]);
+         color.alpha_f16 = _mesa_float_to_half(cb_info->blendConstants[3]);
+      }
+   }
+
+   pipeline->blend.color_write_masks = color_write_masks;
+}
+
+/* This requires that pack_blend() has been called before, so that we can set
+ * the overall blend enable bit in the CFG_BITS packet.
+ */
 static void
 pack_cfg_bits(struct v3dv_pipeline *pipeline,
               const VkPipelineDepthStencilStateCreateInfo *ds_info,
-              const VkPipelineRasterizationStateCreateInfo *rs_info,
-              const VkPipelineColorBlendStateCreateInfo *cb_info)
+              const VkPipelineRasterizationStateCreateInfo *rs_info)
 {
    assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
 
-   /* CFG_BITS allow to set a overall blend_enable that it is anded with the
-    * per-target blend enable. v3d so far creates a mask with each target, so
-    * we just set to true if any attachment has blending enabled
-    */
-   bool overall_blend_enable = false;
-   if (cb_info) {
-      for (uint32_t i = 0; i < cb_info->attachmentCount; i++) {
-         const VkPipelineColorBlendAttachmentState *b_state =
-            &cb_info->pAttachments[i];
-
-         overall_blend_enable |= b_state->blendEnable;
-      }
-   }
-
    v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) {
       config.enable_forward_facing_primitive =
          rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
@@ -1311,7 +1420,7 @@
        */
       config.direct3d_provoking_vertex = true;
 
-      config.blend_enable = overall_blend_enable;
+      config.blend_enable = pipeline->blend.enables != 0;
 
       /* Note: ez state may update based on the compiled FS, along with zsa
        * (FIXME: not done)
@@ -1744,6 +1853,7 @@
 
    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
    assert(pCreateInfo->subpass < render_pass->subpass_count);
+   pipeline->pass = render_pass;
    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
 
    pipeline_init_dynamic_state(pipeline, pCreateInfo);
@@ -1763,7 +1873,8 @@
    const VkPipelineColorBlendStateCreateInfo *cb_info =
       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
 
-   pack_cfg_bits(pipeline, ds_info, rs_info, cb_info);
+   pack_blend(pipeline, cb_info);
+   pack_cfg_bits(pipeline, ds_info, rs_info);
    pack_stencil_cfg(pipeline, ds_info);
    pipeline_set_ez_state(pipeline, ds_info);
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index b4209eb..29d5014 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -867,6 +867,7 @@
 
    VkShaderStageFlags active_stages;
 
+   struct v3dv_render_pass *pass;
    struct v3dv_subpass *subpass;
 
    /* Note: We can't use just a MESA_SHADER_STAGES array as we need to track
@@ -928,6 +929,22 @@
    /* If the pipeline is using push constants */
    bool use_push_constants;
 
+   /* Blend state */
+   struct {
+      /* Per-RT bit mask with blend enables */
+      uint8_t enables;
+      /* Per-RT prepacked blend config packets */
+      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)];
+      /* Flag indicating whether the blend factors in use require
+       * color constants.
+       */
+      bool needs_color_constants;
+      /* Blend constants packet */
+      uint8_t constant_color[cl_packet_length(BLEND_CONSTANT_COLOR)];
+      /* Per-RT color write masks (4 bits per RT, set bit = channel disabled) */
+      uint32_t color_write_masks;
+   } blend;
+
    /* Packets prepacked during pipeline creation
     */
    uint8_t cfg_bits[cl_packet_length(CFG_BITS)];