v3dv: implement early Z optimization

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index e603471..cda9254 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -315,6 +315,9 @@
     *  any prefix state data before the binning list proper starts."
     */
    cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+   job->ez_state = VC5_EZ_UNDECIDED;
+   job->first_ez_state = VC5_EZ_UNDECIDED;
 }
 
 static void
@@ -1113,6 +1116,27 @@
 }
 
 static void
+set_rcl_early_z_config(struct v3dv_job *job,
+                       bool *early_z_disable,
+                       uint32_t *early_z_test_and_update_direction)
+{
+   const enum v3dv_ez_state ez = job->first_ez_state;
+   /* EZ off: only the disable flag is written, the direction is untouched */
+   if (ez == VC5_EZ_DISABLED) {
+      *early_z_disable = true;
+      return;
+   }
+
+   /* A job that never settled on a direction is programmed as LT/LE */
+   const bool lt_le = ez == VC5_EZ_UNDECIDED || ez == VC5_EZ_LT_LE;
+   if (lt_le || ez == VC5_EZ_GT_GE) {
+      *early_z_disable = false;
+      *early_z_test_and_update_direction =
+         lt_le ? EARLY_Z_DIRECTION_LT_LE : EARLY_Z_DIRECTION_GT_GE;
+   }
+}
+
+static void
 cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer)
 {
    struct v3dv_job *job = cmd_buffer->state.job;
@@ -1151,7 +1175,9 @@
          const struct v3dv_image_view *iview =
             framebuffer->attachments[ds_attachment_idx];
          config.internal_depth_type = iview->internal_type;
-         config.early_z_disable = true; /* FIXME */
+         set_rcl_early_z_config(job,
+                                &config.early_z_disable,
+                                &config.early_z_test_and_update_direction);
       } else {
          config.early_z_disable = true;
       }
@@ -1389,6 +1415,64 @@
    cmd_buffer->state.dirty |= dest_mask;
 }
 
+static void
+cmd_buffer_update_ez_state(struct v3dv_cmd_buffer *cmd_buffer,
+                           struct v3dv_pipeline *pipeline)
+{
+   struct v3dv_job *job = cmd_buffer->state.job;
+   assert(job); /* NOTE(review): assumes a job exists at bind time; confirm */
+   /* Fold the pipeline's early Z (EZ) state into the job's EZ state */
+   switch (pipeline->ez_state) {
+   case VC5_EZ_UNDECIDED:
+      /* If the pipeline didn't pick a direction but didn't disable EZ either,
+       * go along with the job's current EZ state. This allows the EZ
+       * optimization for Z func == EQUAL or NEVER.
+       */
+      break;
+
+   case VC5_EZ_LT_LE:
+   case VC5_EZ_GT_GE:
+      /* If the pipeline picked a direction, adopt it while the job is still
+       * undecided; a direction that conflicts with the job's disables EZ.
+       */
+      if (job->ez_state == VC5_EZ_UNDECIDED)
+         job->ez_state = pipeline->ez_state;
+      else if (job->ez_state != pipeline->ez_state)
+         job->ez_state = VC5_EZ_DISABLED;
+      break;
+
+   case VC5_EZ_DISABLED:
+      /* If the pipeline disables EZ (depth test off, unsupported Z func or a
+       * stencil op that isn't a no-op), no more EZ for the rest of this job.
+       */
+      job->ez_state = VC5_EZ_DISABLED;
+      break;
+   }
+
+   /* If the FS writes Z, then it may update against the chosen EZ direction */
+   if (pipeline->fs->prog_data.fs->writes_z)
+      job->ez_state = VC5_EZ_DISABLED;
+   /* Latch the first direction picked: the RCL config is derived from it */
+   if (job->first_ez_state == VC5_EZ_UNDECIDED &&
+       job->ez_state != VC5_EZ_DISABLED) {
+      job->first_ez_state = job->ez_state;
+   }
+}
+
+static void
+bind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
+                       struct v3dv_pipeline *pipeline)
+{
+   /* NOTE(review): rebinding the current pipeline skips the EZ update, so a
+    * job started since the last bind keeps its initial EZ state -- confirm.
+    */
+   if (cmd_buffer->state.pipeline != pipeline) {
+      cmd_buffer->state.pipeline = pipeline;
+      bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
+      cmd_buffer_update_ez_state(cmd_buffer, pipeline);
+      cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
+   }
+}
 
 void
 v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
@@ -1404,13 +1488,7 @@
       break;
 
    case VK_PIPELINE_BIND_POINT_GRAPHICS:
-      if (cmd_buffer->state.pipeline == pipeline)
-         return;
-
-      cmd_buffer->state.pipeline = pipeline;
-      bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
-
-      cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
+      bind_graphics_pipeline(cmd_buffer, pipeline);
       break;
 
    default:
@@ -1739,7 +1817,9 @@
       state.number_of_attribute_arrays = num_elements_to_emit;
    }
 
-   cl_emit_prepacked(&job->bcl, &pipeline->cfg_bits);
+   cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
+      config.early_z_updates_enable = job->ez_state != VC5_EZ_DISABLED;
+   }
 
    if (pipeline->emit_stencil_cfg[0]) {
       cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[0]);
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 3820ba2..af96161 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -1065,6 +1065,48 @@
    }
 }
 
+static bool
+stencil_op_is_no_op(const VkStencilOpState *stencil)
+{
+   const bool always_passes = stencil->compareOp == VK_COMPARE_OP_ALWAYS;
+   return always_passes && stencil->depthFailOp == VK_STENCIL_OP_KEEP;
+}
+
+static void
+pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
+                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
+{
+   /* Assume EZ is unusable until a depth func below proves otherwise */
+   pipeline->ez_state = VC5_EZ_DISABLED;
+
+   if (!ds_info || !ds_info->depthTestEnable)
+      return;
+
+   /* If stencil is enabled and is not a no-op on both faces, no EZ */
+   if (ds_info->stencilTestEnable &&
+       !(stencil_op_is_no_op(&ds_info->front) &&
+         stencil_op_is_no_op(&ds_info->back))) {
+      return;
+   }
+   switch (ds_info->depthCompareOp) {
+   case VK_COMPARE_OP_LESS:
+   case VK_COMPARE_OP_LESS_OR_EQUAL:
+      pipeline->ez_state = VC5_EZ_LT_LE;
+      break;
+   case VK_COMPARE_OP_GREATER:
+   case VK_COMPARE_OP_GREATER_OR_EQUAL:
+      pipeline->ez_state = VC5_EZ_GT_GE;
+      break;
+   case VK_COMPARE_OP_NEVER:
+   case VK_COMPARE_OP_EQUAL:
+      pipeline->ez_state = VC5_EZ_UNDECIDED;
+      break;
+   default:
+      /* Any other depth func leaves EZ disabled */
+      break;
+   }
+}
+
 static void
 pack_shader_state_record(struct v3dv_pipeline *pipeline)
 {
@@ -1346,6 +1388,7 @@
 
    pack_cfg_bits(pipeline, ds_info, rs_info, cb_info);
    pack_stencil_cfg(pipeline, ds_info);
+   pipeline_set_ez_state(pipeline, ds_info);
 
    result = pipeline_compile_graphics(pipeline, pCreateInfo, alloc);
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index e2bde4e..f5f63c2 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -468,6 +468,13 @@
                                  float scale[3],
                                  float translate[3]);
 
+enum v3dv_ez_state { /* Early Z (EZ) state, kept per pipeline and per job */
+   VC5_EZ_UNDECIDED = 0, /* No direction chosen yet; EZ is still possible */
+   VC5_EZ_GT_GE, /* Direction for GREATER / GREATER_OR_EQUAL depth funcs */
+   VC5_EZ_LT_LE, /* Direction for LESS / LESS_OR_EQUAL depth funcs */
+   VC5_EZ_DISABLED, /* EZ cannot be used */
+};
+
 struct v3dv_job {
    struct list_head list_link;
 
@@ -490,6 +497,9 @@
    bool tmu_dirty_rcl;
 
    uint32_t first_subpass;
+
+   enum v3dv_ez_state ez_state;
+   enum v3dv_ez_state first_ez_state;
 };
 
 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
@@ -658,6 +668,8 @@
 
    struct v3dv_dynamic_state dynamic_state;
 
+   enum v3dv_ez_state ez_state;
+
    /* Accessed by binding. So vb[binding]->stride is the stride of the vertex
     * array with such binding
     */