tu: Implement multiview clear/resolve interactions

Loads, stores, clears, and resolves now happen per-view. Since we only
support multiview with sysmem rendering, we only implement this for
sysmem clears and resolves.

No existing tests mix multiview and MSAA, so the resolve path currently
has no test coverage.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5720>
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index e60f33b..fe39d6a 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -439,6 +439,16 @@
    tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
    tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
 
+   /* Copy what the blob does here. This will emit an extra 0x3f
+    * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
+    * this is working around yet.
+    */
+   tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
+   tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
+   tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
+   tu_cs_emit(cs, 0);
+   tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL());
+
    tu6_emit_vpc(cs, &vs, NULL, NULL, NULL, &fs, 0, false);
 
    /* REPL_MODE for varying with RECTLIST (2 vertices only) */
@@ -1665,11 +1675,18 @@
    ops->teardown(cmd, cs);
 }
 
+#define for_each_layer(layer, layer_mask, layers) /* mask == 0: all layers */ \
+   for (uint32_t layer = 0; \
+        layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : (layers)); \
+        layer++) \
+      if (!(layer_mask) || ((layer_mask) & BIT(layer))) /* skip unset views */
+
 void
 tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                   struct tu_cs *cs,
                   struct tu_image_view *src,
                   struct tu_image_view *dst,
+                  uint32_t layer_mask,
                   uint32_t layers,
                   const VkRect2D *rect)
 {
@@ -1684,7 +1701,7 @@
               ROTATE_0, false, dst->ubwc_enabled);
    ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
 
-   for (uint32_t i = 0; i < layers; i++) {
+   for_each_layer(i, layer_mask, layers) {
       ops->src(cmd, cs, src, i, VK_FILTER_NEAREST);
       ops->dst(cs, dst, i);
       ops->run(cmd, cs);
@@ -1878,6 +1895,15 @@
          layered_clear = true;
    }
 
+   /* a630 doesn't support multiview masks, which means that we can't use the
+    * normal multiview path without potentially recompiling a shader on-demand
+    * or using a more complicated variant that takes the mask as a const. Just
+    * use the layered path instead, since it shouldn't be much worse.
+    */
+   if (subpass->multiview_mask) {
+      layered_clear = true;
+   }
+
    r3d_common(cmd, cs, false, num_rts, layered_clear);
 
    tu_cs_emit_regs(cs,
@@ -1923,7 +1949,15 @@
       tu_cs_emit_array(cs, clear_value[b], 4);
 
    for (uint32_t i = 0; i < rect_count; i++) {
-      for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
+      /* This should be true because of this valid usage for
+       * vkCmdClearAttachments:
+       *
+       *    "If the render pass instance this is recorded in uses multiview,
+       *    then baseArrayLayer must be zero and layerCount must be one"
+       */
+      assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0);
+
+      for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) {
          r3d_coords_raw(cs, (float[]) {
             rects[i].rect.offset.x, rects[i].rect.offset.y,
             z_clear_val, uif(rects[i].baseArrayLayer + layer),
@@ -2150,6 +2184,7 @@
 {
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_image_view *iview = fb->attachments[a].attachment;
+   const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
    const struct blit_ops *ops = &r2d_ops;
    if (cmd->state.pass->attachments[a].samples > 1)
       ops = &r3d_ops;
@@ -2158,7 +2193,7 @@
    ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
    ops->clear_value(cs, format, &info->pClearValues[a]);
 
-   for (uint32_t i = 0; i < fb->layers; i++) {
+   for_each_layer(i, clear_views, fb->layers) {
       if (separate_stencil) {
          if (ops == &r3d_ops)
             r3d_dst_stencil(cs, iview, i);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index fe1ddd8..0a17add 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -340,7 +340,8 @@
    tu_cs_emit_regs(cs,
                    A6XX_SP_SRGB_CNTL(.dword = subpass->srgb_cntl));
 
-   tu_cs_emit_regs(cs, A6XX_GRAS_MAX_LAYER_INDEX(fb->layers - 1));
+   unsigned layers = MAX2(fb->layers, util_logbase2(subpass->multiview_mask) + 1);
+   tu_cs_emit_regs(cs, A6XX_GRAS_MAX_LAYER_INDEX(layers - 1));
 }
 
 void
@@ -684,6 +685,7 @@
 static void
 tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
+                        uint32_t layer_mask,
                         uint32_t a,
                         uint32_t gmem_a)
 {
@@ -691,7 +693,7 @@
    struct tu_image_view *dst = fb->attachments[a].attachment;
    struct tu_image_view *src = fb->attachments[gmem_a].attachment;
 
-   tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.render_area);
+   tu_resolve_sysmem(cmd, cs, src, dst, layer_mask, fb->layers, &cmd->state.render_area);
 }
 
 static void
@@ -731,7 +733,7 @@
          if (a == VK_ATTACHMENT_UNUSED)
             continue;
 
-         tu6_emit_sysmem_resolve(cmd, cs, a,
+         tu6_emit_sysmem_resolve(cmd, cs, subpass->multiview_mask, a,
                                  subpass->color_attachments[i].attachment);
       }
    }
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 3d180fb..584931d 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1206,6 +1206,7 @@
                   struct tu_cs *cs,
                   struct tu_image_view *src,
                   struct tu_image_view *dst,
+                  uint32_t layer_mask,
                   uint32_t layers,
                   const VkRect2D *rect);