anv: Place images into the aux-map when safe to do so

At image bind time, if an image's addresses can be placed into the
aux-map without causing conflicts with a pre-existing mapping, do so.
The code aux management code in the binding function operates on a
per-plane basis. So, use the per-plane CCS memory range from the image
rather than the CCS memory region for the entire BO.

Another way to avoid aux-map conflicts is to rely solely on having a
dedicated allocation for an image. Unfortunately, not all workloads
change their behavior when drivers report a preference for dedicated
allocations. In particular, 3DMark Wild Life Extreme does not make more
dedicated allocations and such a solution was measured to perform ~16%
worse than this solution. With this solution, I did not measure a loss
of CCS on that benchmark.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6304
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (v1)
Reviewed-by: Jianxun Zhang <jianxun.zhang@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25003>
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 6624076..c1bf508 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1508,15 +1508,6 @@
       return vk_errorf(device, VK_ERROR_UNKNOWN, "vm bind failed");
    }
 
-   if (new_bo._ccs_size > 0) {
-      assert(device->info->has_aux_map);
-      const bool mapped =
-         intel_aux_map_add_mapping(device->aux_map_ctx, new_bo.offset,
-                                   intel_canonical_address(new_bo.offset + new_bo.size),
-                                   new_bo.size, 0 /* format_bits */);
-      assert(mapped);
-   }
-
    assert(new_bo.gem_handle);
 
    /* If we just got this gem_handle from anv_bo_init_new then we know no one
@@ -1867,12 +1858,11 @@
    }
    assert(bo->refcount == 0);
 
-   if (bo->_ccs_size > 0) {
-      assert(device->physical->has_implicit_ccs);
-      assert(device->info->has_aux_map);
-      assert(bo->has_implicit_ccs);
+   /* Unmap the entire BO. In the case that some addresses lacked an aux-map
+    * entry, the unmapping function will add table entries for them.
+    */
+   if (anv_bo_allows_aux_map(device, bo))
       intel_aux_map_unmap_range(device->aux_map_ctx, bo->offset, bo->size);
-   }
 
    /* Memset the BO just in case.  The refcount being zero should be enough to
     * prevent someone from assuming the data is valid but it's safer to just
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index e0dbff9..e0549d7 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -2273,8 +2273,25 @@
          if (device->info->has_flat_ccs && bo->vram_only)
             continue;
 
+         /* Add the plane to the aux map when applicable. */
          if (anv_bo_allows_aux_map(device, bo)) {
-            continue;
+            const struct anv_address main_addr =
+               anv_image_address(image,
+                 &image->planes[p].primary_surface.memory_range);
+            const struct anv_address aux_addr =
+               anv_image_address(image,
+                 &image->planes[p].compr_ctrl_memory_range);
+            const struct isl_surf *surf =
+               &image->planes[p].primary_surface.isl;
+            const uint64_t format_bits =
+               intel_aux_map_format_bits_for_isl_surf(surf);
+            const bool mapped =
+               intel_aux_map_add_mapping(device->aux_map_ctx,
+                                         anv_address_physical(main_addr),
+                                         anv_address_physical(aux_addr),
+                                         surf->size_B, format_bits);
+            if (mapped)
+               continue;
          }
 
          /* Do nothing prior to gfx12. There are no special requirements. */
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f824d29..9d64ef4 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -4855,7 +4855,10 @@
 anv_image_get_aux_memory_range(const struct anv_image *image,
                                uint32_t plane)
 {
-  return &image->planes[plane].aux_surface.memory_range;
+   if (image->planes[plane].aux_surface.memory_range.size > 0)
+     return &image->planes[plane].aux_surface.memory_range;
+   else
+     return &image->planes[plane].compr_ctrl_memory_range;
 }
 
 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index f46a24a..4bc160b 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -339,113 +339,6 @@
    }
 }
 
-#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
-
-#if GFX_VER == 12
-static void
-anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
-                      const struct anv_image *image,
-                      VkImageAspectFlagBits aspect,
-                      uint32_t base_level, uint32_t level_count,
-                      uint32_t base_layer, uint32_t layer_count)
-{
-   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
-
-   const struct anv_surface *surface = &image->planes[plane].primary_surface;
-   uint64_t base_address =
-      anv_address_physical(anv_image_address(image, &surface->memory_range));
-
-   const struct isl_surf *isl_surf = &image->planes[plane].primary_surface.isl;
-   uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(isl_surf);
-
-   /* We're about to live-update the AUX-TT.  We really don't want anyone else
-    * trying to read it while we're doing this.  We could probably get away
-    * with not having this stall in some cases if we were really careful but
-    * it's better to play it safe.  Full stall the GPU.
-    */
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
-                             "before update AUX-TT");
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-
-   struct mi_builder b;
-   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
-
-   for (uint32_t a = 0; a < layer_count; a++) {
-      const uint32_t layer = base_layer + a;
-
-      uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
-      for (uint32_t l = 0; l < level_count; l++) {
-         const uint32_t level = base_level + l;
-
-         uint32_t logical_array_layer, logical_z_offset_px;
-         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
-            logical_array_layer = 0;
-
-            /* If the given miplevel does not have this layer, then any higher
-             * miplevels won't either because miplevels only get smaller the
-             * higher the LOD.
-             */
-            assert(layer < image->vk.extent.depth);
-            if (layer >= u_minify(image->vk.extent.depth, level))
-               break;
-            logical_z_offset_px = layer;
-         } else {
-            assert(layer < image->vk.array_layers);
-            logical_array_layer = layer;
-            logical_z_offset_px = 0;
-         }
-
-         uint64_t slice_start_offset_B, slice_end_offset_B;
-         isl_surf_get_image_range_B_tile(isl_surf, level,
-                                         logical_array_layer,
-                                         logical_z_offset_px,
-                                         &slice_start_offset_B,
-                                         &slice_end_offset_B);
-
-         start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
-         end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
-      }
-
-      struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
-      /* It depends on what the purpose you use that figure from AUX module,
-       * alignment, page size of main surface, or actually granularity...
-       */
-      uint64_t main_page_size = intel_aux_map_get_alignment(ctx);
-      start_offset_B = ROUND_DOWN_TO(start_offset_B, main_page_size);
-      end_offset_B = align64(end_offset_B, main_page_size);
-
-      for (uint64_t offset = start_offset_B;
-           offset < end_offset_B; offset += main_page_size) {
-         uint64_t address = base_address + offset;
-
-         uint64_t aux_entry_addr64, *aux_entry_map;
-         struct intel_aux_map_context *ctx = cmd_buffer->device->aux_map_ctx;
-         aux_entry_map = intel_aux_map_get_entry(ctx, address, &aux_entry_addr64);
-
-         struct anv_address aux_entry_address = {
-            .bo = NULL,
-            .offset = aux_entry_addr64,
-         };
-
-         const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
-         uint64_t new_aux_entry =
-            (old_aux_entry & intel_aux_get_meta_address_mask(ctx)) |
-            format_bits;
-
-         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
-            new_aux_entry |= INTEL_AUX_MAP_ENTRY_VALID_BIT;
-
-         mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry));
-      }
-   }
-
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
-                             "after update AUX-TT");
-}
-#endif /* GFX_VER == 12 */
-
 /* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
  * the initial layout is undefined, the HiZ buffer and depth buffer will
  * represent the same data at the end of this operation.
@@ -463,16 +356,6 @@
    if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
       return;
 
-#if GFX_VER == 12
-   if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
-        initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
-       cmd_buffer->device->physical->has_implicit_ccs &&
-       cmd_buffer->device->info->has_aux_map) {
-      anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
-                            0, 1, base_layer, layer_count);
-   }
-#endif
-
    /* If will_full_fast_clear is set, the caller promises to fast-clear the
     * largest portion of the specified range as it can.  For depth images,
     * that means the entire image because we don't support multi-LOD HiZ.
@@ -541,9 +424,6 @@
         initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
        cmd_buffer->device->physical->has_implicit_ccs &&
        cmd_buffer->device->info->has_aux_map) {
-      anv_image_init_aux_tt(cmd_buffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
-                            base_level, level_count, base_layer, layer_count);
-
       /* If will_full_fast_clear is set, the caller promises to fast-clear the
        * largest portion of the specified range as it can.
        */
@@ -1160,18 +1040,6 @@
       }
    }
 
-#if GFX_VER == 12
-   if (initial_layout_undefined) {
-      if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
-         anv_image_init_aux_tt(cmd_buffer, image, aspect,
-                               base_level, level_count,
-                               base_layer, layer_count);
-      }
-   }
-#else
-   assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map));
-#endif
-
    if (must_init_fast_clear_state) {
       if (base_level == 0 && base_layer == 0) {
          set_image_fast_clear_state(cmd_buffer, image, aspect,