panfrost: add support for (de)interleaving Z24S8 in pan_tiling

This is needed for VK_EXT_host_image_copy which, like the buffer<->image
copy commands, treats depth/stencil like separate image planes and
requires copying each separately.

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35910>
diff --git a/src/gallium/drivers/lima/lima_resource.c b/src/gallium/drivers/lima/lima_resource.c
index ed84825..ba28249 100644
--- a/src/gallium/drivers/lima/lima_resource.c
+++ b/src/gallium/drivers/lima/lima_resource.c
@@ -654,7 +654,8 @@
                ptrans->box.width, ptrans->box.height,
                ptrans->stride,
                row_stride,
-               pres->format);
+               pres->format,
+               PAN_INTERLEAVE_NONE);
       }
 
       return trans->staging;
@@ -759,7 +760,8 @@
                   ptrans->box.width, ptrans->box.height,
                   row_stride,
                   ptrans->stride,
-                  pres->format);
+                  pres->format,
+                  PAN_INTERLEAVE_NONE);
          }
       }
    }
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index f107beb..03783cb 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -1386,7 +1386,7 @@
          dst, map, ptrans->box.x, ptrans->box.y, ptrans->box.width,
          ptrans->box.height, ptrans->stride,
          rsrc->plane.layout.slices[level].tiled_or_linear.row_stride_B,
-         rsrc->image.props.format);
+         rsrc->image.props.format, PAN_INTERLEAVE_NONE);
    }
 }
 
@@ -1502,7 +1502,7 @@
          map, src, ptrans->box.x, ptrans->box.y, ptrans->box.width,
          ptrans->box.height,
          rsrc->plane.layout.slices[level].tiled_or_linear.row_stride_B,
-         ptrans->stride, rsrc->image.props.format);
+         ptrans->stride, rsrc->image.props.format, PAN_INTERLEAVE_NONE);
    }
 }
 
diff --git a/src/panfrost/shared/pan_tiling.c b/src/panfrost/shared/pan_tiling.c
index 20df42e..c0c10a2 100644
--- a/src/panfrost/shared/pan_tiling.c
+++ b/src/panfrost/shared/pan_tiling.c
@@ -111,19 +111,63 @@
 #define TILE_HEIGHT     16
 #define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)
 
+enum pan_interleave_zs
+pan_get_interleave_zs(enum pipe_format format, bool depth, bool stencil)
+{
+   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
+      if (depth && stencil) {
+         return PAN_INTERLEAVE_NONE;
+      } else if (depth && !stencil) {
+         return PAN_INTERLEAVE_DEPTH;
+      } else if (!depth && stencil) {
+         return PAN_INTERLEAVE_STENCIL;
+      } else if (!depth && !stencil) {
+         unreachable("at least one aspect must be specified");
+      }
+   }
+   return PAN_INTERLEAVE_NONE;
+}
+
 static ALWAYS_INLINE
 void pan_access_image_pixel(void *dst, void *src, const unsigned pixel_size,
-                            bool is_store)
+                            enum pan_interleave_zs interleave, bool is_store)
 {
    if (util_is_power_of_two_nonzero(pixel_size)) {
       src = __builtin_assume_aligned(src, pixel_size);
-      dst = __builtin_assume_aligned(dst, pixel_size);
+      if (interleave != PAN_INTERLEAVE_STENCIL)
+         dst = __builtin_assume_aligned(dst, pixel_size);
    }
 
-   if (is_store)
-      memcpy(dst, src, pixel_size);
-   else
-      memcpy(src, dst, pixel_size);
+   switch (interleave) {
+      case PAN_INTERLEAVE_NONE:
+         if (is_store)
+            memcpy(dst, src, pixel_size);
+         else
+            memcpy(src, dst, pixel_size);
+         break;
+      case PAN_INTERLEAVE_DEPTH:
+         /* interleave only applies to Z24S8 */
+         assert(pixel_size == 4);
+         if (is_store) {
+            uint32_t src_pixel = *(uint32_t *) src;
+            *(uint16_t *) dst = src_pixel & 0xffff;
+            *((uint8_t *) dst + 2) = (src_pixel >> 16) & 0xff;
+         } else {
+            /* The top 8 bits of Z24X8 are unused, so we can overwrite them
+             * with zeros in a single 32B write, instead of needing separate
+             * 16B and 8B writes */
+            *(uint32_t *) src = *(uint32_t *) dst & 0xffffff;
+         }
+         break;
+      case PAN_INTERLEAVE_STENCIL:
+         /* interleave only applies to Z24S8 */
+         assert(pixel_size == 4);
+         if (is_store)
+            *((uint8_t *) dst + 3) = *(uint8_t *) src;
+         else
+            *(uint8_t *) src = *((uint8_t *) dst + 3);
+         break;
+   }
 }
 
 /* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
@@ -159,21 +203,27 @@
 
 static ALWAYS_INLINE void
 pan_access_tiled_image_aligned(
-   void *dst, void *src, unsigned pixel_size, unsigned shift,
-   uint16_t sx, uint16_t sy, uint16_t w, uint16_t h,
-   uint32_t dst_stride, uint32_t src_stride, bool is_store)
+   void *dst, void *src,
+   unsigned dst_pixel_size, unsigned src_pixel_size,
+   unsigned shift,
+   uint16_t sx, uint16_t sy,
+   uint16_t w, uint16_t h,
+   uint32_t dst_stride, uint32_t src_stride,
+   enum pan_interleave_zs interleave,
+   bool is_store)
 {
-   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * pixel_size);
+   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * dst_pixel_size);
    for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) {
       uint8_t *dest = (uint8_t *)(dest_start + ((y >> 4) * dst_stride));
       void *source = src + (src_y * src_stride);
-      void *source_end = source + w * pixel_size;
+      void *source_end = source + w * src_pixel_size;
       unsigned expanded_y = bit_duplication[y & 0xF] << shift;
       for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) {
          for (uint8_t i = 0; i < 16; ++i) {
             unsigned index = expanded_y ^ (space_4[i] << shift);
-            pan_access_image_pixel(dest + index, source, pixel_size, is_store);
-            source += pixel_size;
+            pan_access_image_pixel(dest + index, source, dst_pixel_size,
+                                   interleave, is_store);
+            source += src_pixel_size;
          }
       }
    }
@@ -181,9 +231,14 @@
 
 static ALWAYS_INLINE void
 pan_access_tiled_image_unaligned(
-   void *dst, void *src, unsigned pixel_size, unsigned tile_shift,
-   uint16_t sx, uint16_t sy, uint16_t w, uint16_t h,
-   uint32_t dst_stride, uint32_t src_stride, bool is_store)
+   void *dst, void *src,
+   unsigned dst_pixel_size, unsigned src_pixel_size,
+   unsigned tile_shift,
+   uint16_t sx, uint16_t sy,
+   uint16_t w, uint16_t h,
+   uint32_t dst_stride, uint32_t src_stride,
+   enum pan_interleave_zs interleave,
+   bool is_store)
 {
    const unsigned mask = (1 << tile_shift) - 1;
    for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) {
@@ -194,37 +249,44 @@
       for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) {
          unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2));
          unsigned index = expanded_y ^ space_4[x & mask];
-         uint8_t *source = src + source_start + pixel_size * src_x;
-         uint8_t *dest = dst + block_start_s + pixel_size * (block_x_s + index);
-         pan_access_image_pixel(dest, source, pixel_size, is_store);
+         uint8_t *source = src + source_start + src_pixel_size * src_x;
+         uint8_t *dest =
+            dst + block_start_s + dst_pixel_size * (block_x_s + index);
+         pan_access_image_pixel(dest, source, dst_pixel_size, interleave,
+                                is_store);
       }
    }
 }
 
-#define TILED_UNALIGNED_VARIANT(bpp, store, shift)                             \
-   pan_access_tiled_image_unaligned(dst, src, (bpp) / 8, shift, sx, sy, w, h,  \
-                                    dst_stride, src_stride, store)
+#define TILED_UNALIGNED_VARIANT(dst_bpp, src_bpp, interleave, store, shift)    \
+   pan_access_tiled_image_unaligned(dst, src, (dst_bpp) / 8, (src_bpp) / 8,    \
+                                    shift, sx, sy, w, h,                       \
+                                    dst_stride, src_stride, interleave, store)
 
 /* We have a separate call for each configuration, to ensure that the inlined
  * function is specialized */
 #define TILED_UNALIGNED_VARIANTS(store, shift)                                 \
    {                                                                           \
       if (bpp == 8)                                                            \
-         TILED_UNALIGNED_VARIANT(8, store, shift);                             \
+         TILED_UNALIGNED_VARIANT(8, 8, PAN_INTERLEAVE_NONE, store, shift);     \
       else if (bpp == 16)                                                      \
-         TILED_UNALIGNED_VARIANT(16, store, shift);                            \
+         TILED_UNALIGNED_VARIANT(16, 16, PAN_INTERLEAVE_NONE, store, shift);   \
       else if (bpp == 24)                                                      \
-         TILED_UNALIGNED_VARIANT(24, store, shift);                            \
-      else if (bpp == 32)                                                      \
-         TILED_UNALIGNED_VARIANT(32, store, shift);                            \
+         TILED_UNALIGNED_VARIANT(24, 24, PAN_INTERLEAVE_NONE, store, shift);   \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_NONE)                 \
+         TILED_UNALIGNED_VARIANT(32, 32, PAN_INTERLEAVE_NONE, store, shift);   \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_DEPTH)                \
+         TILED_UNALIGNED_VARIANT(32, 32, PAN_INTERLEAVE_DEPTH, store, shift);  \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_STENCIL)              \
+         TILED_UNALIGNED_VARIANT(32, 8, PAN_INTERLEAVE_STENCIL, store, shift); \
       else if (bpp == 48)                                                      \
-         TILED_UNALIGNED_VARIANT(48, store, shift);                            \
+         TILED_UNALIGNED_VARIANT(48, 48, PAN_INTERLEAVE_NONE, store, shift);   \
       else if (bpp == 64)                                                      \
-         TILED_UNALIGNED_VARIANT(64, store, shift);                            \
+         TILED_UNALIGNED_VARIANT(64, 64, PAN_INTERLEAVE_NONE, store, shift);   \
       else if (bpp == 96)                                                      \
-         TILED_UNALIGNED_VARIANT(96, store, shift);                            \
+         TILED_UNALIGNED_VARIANT(96, 96, PAN_INTERLEAVE_NONE, store, shift);   \
       else if (bpp == 128)                                                     \
-         TILED_UNALIGNED_VARIANT(128, store, shift);                           \
+         TILED_UNALIGNED_VARIANT(128, 128, PAN_INTERLEAVE_NONE, store, shift); \
    }
 
 /*
@@ -238,6 +300,7 @@
                                unsigned w, unsigned h, uint32_t dst_stride,
                                uint32_t src_stride,
                                const struct util_format_description *desc,
+                               enum pan_interleave_zs interleave,
                                bool _is_store)
 {
    unsigned bpp = desc->block.bits;
@@ -261,22 +324,27 @@
    }
 }
 
-#define TILED_ALIGNED_VARIANT(store, bpp, shift)                               \
-   pan_access_tiled_image_aligned(dst, src, (bpp) / 8, shift, sx, sy, w, h,    \
-                                  dst_stride, src_stride, store)
+#define TILED_ALIGNED_VARIANT(interleave, store, dst_bpp, src_bpp, shift)      \
+   pan_access_tiled_image_aligned(dst, src, (dst_bpp) / 8, (src_bpp) / 8,      \
+                                  shift, sx, sy, w, h,                         \
+                                  dst_stride, src_stride, interleave, store)
 
 #define TILED_ALIGNED_VARIANTS(store)                                          \
    {                                                                           \
       if (bpp == 8)                                                            \
-         TILED_ALIGNED_VARIANT(store, 8, 0);                                   \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 8, 8, 0);           \
       else if (bpp == 16)                                                      \
-         TILED_ALIGNED_VARIANT(store, 16, 1);                                  \
-      else if (bpp == 32)                                                      \
-         TILED_ALIGNED_VARIANT(store, 32, 2);                                  \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 16, 16, 1);         \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_NONE)                 \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 32, 32, 2);         \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_DEPTH)                \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_DEPTH, store, 32, 32, 2);        \
+      else if (bpp == 32 && interleave == PAN_INTERLEAVE_STENCIL)              \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_STENCIL, store, 32, 8, 2);       \
       else if (bpp == 64)                                                      \
-         TILED_ALIGNED_VARIANT(store, 64, 3);                                  \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 64, 64, 3);         \
       else if (bpp == 128)                                                     \
-         TILED_ALIGNED_VARIANT(store, 128, 4);                                 \
+         TILED_ALIGNED_VARIANT(PAN_INTERLEAVE_NONE, store, 128, 128, 4);       \
    }
 
 /* Optimized variant of pan_access_tiled_image_generic except that requires
@@ -285,7 +353,8 @@
 pan_access_tiled_image_generic_aligned(
    void *dst, void *src, unsigned sx, unsigned sy, unsigned w, unsigned h,
    uint32_t dst_stride, uint32_t src_stride,
-   const struct util_format_description *desc,bool is_store)
+   const struct util_format_description *desc,
+   enum pan_interleave_zs interleave, bool is_store)
 {
    unsigned bpp = desc->block.bits;
 
@@ -309,8 +378,12 @@
 static ALWAYS_INLINE void
 pan_access_tiled_image(void *dst, void *src, unsigned x, unsigned y, unsigned w,
                        unsigned h, uint32_t dst_stride, uint32_t src_stride,
-                       enum pipe_format format, bool is_store)
+                       enum pipe_format format,
+                       enum pan_interleave_zs interleave, bool is_store)
 {
+   if (interleave != PAN_INTERLEAVE_NONE)
+      assert(format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
+
    const struct util_format_description *desc = util_format_description(format);
    unsigned bpp = desc->block.bits;
 
@@ -325,7 +398,7 @@
    if (desc->block.width > 1 ||
        !util_is_power_of_two_nonzero(desc->block.bits)) {
       pan_access_tiled_image_generic(dst, (void *)src, x, y, w, h, dst_stride,
-                                     src_stride, desc, is_store);
+                                     src_stride, desc, interleave, is_store);
 
       return;
    }
@@ -343,7 +416,8 @@
       unsigned dist = MIN2(first_full_tile_y - y, h);
 
       pan_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, w, dist,
-                                     dst_stride, src_stride, desc, is_store);
+                                     dst_stride, src_stride, desc, interleave,
+                                     is_store);
 
       if (dist == h)
          return;
@@ -358,7 +432,7 @@
 
       pan_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y), x,
                                      last_full_tile_y, w, dist, dst_stride,
-                                     src_stride, desc, is_store);
+                                     src_stride, desc, interleave, is_store);
 
       h -= dist;
    }
@@ -368,7 +442,8 @@
       unsigned dist = MIN2(first_full_tile_x - x, w);
 
       pan_access_tiled_image_generic(dst, OFFSET(src, x, y), x, y, dist, h,
-                                     dst_stride, src_stride, desc, is_store);
+                                     dst_stride, src_stride, desc, interleave,
+                                     is_store);
 
       if (dist == w)
          return;
@@ -383,14 +458,14 @@
 
       pan_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y),
                                      last_full_tile_x, y, dist, h, dst_stride,
-                                     src_stride, desc, is_store);
+                                     src_stride, desc, interleave, is_store);
 
       w -= dist;
    }
 
    pan_access_tiled_image_generic_aligned(dst, OFFSET(src, x, y), x, y, w,
                                           h, dst_stride, src_stride, desc,
-                                          is_store);
+                                          interleave, is_store);
 }
 
 /**
@@ -401,19 +476,21 @@
 void
 pan_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
                       unsigned w, unsigned h, uint32_t dst_stride,
-                      uint32_t src_stride, enum pipe_format format)
+                      uint32_t src_stride, enum pipe_format format,
+                      enum pan_interleave_zs interleave)
 {
    pan_access_tiled_image(dst, (void *)src, x, y, w, h, dst_stride, src_stride,
-                          format, true);
+                          format, interleave, true);
 }
 
 void
 pan_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
                      unsigned w, unsigned h, uint32_t dst_stride,
-                     uint32_t src_stride, enum pipe_format format)
+                     uint32_t src_stride, enum pipe_format format,
+                     enum pan_interleave_zs interleave)
 {
    pan_access_tiled_image((void *)src, dst, x, y, w, h, src_stride, dst_stride,
-                          format, false);
+                          format, interleave, false);
 }
 
 void
@@ -499,10 +576,10 @@
 
          pan_load_tiled_image(
             chunk, src, src_chunk_x, src_chunk_y, width, height,
-            chunk_row_stride_B, src_stride, format);
+            chunk_row_stride_B, src_stride, format, PAN_INTERLEAVE_NONE);
          pan_store_tiled_image(
             dst, chunk, dst_chunk_x, dst_chunk_y, width, height, dst_stride,
-            chunk_row_stride_B, format);
+            chunk_row_stride_B, format, PAN_INTERLEAVE_NONE);
       }
    }
 
diff --git a/src/panfrost/shared/pan_tiling.h b/src/panfrost/shared/pan_tiling.h
index d7f6601..2fbaf7e 100644
--- a/src/panfrost/shared/pan_tiling.h
+++ b/src/panfrost/shared/pan_tiling.h
@@ -34,6 +34,29 @@
 extern "C" {
 #endif
 
+/* The depth and stencil aspects of a Z24_UNORM_S8_UINT image are interleaved,
+ * where the bottom 24 bits are depth and the top 8 bits are stencil. When
+ * copying to/from a Z24S8 tiled image, the pan_interleave_zs enum specifies
+ * whether to (de)interleave the depth/stencil aspects */
+enum pan_interleave_zs {
+   /* Copy all aspects, no interleaving */
+   PAN_INTERLEAVE_NONE,
+   /* Copy only the depth aspect of a Z24S8 tiled image to/from linear Z24X8 */
+   PAN_INTERLEAVE_DEPTH,
+   /* Copy only the stencil aspect of a Z24S8 tiled image to/from linear S8 */
+   PAN_INTERLEAVE_STENCIL,
+};
+
+/**
+ * Get the appropriate pan_interleave_zs mode for copying to/from a given
+ * format.
+ *
+ * @depth Whether to copy the depth aspect
+ * @stencil Whether to copy the stencil aspect
+ */
+enum pan_interleave_zs
+pan_get_interleave_zs(enum pipe_format format, bool depth, bool stencil);
+
 /**
  * Load a rectangular region from a tiled image to a linear staging image.
  *
@@ -46,10 +69,12 @@
  * @dst_stride Stride in bytes of linear destination
  * @src_stride Number of bytes between adjacent rows of tiles in source.
  * @format Format of the source and destination image
+ * @interleave How to deinterleave ZS aspects from the tiled image
  */
 void pan_load_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
                           unsigned w, unsigned h, uint32_t dst_stride,
-                          uint32_t src_stride, enum pipe_format format);
+                          uint32_t src_stride, enum pipe_format format,
+                          enum pan_interleave_zs interleave);
 
 /**
  * Store a linear staging image to a rectangular region of a tiled image.
@@ -63,10 +88,12 @@
  * @dst_stride Number of bytes between adjacent rows of tiles in destination.
  * @src_stride Stride in bytes of linear source
  * @format Format of the source and destination image
+ * @interleave How to interleave a ZS aspects to the tiled image
  */
 void pan_store_tiled_image(void *dst, const void *src, unsigned x, unsigned y,
                            unsigned w, unsigned h, uint32_t dst_stride,
-                           uint32_t src_stride, enum pipe_format format);
+                           uint32_t src_stride, enum pipe_format format,
+                           enum pan_interleave_zs interleave);
 
 /**
  * Copy a rectangular region from one tiled image to another.
diff --git a/src/panfrost/shared/test/test-tiling.cpp b/src/panfrost/shared/test/test-tiling.cpp
index 4ce9e11..3c8254c 100644
--- a/src/panfrost/shared/test/test-tiling.cpp
+++ b/src/panfrost/shared/test/test-tiling.cpp
@@ -151,14 +151,14 @@
       }
 
       pan_store_tiled_image(tiled, linear, rx, ry, rw, rh, dst_stride,
-                            src_stride, format);
+                            src_stride, format, PAN_INTERLEAVE_NONE);
    } else {
       for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) {
          ((uint8_t *)tiled)[i] = (i & 0xFF);
       }
 
       pan_load_tiled_image(linear, tiled, rx, ry, rw, rh, dst_stride,
-                           src_stride, format);
+                           src_stride, format, PAN_INTERLEAVE_NONE);
    }
 
    ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, dst_stride,
diff --git a/src/panfrost/vulkan/panvk_host_copy.c b/src/panfrost/vulkan/panvk_host_copy.c
index e3e5b10..02444ab 100644
--- a/src/panfrost/vulkan/panvk_host_copy.c
+++ b/src/panfrost/vulkan/panvk_host_copy.c
@@ -125,14 +125,14 @@
                   img.offset.x, img.offset.y, extent.width, extent.height,
                   slice_layout->tiled_or_linear.row_stride_B,
                   mem.layout.row_stride_B,
-                  pfmt);
+                  pfmt, PAN_INTERLEAVE_NONE);
             else
                pan_load_tiled_image(
                   mem_depth_ptr, img_depth_ptr,
                   img.offset.x, img.offset.y, extent.width, extent.height,
                   mem.layout.row_stride_B,
                   slice_layout->tiled_or_linear.row_stride_B,
-                  pfmt);
+                  pfmt, PAN_INTERLEAVE_NONE);
          }
       }
    }
@@ -355,7 +355,7 @@
                region->extent.width, region->extent.height,
                dst_slice_layout->tiled_or_linear.row_stride_B,
                src_slice_layout->tiled_or_linear.row_stride_B,
-               src_pfmt);
+               src_pfmt, PAN_INTERLEAVE_NONE);
          } else if (!src_linear && dst_linear) {
             unsigned dst_y_bl = region->dstOffset.y / block_height_px;
             unsigned dst_x_bl = region->dstOffset.x / block_width_px;
@@ -368,7 +368,7 @@
                region->extent.width, region->extent.height,
                dst_slice_layout->tiled_or_linear.row_stride_B,
                src_slice_layout->tiled_or_linear.row_stride_B,
-               dst_pfmt);
+               dst_pfmt, PAN_INTERLEAVE_NONE);
          } else {
             pan_copy_tiled_image(
                dst_depth_ptr, src_depth_ptr, region->dstOffset.x,