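radeonsi: si_cp_dma_prepare is a no-op for L2 prefetches

Add SI_CPDMA_SKIP_ALL, which ORs together the five SI_CPDMA_SKIP_* flags,
and pass it for the prefetch copy. When all of them are set,
si_cp_dma_prepare now only clears *is_first and returns early.

Note: the prefetch copy previously did not pass
SI_CPDMA_SKIP_BO_LIST_UPDATE, so it now also skips the BO list update.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>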
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 4c79dfe..06e4899 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -133,6 +133,12 @@
 			      uint64_t remaining_size, unsigned user_flags,
 			      bool *is_first, unsigned *packet_flags)
 {
+	/* Fast exit for a CPDMA prefetch. */
+	if ((user_flags & SI_CPDMA_SKIP_ALL) == SI_CPDMA_SKIP_ALL) {
+		*is_first = false;
+		return;
+	}
+
 	if (!(user_flags & SI_CPDMA_SKIP_BO_LIST_UPDATE)) {
 		/* Count memory usage in so that need_cs_space can take it into account. */
 		r600_context_add_resource_size(&sctx->b.b, dst);
@@ -395,11 +401,7 @@
 {
 	assert(sctx->b.chip_class >= CIK);
 
-	si_copy_buffer(sctx, buf, buf, offset, offset, size,
-		       SI_CPDMA_SKIP_CHECK_CS_SPACE |
-		       SI_CPDMA_SKIP_SYNC_AFTER |
-		       SI_CPDMA_SKIP_SYNC_BEFORE |
-		       SI_CPDMA_SKIP_GFX_SYNC);
+	si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL);
 }
 
 void si_init_cp_dma_functions(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 64218ee..c9ae27e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -373,6 +373,11 @@
 #define SI_CPDMA_SKIP_SYNC_BEFORE	(1 << 2) /* don't wait for DMA before the copy (RAW hazards) */
 #define SI_CPDMA_SKIP_GFX_SYNC		(1 << 3) /* don't flush caches and don't wait for PS/CS */
 #define SI_CPDMA_SKIP_BO_LIST_UPDATE	(1 << 4) /* don't update the BO list */
+#define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \
+			   SI_CPDMA_SKIP_SYNC_AFTER | \
+			   SI_CPDMA_SKIP_SYNC_BEFORE | \
+			   SI_CPDMA_SKIP_GFX_SYNC | \
+			   SI_CPDMA_SKIP_BO_LIST_UPDATE) /* all five skip flags combined */
 
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,