radeonsi: skip an unnecessary mutex lock for L2 prefetches

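util_range_add takes a mutex lock internally. Skip the call when the
source and destination are the same buffer at the same offset, which is
the L2 prefetch case.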

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 06e4899..582e599 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -315,11 +315,13 @@
 	if (!size)
 		return;
 
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
-		       dst_offset + size);
+	if (dst != src || dst_offset != src_offset) {
+		/* Mark the buffer range of destination as valid (initialized),
+		 * so that transfer_map knows it should wait for the GPU when mapping
+		 * that range. */
+		util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+			       dst_offset + size);
+	}
 
 	dst_offset += r600_resource(dst)->gpu_address;
 	src_offset += r600_resource(src)->gpu_address;