ac/llvm: add missing optimization barrier for 64-bit readlanes

Otherwise, LLVM optimizes it but it's actually incorrect.

Fixes: 0f45d4dc2b1 ("ac: add ac_build_readlane without optimization barrier")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 760d912..93e2e28 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -3611,11 +3611,15 @@
 }
 
 static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+		  LLVMValueRef lane, bool with_opt_barrier)
 {
 	LLVMTypeRef type = LLVMTypeOf(src);
 	LLVMValueRef result;
 
+	if (with_opt_barrier)
+		ac_build_optimization_barrier(ctx, &src);
+
 	src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
 	if (lane)
 		lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
@@ -3630,6 +3634,43 @@
 	return LLVMBuildTrunc(ctx->builder, result, type, "");
 }
 
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+			 LLVMValueRef src, LLVMValueRef lane,
+			 bool with_opt_barrier)
+{
+	LLVMTypeRef src_type = LLVMTypeOf(src);
+	src = ac_to_integer(ctx, src);
+	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+	LLVMValueRef ret;
+
+	if (bits > 32) {
+		assert(bits % 32 == 0);
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+		LLVMValueRef src_vector =
+			LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+		ret = LLVMGetUndef(vec_type);
+		for (unsigned i = 0; i < bits / 32; i++) {
+			LLVMValueRef ret_comp;
+
+			src = LLVMBuildExtractElement(ctx->builder, src_vector,
+						LLVMConstInt(ctx->i32, i, 0), "");
+
+			ret_comp = _ac_build_readlane(ctx, src, lane,
+						      with_opt_barrier);
+
+			ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
+						LLVMConstInt(ctx->i32, i, 0), "");
+		}
+	} else {
+		ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
+	}
+
+	if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+		return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
+	return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
 /**
  * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
  *
@@ -3642,44 +3683,16 @@
  * @return value of the lane
  */
 LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
-					      LLVMValueRef src, LLVMValueRef lane)
+                                             LLVMValueRef src, LLVMValueRef lane)
 {
-	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
-	LLVMValueRef ret;
-
-	if (bits > 32) {
-		assert(bits % 32 == 0);
-		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
-		LLVMValueRef src_vector =
-			LLVMBuildBitCast(ctx->builder, src, vec_type, "");
-		ret = LLVMGetUndef(vec_type);
-		for (unsigned i = 0; i < bits / 32; i++) {
-			src = LLVMBuildExtractElement(ctx->builder, src_vector,
-						LLVMConstInt(ctx->i32, i, 0), "");
-			LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
-			ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
-						LLVMConstInt(ctx->i32, i, 0), "");
-		}
-	} else {
-		ret = _ac_build_readlane(ctx, src, lane);
-	}
-
-	return ret;
+	return ac_build_readlane_common(ctx, src, lane, false);
 }
 
+
 LLVMValueRef
 ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
 {
-	LLVMTypeRef src_type = LLVMTypeOf(src);
-	src = ac_to_integer(ctx, src);
-	LLVMValueRef ret;
-
-	ac_build_optimization_barrier(ctx, &src);
-
-	ret = ac_build_readlane_no_opt_barrier(ctx, src, lane);
-	if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
-		return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
-	return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+	return ac_build_readlane_common(ctx, src, lane, true);
 }
 
 LLVMValueRef