// blob: c8a1883b151511662949697e641b1892cecca298 [file] [log] [blame]
// RUN: hlo_to_llvm_ir %s | FileCheck %s
HloModule TestModule
// Comparator computation referenced by the sort in this module (to_apply=compare).
// It takes two scalar f32 parameters and produces a scalar pred that is true
// when the first parameter is less than the second.
compare {
p.0.lhs = f32[] parameter(0)
p.0.rhs = f32[] parameter(1)
// direction=LT selects the less-than comparison.
ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT
}
// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_1:.*]] = alloca i8, align 1
// CHECK: %[[VAL_2:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_3:.*]] = bitcast i8* %[[VAL_2]] to [2 x [3 x float]]*
// CHECK: %[[VAL_4:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_5:.*]] = zext i32 %[[VAL_4]] to i64
// CHECK: %[[VAL_6:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_7:.*]] = zext i32 %[[VAL_6]] to i64
// CHECK: %[[VAL_8:.*]] = mul nuw nsw i64 %[[VAL_5]], 4
// CHECK: %[[VAL_9:.*]] = add nuw nsw i64 %[[VAL_8]], %[[VAL_7]]
// CHECK: %[[VAL_10:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_10]])
// CHECK: %[[VAL_11:.*]] = udiv i64 %[[VAL_9]], 1
// CHECK: %[[VAL_12:.*]] = urem i64 %[[VAL_11]], 2
// CHECK: %[[VAL_13:.*]] = udiv i64 %[[VAL_9]], 2
// CHECK: %[[VAL_14:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_17:.*]], %[[VAL_18:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_18]]
// CHECK: %[[VAL_19:.*]] = mul i64 %[[VAL_12]], 2
// CHECK: %[[VAL_20:.*]] = xor i64 %[[VAL_19]], 1
// CHECK: %[[VAL_21:.*]] = icmp slt i64 %[[VAL_19]], %[[VAL_20]]
// CHECK: %[[VAL_22:.*]] = icmp slt i64 %[[VAL_20]], 3
// CHECK: %[[VAL_23:.*]] = and i1 %[[VAL_21]], %[[VAL_22]]
// CHECK: br i1 %[[VAL_23]], label %[[VAL_24:.*]], label %[[VAL_17]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_25:.*]], %[[VAL_15]]
// CHECK: br label %[[VAL_16]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_15]]
// CHECK: %[[VAL_26:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_20]]
// CHECK: %[[VAL_27:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: call void @region_0_4(float* %[[VAL_26]], float* %[[VAL_27]], i8* %[[VAL_1]])
// CHECK: %[[VAL_28:.*]] = load i8, i8* %[[VAL_1]], align 1
// CHECK: %[[VAL_29:.*]] = icmp ne i8 %[[VAL_28]], 0
// CHECK: br i1 %[[VAL_29]], label %[[VAL_30:.*]], label %[[VAL_25]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_30]], %[[VAL_24]]
// CHECK: br label %[[VAL_17]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_24]]
// CHECK: %[[VAL_31:.*]] = load float, float* %[[VAL_26]], align 4
// CHECK: %[[VAL_32:.*]] = load float, float* %[[VAL_27]], align 4
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: store float %[[VAL_31]], float* %[[VAL_33]], align 4
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_20]]
// CHECK: store float %[[VAL_32]], float* %[[VAL_34]], align 4
// CHECK: br label %[[VAL_25]]
// CHECK: }
// CHECK: ; Function Attrs: nounwind readnone
// CHECK: declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
// CHECK: ; Function Attrs: nounwind readnone
// CHECK: declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
// CHECK: ; Function Attrs: nofree nosync nounwind willreturn
// CHECK: declare void @llvm.assume(i1 noundef) #1
// CHECK: define internal void @region_0_4(float* dereferenceable(4) %[[VAL_0:.*]], float* dereferenceable(4) %[[VAL_1:.*]], i8* dereferenceable(1) %[[VAL_2:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_3:.*]] = alloca i8, align 1
// CHECK: %[[VAL_4:.*]] = load float, float* %[[VAL_0]], align 4
// CHECK: %[[VAL_5:.*]] = load float, float* %[[VAL_1]], align 4
// CHECK: %[[VAL_6:.*]] = fcmp olt float %[[VAL_4]], %[[VAL_5]]
// CHECK: %[[VAL_7:.*]] = zext i1 %[[VAL_6]] to i8
// CHECK: store i8 %[[VAL_7]], i8* %[[VAL_3]], align 1
// CHECK: %[[VAL_8:.*]] = load i8, i8* %[[VAL_3]], align 1
// CHECK: store i8 %[[VAL_8]], i8* %[[VAL_2]], align 1
// CHECK: ret void
// CHECK: }
// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_1:.*]] = alloca i8, align 1
// CHECK: %[[VAL_2:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_3:.*]] = bitcast i8* %[[VAL_2]] to [2 x [3 x float]]*
// CHECK: %[[VAL_4:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_5:.*]] = zext i32 %[[VAL_4]] to i64
// CHECK: %[[VAL_6:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_7:.*]] = zext i32 %[[VAL_6]] to i64
// CHECK: %[[VAL_8:.*]] = mul nuw nsw i64 %[[VAL_5]], 4
// CHECK: %[[VAL_9:.*]] = add nuw nsw i64 %[[VAL_8]], %[[VAL_7]]
// CHECK: %[[VAL_10:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_10]])
// CHECK: %[[VAL_11:.*]] = udiv i64 %[[VAL_9]], 1
// CHECK: %[[VAL_12:.*]] = urem i64 %[[VAL_11]], 2
// CHECK: %[[VAL_13:.*]] = udiv i64 %[[VAL_9]], 2
// CHECK: %[[VAL_14:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_17:.*]], %[[VAL_18:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_18]]
// CHECK: %[[VAL_19:.*]] = xor i64 %[[VAL_12]], 3
// CHECK: %[[VAL_20:.*]] = icmp slt i64 %[[VAL_12]], %[[VAL_19]]
// CHECK: %[[VAL_21:.*]] = icmp slt i64 %[[VAL_19]], 3
// CHECK: %[[VAL_22:.*]] = and i1 %[[VAL_20]], %[[VAL_21]]
// CHECK: br i1 %[[VAL_22]], label %[[VAL_23:.*]], label %[[VAL_17]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_24:.*]], %[[VAL_15]]
// CHECK: br label %[[VAL_16]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_15]]
// CHECK: %[[VAL_25:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: %[[VAL_26:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_12]]
// CHECK: call void @region_0_4(float* %[[VAL_25]], float* %[[VAL_26]], i8* %[[VAL_1]])
// CHECK: %[[VAL_27:.*]] = load i8, i8* %[[VAL_1]], align 1
// CHECK: %[[VAL_28:.*]] = icmp ne i8 %[[VAL_27]], 0
// CHECK: br i1 %[[VAL_28]], label %[[VAL_29:.*]], label %[[VAL_24]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_29]], %[[VAL_23]]
// CHECK: br label %[[VAL_17]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_23]]
// CHECK: %[[VAL_30:.*]] = load float, float* %[[VAL_25]], align 4
// CHECK: %[[VAL_31:.*]] = load float, float* %[[VAL_26]], align 4
// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_12]]
// CHECK: store float %[[VAL_30]], float* %[[VAL_32]], align 4
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: store float %[[VAL_31]], float* %[[VAL_33]], align 4
// CHECK: br label %[[VAL_24]]
// CHECK: }
// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_1:.*]] = alloca i8, align 1
// CHECK: %[[VAL_2:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_3:.*]] = bitcast i8* %[[VAL_2]] to [2 x [3 x float]]*
// CHECK: %[[VAL_4:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_5:.*]] = zext i32 %[[VAL_4]] to i64
// CHECK: %[[VAL_6:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_7:.*]] = zext i32 %[[VAL_6]] to i64
// CHECK: %[[VAL_8:.*]] = mul nuw nsw i64 %[[VAL_5]], 4
// CHECK: %[[VAL_9:.*]] = add nuw nsw i64 %[[VAL_8]], %[[VAL_7]]
// CHECK: %[[VAL_10:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_10]])
// CHECK: %[[VAL_11:.*]] = udiv i64 %[[VAL_9]], 1
// CHECK: %[[VAL_12:.*]] = urem i64 %[[VAL_11]], 2
// CHECK: %[[VAL_13:.*]] = udiv i64 %[[VAL_9]], 2
// CHECK: %[[VAL_14:.*]] = icmp ult i64 %[[VAL_9]], 4
// CHECK: br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_17:.*]], %[[VAL_18:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_18]]
// CHECK: %[[VAL_19:.*]] = mul i64 %[[VAL_12]], 2
// CHECK: %[[VAL_20:.*]] = xor i64 %[[VAL_19]], 1
// CHECK: %[[VAL_21:.*]] = icmp slt i64 %[[VAL_19]], %[[VAL_20]]
// CHECK: %[[VAL_22:.*]] = icmp slt i64 %[[VAL_20]], 3
// CHECK: %[[VAL_23:.*]] = and i1 %[[VAL_21]], %[[VAL_22]]
// CHECK: br i1 %[[VAL_23]], label %[[VAL_24:.*]], label %[[VAL_17]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_25:.*]], %[[VAL_15]]
// CHECK: br label %[[VAL_16]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_15]]
// CHECK: %[[VAL_26:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_20]]
// CHECK: %[[VAL_27:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: call void @region_0_4(float* %[[VAL_26]], float* %[[VAL_27]], i8* %[[VAL_1]])
// CHECK: %[[VAL_28:.*]] = load i8, i8* %[[VAL_1]], align 1
// CHECK: %[[VAL_29:.*]] = icmp ne i8 %[[VAL_28]], 0
// CHECK: br i1 %[[VAL_29]], label %[[VAL_30:.*]], label %[[VAL_25]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_30]], %[[VAL_24]]
// CHECK: br label %[[VAL_17]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_24]]
// CHECK: %[[VAL_31:.*]] = load float, float* %[[VAL_26]], align 4
// CHECK: %[[VAL_32:.*]] = load float, float* %[[VAL_27]], align 4
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_19]]
// CHECK: store float %[[VAL_31]], float* %[[VAL_33]], align 4
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_3]], i64 0, i64 %[[VAL_13]], i64 %[[VAL_20]]
// CHECK: store float %[[VAL_32]], float* %[[VAL_34]], align 4
// CHECK: br label %[[VAL_25]]
// CHECK: }
// Entry computation: sorts the f32[2, 3] parameter along dimension 1, using
// the `compare` computation as the ordering predicate.
ENTRY main {
x = f32[2, 3] parameter(0)
ROOT sort = f32[2, 3] sort(x), dimensions={1}, to_apply=compare
}
// -----
HloModule TestModule
// Comparator computation for the two-operand sort in this module.
// It declares four scalar parameters: an s32 pair (parameters 0 and 1) and an
// f32 pair (parameters 2 and 3). Only the f32 pair feeds the comparison; the
// s32 pair is declared but not used by the root instruction.
compare {
p.0.lhs = s32[] parameter(0)
p.0.rhs = s32[] parameter(1)
p.1.lhs = f32[] parameter(2)
p.1.rhs = f32[] parameter(3)
// Less-than on the f32 pair decides the result.
ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT
}
// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]], i8* noalias align 64 dereferenceable(24) %[[VAL_1:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_3:.*]] = alloca i8, align 1
// CHECK: %[[VAL_4:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_4]] to [2 x [3 x i32]]*
// CHECK: %[[VAL_6:.*]] = getelementptr inbounds i8, i8* %[[VAL_1]], i64 0
// CHECK: %[[VAL_7:.*]] = bitcast i8* %[[VAL_6]] to [2 x [3 x float]]*
// CHECK: %[[VAL_10:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_11:.*]] = zext i32 %[[VAL_10]] to i64
// CHECK: %[[VAL_12:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_13:.*]] = zext i32 %[[VAL_12]] to i64
// CHECK: %[[VAL_14:.*]] = mul nuw nsw i64 %[[VAL_11]], 4
// CHECK: %[[VAL_15:.*]] = add nuw nsw i64 %[[VAL_14]], %[[VAL_13]]
// CHECK: %[[VAL_16:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_16]])
// CHECK: %[[VAL_17:.*]] = udiv i64 %[[VAL_15]], 1
// CHECK: %[[VAL_18:.*]] = urem i64 %[[VAL_17]], 2
// CHECK: %[[VAL_19:.*]] = udiv i64 %[[VAL_15]], 2
// CHECK: %[[VAL_20:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: br i1 %[[VAL_20]], label %[[VAL_21:.*]], label %[[VAL_22:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_23:.*]], %[[VAL_24:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_24]]
// CHECK: %[[VAL_25:.*]] = mul i64 %[[VAL_18]], 2
// CHECK: %[[VAL_26:.*]] = xor i64 %[[VAL_25]], 1
// CHECK: %[[VAL_27:.*]] = icmp slt i64 %[[VAL_25]], %[[VAL_26]]
// CHECK: %[[VAL_28:.*]] = icmp slt i64 %[[VAL_26]], 3
// CHECK: %[[VAL_29:.*]] = and i1 %[[VAL_27]], %[[VAL_28]]
// CHECK: br i1 %[[VAL_29]], label %[[VAL_30:.*]], label %[[VAL_23]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_31:.*]], %[[VAL_21]]
// CHECK: br label %[[VAL_22]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: call void @region_0_6(i32* %[[VAL_32]], i32* %[[VAL_33]], float* %[[VAL_34]], float* %[[VAL_35]], i8* %[[VAL_3]])
// CHECK: %[[VAL_36:.*]] = load i8, i8* %[[VAL_3]], align 1
// CHECK: %[[VAL_37:.*]] = icmp ne i8 %[[VAL_36]], 0
// CHECK: br i1 %[[VAL_37]], label %[[VAL_38:.*]], label %[[VAL_31]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_38]], %[[VAL_30]]
// CHECK: br label %[[VAL_23]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_30]]
// CHECK: %[[VAL_39:.*]] = load i32, i32* %[[VAL_32]], align 4
// CHECK: %[[VAL_40:.*]] = load i32, i32* %[[VAL_33]], align 4
// CHECK: %[[VAL_41:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store i32 %[[VAL_39]], i32* %[[VAL_41]], align 4
// CHECK: %[[VAL_42:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: store i32 %[[VAL_40]], i32* %[[VAL_42]], align 4
// CHECK: %[[VAL_43:.*]] = load float, float* %[[VAL_34]], align 4
// CHECK: %[[VAL_44:.*]] = load float, float* %[[VAL_35]], align 4
// CHECK: %[[VAL_45:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store float %[[VAL_43]], float* %[[VAL_45]], align 4
// CHECK: %[[VAL_46:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: store float %[[VAL_44]], float* %[[VAL_46]], align 4
// CHECK: br label %[[VAL_31]]
// CHECK: }
// CHECK: ; Function Attrs: nounwind readnone
// CHECK: declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
// CHECK: ; Function Attrs: nounwind readnone
// CHECK: declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
// CHECK: ; Function Attrs: nofree nosync nounwind willreturn
// CHECK: declare void @llvm.assume(i1 noundef) #1
// CHECK: define internal void @region_0_6(i32* dereferenceable(4) %[[VAL_0:.*]], i32* dereferenceable(4) %[[VAL_1:.*]], float* dereferenceable(4) %[[VAL_2:.*]], float* dereferenceable(4) %[[VAL_3:.*]], i8* dereferenceable(1) %[[VAL_4:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_5:.*]] = alloca i8, align 1
// CHECK: %[[VAL_6:.*]] = load float, float* %[[VAL_2]], align 4
// CHECK: %[[VAL_7:.*]] = load float, float* %[[VAL_3]], align 4
// CHECK: %[[VAL_8:.*]] = fcmp olt float %[[VAL_6]], %[[VAL_7]]
// CHECK: %[[VAL_9:.*]] = zext i1 %[[VAL_8]] to i8
// CHECK: store i8 %[[VAL_9]], i8* %[[VAL_5]], align 1
// CHECK: %[[VAL_10:.*]] = load i8, i8* %[[VAL_5]], align 1
// CHECK: store i8 %[[VAL_10]], i8* %[[VAL_4]], align 1
// CHECK: ret void
// CHECK: }
// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]], i8* noalias align 64 dereferenceable(24) %[[VAL_1:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_3:.*]] = alloca i8, align 1
// CHECK: %[[VAL_4:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_4]] to [2 x [3 x i32]]*
// CHECK: %[[VAL_6:.*]] = getelementptr inbounds i8, i8* %[[VAL_1]], i64 0
// CHECK: %[[VAL_7:.*]] = bitcast i8* %[[VAL_6]] to [2 x [3 x float]]*
// CHECK: %[[VAL_10:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_11:.*]] = zext i32 %[[VAL_10]] to i64
// CHECK: %[[VAL_12:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_13:.*]] = zext i32 %[[VAL_12]] to i64
// CHECK: %[[VAL_14:.*]] = mul nuw nsw i64 %[[VAL_11]], 4
// CHECK: %[[VAL_15:.*]] = add nuw nsw i64 %[[VAL_14]], %[[VAL_13]]
// CHECK: %[[VAL_16:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_16]])
// CHECK: %[[VAL_17:.*]] = udiv i64 %[[VAL_15]], 1
// CHECK: %[[VAL_18:.*]] = urem i64 %[[VAL_17]], 2
// CHECK: %[[VAL_19:.*]] = udiv i64 %[[VAL_15]], 2
// CHECK: %[[VAL_20:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: br i1 %[[VAL_20]], label %[[VAL_21:.*]], label %[[VAL_22:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_23:.*]], %[[VAL_24:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_24]]
// CHECK: %[[VAL_25:.*]] = xor i64 %[[VAL_18]], 3
// CHECK: %[[VAL_26:.*]] = icmp slt i64 %[[VAL_18]], %[[VAL_25]]
// CHECK: %[[VAL_27:.*]] = icmp slt i64 %[[VAL_25]], 3
// CHECK: %[[VAL_28:.*]] = and i1 %[[VAL_26]], %[[VAL_27]]
// CHECK: br i1 %[[VAL_28]], label %[[VAL_29:.*]], label %[[VAL_23]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_30:.*]], %[[VAL_21]]
// CHECK: br label %[[VAL_22]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_31:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_18]]
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_18]]
// CHECK: call void @region_0_6(i32* %[[VAL_31]], i32* %[[VAL_32]], float* %[[VAL_33]], float* %[[VAL_34]], i8* %[[VAL_3]])
// CHECK: %[[VAL_35:.*]] = load i8, i8* %[[VAL_3]], align 1
// CHECK: %[[VAL_36:.*]] = icmp ne i8 %[[VAL_35]], 0
// CHECK: br i1 %[[VAL_36]], label %[[VAL_37:.*]], label %[[VAL_30]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_37]], %[[VAL_29]]
// CHECK: br label %[[VAL_23]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_29]]
// CHECK: %[[VAL_38:.*]] = load i32, i32* %[[VAL_31]], align 4
// CHECK: %[[VAL_39:.*]] = load i32, i32* %[[VAL_32]], align 4
// CHECK: %[[VAL_40:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_18]]
// CHECK: store i32 %[[VAL_38]], i32* %[[VAL_40]], align 4
// CHECK: %[[VAL_41:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store i32 %[[VAL_39]], i32* %[[VAL_41]], align 4
// CHECK: %[[VAL_42:.*]] = load float, float* %[[VAL_33]], align 4
// CHECK: %[[VAL_43:.*]] = load float, float* %[[VAL_34]], align 4
// CHECK: %[[VAL_44:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_18]]
// CHECK: store float %[[VAL_42]], float* %[[VAL_44]], align 4
// CHECK: %[[VAL_45:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store float %[[VAL_43]], float* %[[VAL_45]], align 4
// CHECK: br label %[[VAL_30]]
// CHECK: }
// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) %[[VAL_0:.*]], i8* noalias align 64 dereferenceable(24) %[[VAL_1:.*]]) {
// CHECK: entry:
// CHECK: %[[VAL_3:.*]] = alloca i8, align 1
// CHECK: %[[VAL_4:.*]] = getelementptr inbounds i8, i8* %[[VAL_0]], i64 0
// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_4]] to [2 x [3 x i32]]*
// CHECK: %[[VAL_6:.*]] = getelementptr inbounds i8, i8* %[[VAL_1]], i64 0
// CHECK: %[[VAL_7:.*]] = bitcast i8* %[[VAL_6]] to [2 x [3 x float]]*
// CHECK: %[[VAL_10:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6
// CHECK: %[[VAL_11:.*]] = zext i32 %[[VAL_10]] to i64
// CHECK: %[[VAL_12:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7
// CHECK: %[[VAL_13:.*]] = zext i32 %[[VAL_12]] to i64
// CHECK: %[[VAL_14:.*]] = mul nuw nsw i64 %[[VAL_11]], 4
// CHECK: %[[VAL_15:.*]] = add nuw nsw i64 %[[VAL_14]], %[[VAL_13]]
// CHECK: %[[VAL_16:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: call void @llvm.assume(i1 %[[VAL_16]])
// CHECK: %[[VAL_17:.*]] = udiv i64 %[[VAL_15]], 1
// CHECK: %[[VAL_18:.*]] = urem i64 %[[VAL_17]], 2
// CHECK: %[[VAL_19:.*]] = udiv i64 %[[VAL_15]], 2
// CHECK: %[[VAL_20:.*]] = icmp ult i64 %[[VAL_15]], 4
// CHECK: br i1 %[[VAL_20]], label %[[VAL_21:.*]], label %[[VAL_22:.*]]
// CHECK: sort.in_bounds-after: ; preds = %[[VAL_23:.*]], %[[VAL_24:.*]]
// CHECK: ret void
// CHECK: sort.in_bounds-true: ; preds = %[[VAL_24]]
// CHECK: %[[VAL_25:.*]] = mul i64 %[[VAL_18]], 2
// CHECK: %[[VAL_26:.*]] = xor i64 %[[VAL_25]], 1
// CHECK: %[[VAL_27:.*]] = icmp slt i64 %[[VAL_25]], %[[VAL_26]]
// CHECK: %[[VAL_28:.*]] = icmp slt i64 %[[VAL_26]], 3
// CHECK: %[[VAL_29:.*]] = and i1 %[[VAL_27]], %[[VAL_28]]
// CHECK: br i1 %[[VAL_29]], label %[[VAL_30:.*]], label %[[VAL_23]]
// CHECK: smaller_comparison_index-after: ; preds = %[[VAL_31:.*]], %[[VAL_21]]
// CHECK: br label %[[VAL_22]]
// CHECK: smaller_comparison_index-true: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: call void @region_0_6(i32* %[[VAL_32]], i32* %[[VAL_33]], float* %[[VAL_34]], float* %[[VAL_35]], i8* %[[VAL_3]])
// CHECK: %[[VAL_36:.*]] = load i8, i8* %[[VAL_3]], align 1
// CHECK: %[[VAL_37:.*]] = icmp ne i8 %[[VAL_36]], 0
// CHECK: br i1 %[[VAL_37]], label %[[VAL_38:.*]], label %[[VAL_31]]
// CHECK: is_smaller_than-after: ; preds = %[[VAL_38]], %[[VAL_30]]
// CHECK: br label %[[VAL_23]]
// CHECK: is_smaller_than-true: ; preds = %[[VAL_30]]
// CHECK: %[[VAL_39:.*]] = load i32, i32* %[[VAL_32]], align 4
// CHECK: %[[VAL_40:.*]] = load i32, i32* %[[VAL_33]], align 4
// CHECK: %[[VAL_41:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store i32 %[[VAL_39]], i32* %[[VAL_41]], align 4
// CHECK: %[[VAL_42:.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* %[[VAL_5]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: store i32 %[[VAL_40]], i32* %[[VAL_42]], align 4
// CHECK: %[[VAL_43:.*]] = load float, float* %[[VAL_34]], align 4
// CHECK: %[[VAL_44:.*]] = load float, float* %[[VAL_35]], align 4
// CHECK: %[[VAL_45:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_25]]
// CHECK: store float %[[VAL_43]], float* %[[VAL_45]], align 4
// CHECK: %[[VAL_46:.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* %[[VAL_7]], i64 0, i64 %[[VAL_19]], i64 %[[VAL_26]]
// CHECK: store float %[[VAL_44]], float* %[[VAL_46]], align 4
// CHECK: br label %[[VAL_31]]
// CHECK: }
// Entry computation: sorts the s32[2, 3] operand x and the f32[2, 3] operand y
// together along dimension 1 with `compare`, producing a
// (s32[2, 3], f32[2, 3]) tuple.
// NOTE(review): presumably comparator parameters 2 and 3 correspond to operand
// y, making y the sort key while x is permuted alongside it; confirm against
// the XLA Sort operation semantics.
ENTRY main {
x = s32[2, 3] parameter(0)
y = f32[2, 3] parameter(1)
ROOT sort = (s32[2, 3], f32[2, 3]) sort(x, y), dimensions={1}, to_apply=compare
}