// blob: fb4f1098692521ffb482d57c9278a91e9de78e14 [file] [log] [blame]
// RUN: hlo_to_llvm_ir %s | FileCheck %s
// CHECK-LABEL: entry:
// CHECK: %[[VAL_0:.*]] = alloca float, align 4
// CHECK: %[[VAL_1:.*]] = alloca float, align 4
// CHECK: %[[VAL_2:.*]] = alloca float, align 4
// CHECK: %[[VAL_3:.*]] = alloca float, align 4
// CHECK: %[[VAL_4:.*]] = alloca float, align 4
// CHECK: %[[VAL_5:.*]] = alloca float, align 4
// CHECK: %[[VAL_6:.*]] = alloca [2 x i8*], align 8
// CHECK: %[[VAL_7:.*]] = alloca i32, align 4
// CHECK: %[[VAL_8:.*]] = alloca i32, align 4
// CHECK: %[[VAL_9:.*]] = alloca float, align 4
// CHECK: %[[VAL_10:.*]] = alloca float, align 4
// CHECK: %[[VAL_11:.*]] = getelementptr inbounds i8, i8* %[[VAL_12:.*]], i64 0
// CHECK: %[[VAL_13:.*]] = bitcast i8* %[[VAL_11]] to [100 x [200 x [300 x float]]]*
// CHECK: %[[VAL_14:.*]] = getelementptr inbounds i8, i8* %[[VAL_15:.*]], i64 0
// CHECK: %[[VAL_16:.*]] = bitcast i8* %[[VAL_14]] to [100 x [200 x [300 x float]]]*
// CHECK: %[[VAL_17:.*]] = getelementptr inbounds i8, i8* %[[VAL_18:.*]], i64 0
// CHECK: %[[VAL_19:.*]] = bitcast i8* %[[VAL_17]] to [200 x float]*
// CHECK: %[[VAL_20:.*]] = getelementptr inbounds i8, i8* %[[VAL_21:.*]], i64 0
// CHECK: %[[VAL_22:.*]] = bitcast i8* %[[VAL_20]] to [200 x float]*
// CHECK: %[[VAL_23:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !2
// CHECK: %[[VAL_24:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !3
// CHECK: %[[VAL_25:.*]] = mul nuw nsw i32 %[[VAL_23]], 200
// CHECK: %[[VAL_26:.*]] = add nuw nsw i32 %[[VAL_25]], %[[VAL_24]]
// CHECK: %[[VAL_27:.*]] = icmp ult i32 %[[VAL_26]], 200
// CHECK: call void @llvm.assume(i1 %[[VAL_27]])
// CHECK: %[[VAL_28:.*]] = udiv i32 %[[VAL_26]], 1
// CHECK: %[[VAL_29:.*]] = icmp ult i32 %[[VAL_26]], 200
// CHECK: br i1 %[[VAL_29]], label %[[VAL_30:.*]], label %[[VAL_31:.*]]
// CHECK: d.in_bounds-after: ; preds = %[[VAL_32:.*]], %[[VAL_33:.*]]
// CHECK: ret void
// CHECK: d.in_bounds-true: ; preds = %[[VAL_33]]
// CHECK: %[[VAL_34:.*]] = load float, float* bitcast ([4 x i8]* @buffer_for_c to float*), align 4, !invariant.load !4
// CHECK: store float %[[VAL_34]], float* %[[VAL_10]], align 4
// CHECK: %[[VAL_35:.*]] = load float, float* bitcast ([4 x i8]* @buffer_for_c to float*), align 4, !invariant.load !4
// CHECK: store float %[[VAL_35]], float* %[[VAL_9]], align 4
// CHECK: store i32 0, i32* %[[VAL_8]], align 4
// CHECK: br label %[[VAL_36:.*]]
// CHECK: reduce.13.inner.loop_header.reduction_dim.0: ; preds = %[[VAL_37:.*]], %[[VAL_30]]
// CHECK: %[[VAL_38:.*]] = load i32, i32* %[[VAL_8]], align 4
// CHECK: %[[VAL_39:.*]] = icmp uge i32 %[[VAL_38]], 100
// CHECK: br i1 %[[VAL_39]], label %[[VAL_32]], label %[[VAL_40:.*]]
// CHECK: reduce.13.inner.loop_body.reduction_dim.0: ; preds = %[[VAL_36]]
// CHECK: store i32 0, i32* %[[VAL_7]], align 4
// CHECK: br label %[[VAL_41:.*]]
// CHECK: reduce.13.inner.loop_header.reduction_dim.2: ; preds = %[[VAL_42:.*]], %[[VAL_40]]
// CHECK: %[[VAL_43:.*]] = load i32, i32* %[[VAL_7]], align 4
// CHECK: %[[VAL_44:.*]] = icmp uge i32 %[[VAL_43]], 300
// CHECK: br i1 %[[VAL_44]], label %[[VAL_37]], label %[[VAL_42]]
// CHECK: reduce.13.inner.loop_body.reduction_dim.2: ; preds = %[[VAL_41]]
// CHECK: %[[VAL_45:.*]] = load float, float* %[[VAL_10]], align 4
// CHECK: %[[VAL_46:.*]] = load float, float* %[[VAL_9]], align 4
// CHECK: %[[VAL_47:.*]] = getelementptr inbounds [100 x [200 x [300 x float]]], [100 x [200 x [300 x float]]]* %[[VAL_13]], i32 0, i32 %[[VAL_38]], i32 %[[VAL_28]], i32 %[[VAL_43]]
// CHECK: %[[VAL_48:.*]] = load float, float* %[[VAL_47]], align 4, !invariant.load !4
// CHECK: %[[VAL_49:.*]] = getelementptr inbounds [100 x [200 x [300 x float]]], [100 x [200 x [300 x float]]]* %[[VAL_16]], i32 0, i32 %[[VAL_38]], i32 %[[VAL_28]], i32 %[[VAL_43]]
// CHECK: %[[VAL_50:.*]] = load float, float* %[[VAL_49]], align 4, !invariant.load !4
// CHECK: store float %[[VAL_45]], float* %[[VAL_5]], align 4
// CHECK: store float %[[VAL_46]], float* %[[VAL_4]], align 4
// CHECK: store float %[[VAL_48]], float* %[[VAL_3]], align 4
// CHECK: store float %[[VAL_50]], float* %[[VAL_2]], align 4
// CHECK: %[[VAL_51:.*]] = bitcast float* %[[VAL_0]] to i8*
// CHECK: %[[VAL_52:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_6]], i64 0, i64 0
// CHECK: store i8* %[[VAL_51]], i8** %[[VAL_52]], align 8
// CHECK: %[[VAL_53:.*]] = bitcast float* %[[VAL_1]] to i8*
// CHECK: %[[VAL_54:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_6]], i64 0, i64 1
// CHECK: store i8* %[[VAL_53]], i8** %[[VAL_54]], align 8
// CHECK: call void @region_1_5(float* %[[VAL_5]], float* %[[VAL_4]], float* %[[VAL_3]], float* %[[VAL_2]], [2 x i8*]* %[[VAL_6]])
// CHECK: %[[VAL_55:.*]] = load float, float* %[[VAL_0]], align 4
// CHECK: %[[VAL_56:.*]] = load float, float* %[[VAL_1]], align 4
// CHECK: store float %[[VAL_55]], float* %[[VAL_10]], align 4
// CHECK: store float %[[VAL_56]], float* %[[VAL_9]], align 4
// CHECK: %[[VAL_57:.*]] = add nuw nsw i32 %[[VAL_43]], 1
// CHECK: store i32 %[[VAL_57]], i32* %[[VAL_7]], align 4
// CHECK: br label %[[VAL_41]]
// CHECK: reduce.13.inner.loop_exit.reduction_dim.2: ; preds = %[[VAL_41]]
// CHECK: %[[VAL_58:.*]] = add nuw nsw i32 %[[VAL_38]], 1
// CHECK: store i32 %[[VAL_58]], i32* %[[VAL_8]], align 4
// CHECK: br label %[[VAL_36]]
// CHECK: reduce.13.inner.loop_exit.reduction_dim.0: ; preds = %[[VAL_36]]
// CHECK: %[[VAL_59:.*]] = load float, float* %[[VAL_10]], align 4
// CHECK: %[[VAL_60:.*]] = insertvalue { float, float } undef, float %[[VAL_59]], 0
// CHECK: %[[VAL_61:.*]] = load float, float* %[[VAL_9]], align 4
// CHECK: %[[VAL_62:.*]] = insertvalue { float, float } %[[VAL_60]], float %[[VAL_61]], 1
// CHECK: %[[VAL_63:.*]] = extractvalue { float, float } %[[VAL_62]], 0
// CHECK: %[[VAL_64:.*]] = bitcast [200 x float]* %[[VAL_19]] to float*
// CHECK: %[[VAL_65:.*]] = getelementptr inbounds float, float* %[[VAL_64]], i32 %[[VAL_26]]
// CHECK: store float %[[VAL_63]], float* %[[VAL_65]], align 4
// CHECK: %[[VAL_66:.*]] = extractvalue { float, float } %[[VAL_62]], 1
// CHECK: %[[VAL_67:.*]] = bitcast [200 x float]* %[[VAL_22]] to float*
// CHECK: %[[VAL_68:.*]] = getelementptr inbounds float, float* %[[VAL_67]], i32 %[[VAL_26]]
// CHECK: store float %[[VAL_66]], float* %[[VAL_68]], align 4
// CHECK: br label %[[VAL_31]]
// CHECK: entry:
// CHECK: %[[VAL_69:.*]] = alloca float, align 4
// CHECK: %[[VAL_70:.*]] = alloca float, align 4
// CHECK: %[[VAL_71:.*]] = alloca [2 x i8*], align 8
// CHECK: %[[VAL_72:.*]] = alloca [2 x i8*], align 8
// CHECK: %[[VAL_73:.*]] = alloca [2 x i8*], align 8
// CHECK: %[[VAL_74:.*]] = bitcast [2 x i8*]* %[[VAL_72]] to float*
// CHECK: %[[VAL_75:.*]] = bitcast [2 x i8*]* %[[VAL_71]] to float*
// CHECK: %[[VAL_76:.*]] = load float, float* %[[VAL_77:.*]], align 4
// CHECK: %[[VAL_78:.*]] = load float, float* %[[VAL_79:.*]], align 4
// CHECK: %[[VAL_80:.*]] = fadd float %[[VAL_76]], %[[VAL_78]]
// CHECK: store float %[[VAL_80]], float* %[[VAL_70]], align 4
// CHECK: %[[VAL_81:.*]] = load float, float* %[[VAL_82:.*]], align 4
// CHECK: %[[VAL_83:.*]] = load float, float* %[[VAL_84:.*]], align 4
// CHECK: %[[VAL_85:.*]] = fadd float %[[VAL_81]], %[[VAL_83]]
// CHECK: store float %[[VAL_85]], float* %[[VAL_69]], align 4
// CHECK: %[[VAL_86:.*]] = bitcast float* %[[VAL_70]] to i8*
// CHECK: %[[VAL_87:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_73]], i64 0, i64 0
// CHECK: store i8* %[[VAL_86]], i8** %[[VAL_87]], align 8
// CHECK: %[[VAL_88:.*]] = bitcast float* %[[VAL_69]] to i8*
// CHECK: %[[VAL_89:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_73]], i64 0, i64 1
// CHECK: store i8* %[[VAL_88]], i8** %[[VAL_89]], align 8
// CHECK: %[[VAL_90:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_91:.*]], i64 0, i64 0
// CHECK: %[[VAL_92:.*]] = load i8*, i8** %[[VAL_90]], align 8, !dereferenceable !5, !align !6
// CHECK: %[[VAL_93:.*]] = bitcast i8* %[[VAL_92]] to float*
// CHECK: %[[VAL_94:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_73]], i64 0, i64 0
// CHECK: %[[VAL_95:.*]] = load i8*, i8** %[[VAL_94]], align 8, !dereferenceable !5, !align !6
// CHECK: %[[VAL_96:.*]] = bitcast i8* %[[VAL_95]] to float*
// CHECK: %[[VAL_97:.*]] = load float, float* %[[VAL_96]], align 4
// CHECK: store float %[[VAL_97]], float* %[[VAL_93]], align 4
// CHECK: %[[VAL_98:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_91]], i64 0, i64 1
// CHECK: %[[VAL_99:.*]] = load i8*, i8** %[[VAL_98]], align 8, !dereferenceable !5, !align !6
// CHECK: %[[VAL_100:.*]] = bitcast i8* %[[VAL_99]] to float*
// CHECK: %[[VAL_101:.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %[[VAL_73]], i64 0, i64 1
// CHECK: %[[VAL_102:.*]] = load i8*, i8** %[[VAL_101]], align 8, !dereferenceable !5, !align !6
// CHECK: %[[VAL_103:.*]] = bitcast i8* %[[VAL_102]] to float*
// CHECK: %[[VAL_104:.*]] = load float, float* %[[VAL_103]], align 4
// CHECK: store float %[[VAL_104]], float* %[[VAL_100]], align 4
// CHECK: ret void
// HLO module under test. The lit command at the top of this file runs
// hlo_to_llvm_ir on this file and pipes the result to FileCheck, which matches
// it against the expectation comments above.
HloModule Test
// Reducer used by the reduce in `main` (via to_apply=Add).
// Takes four scalar f32 parameters and returns a tuple of two scalar f32 sums:
//   element 0 = parameter(0) + parameter(1)
//   element 1 = parameter(2) + parameter(3)
// NOTE(review): in the expected IR above, the call to the emitted reducer
// passes the two running values first and the two freshly loaded input
// elements last. With that ordering add.0 would combine the two running values
// and add.1 the two loaded elements, rather than pairing a running value with
// an element as the lhs/rhs names suggest. Presumably irrelevant for a test
// that only inspects the IR shape -- confirm before reusing this reducer where
// the numeric result matters.
Add {
// First pair of scalar operands.
scalar_lhs.0 = f32[] parameter(0)
scalar_rhs.0 = f32[] parameter(1)
// Second pair of scalar operands.
scalar_lhs.1 = f32[] parameter(2)
scalar_rhs.1 = f32[] parameter(3)
// One independent addition per pair.
add.0 = f32[] add(scalar_lhs.0, scalar_rhs.0)
add.1 = f32[] add(scalar_lhs.1, scalar_rhs.1)
// Both sums are returned together as a two-element tuple.
ROOT t = (f32[], f32[]) tuple(add.0, add.1)
}
// Entry computation: a two-operand (variadic) reduce of two
// f32[100, 200, 300] inputs over dimensions 0 and 2, producing a tuple of two
// f32[200] results.
// NOTE(review): the instruction names `d` and `c` appear inside the expected
// IR above (block labels prefixed with `d.` and the global `@buffer_for_c`),
// so renaming them here presumably requires regenerating the expectations.
ENTRY main {
// The two reduce operands; same shape and the same {2,1,0} layout.
a = f32[100, 200, 300]{2,1,0} parameter(0)
b = f32[100, 200, 300]{2,1,0} parameter(1)
// Scalar zero, passed twice below as the initial value for both reductions.
c = f32[] constant(0)
// Dimensions 0 and 2 are reduced, leaving dimension 1 (size 200) in each output.
ROOT d = (f32[200]{0}, f32[200]{0}) reduce(a, b, c, c), dimensions={0,2}, to_apply=Add
}