| From 60f57b36587c99175cb380406e2502a592a0c400 Mon Sep 17 00:00:00 2001 |
| From: David Green <david.green@arm.com> |
| Date: Tue, 29 Mar 2022 10:12:44 +0100 |
| Subject: [PATCH] [AArch64] Ensure fixed point fptoi_sat has correct saturation |
| width |
| |
| D113200 introduced an error where it was converting FP_TO_SI_SAT with |
| multiply to a fixed point floating point convert. The saturation |
| bitwidth needs to be equal to the floating point width, or else the |
| routine would truncate the result as opposed to saturating it. |
| |
| Fixes #54601 |
| --- |
| .../Target/AArch64/AArch64ISelLowering.cpp | 2 +- |
| llvm/test/CodeGen/AArch64/fcvt_combine.ll | 31 ++++++++++++++++--- |
| 2 files changed, 28 insertions(+), 5 deletions(-) |
| |
| diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |
| index 30b9b6096620..38079a191f72 100644 |
| --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |
| +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |
| @@ -13973,7 +13973,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, |
| if (N->getOpcode() == ISD::FP_TO_SINT_SAT || |
| N->getOpcode() == ISD::FP_TO_UINT_SAT) { |
| EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); |
| - if (SatVT.getScalarSizeInBits() != IntBits) |
| + if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits) |
| return SDValue(); |
| } |
| |
| diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll |
| index 24713c444024..67af07e05ab0 100644 |
| --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll |
| +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll |
| @@ -285,6 +285,7 @@ declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>) |
| declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>) |
| declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>) |
| declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>) |
| +declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>) |
| declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>) |
| declare <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float>) |
| declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>) |
| @@ -326,8 +327,14 @@ define <2 x i64> @test3_sat(<2 x double> %d) { |
| define <2 x i32> @test4_sat(<2 x double> %d) { |
| ; CHECK-LABEL: test4_sat: |
| ; CHECK: // %bb.0: |
| -; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4 |
| -; CHECK-NEXT: xtn v0.2s, v0.2d |
| +; CHECK-NEXT: fmov v1.2d, #16.00000000 |
| +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d |
| +; CHECK-NEXT: mov d1, v0.d[1] |
| +; CHECK-NEXT: fcvtzs w8, d0 |
| +; CHECK-NEXT: fmov s0, w8 |
| +; CHECK-NEXT: fcvtzs w8, d1 |
| +; CHECK-NEXT: mov v0.s[1], w8 |
| +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00> |
| %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i) |
| @@ -338,13 +345,29 @@ define <2 x i32> @test4_sat(<2 x double> %d) { |
| define <2 x i16> @test5_sat(<2 x float> %f) { |
| ; CHECK-LABEL: test5_sat: |
| ; CHECK: // %bb.0: |
| +; CHECK-NEXT: movi v1.2s, #127, msl #8 |
| ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4 |
| +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s |
| +; CHECK-NEXT: mvni v1.2s, #127, msl #8 |
| +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s |
| ; CHECK-NEXT: ret |
| %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00> |
| %vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i) |
| ret <2 x i16> %vcvt.i |
| } |
| |
| +; Truncate float to i16 |
| +define <4 x i16> @test5l_sat(<4 x float> %f) { |
| +; CHECK-LABEL: test5l_sat: |
| +; CHECK: // %bb.0: |
| +; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #4 |
| +; CHECK-NEXT: sqxtn v0.4h, v0.4s |
| +; CHECK-NEXT: ret |
| + %mul.i = fmul <4 x float> %f, <float 16.000000e+00, float 16.000000e+00, float 16.000000e+00, float 16.000000e+00> |
| + %vcvt.i = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> %mul.i) |
| + ret <4 x i16> %vcvt.i |
| +} |
| + |
| ; Don't convert float to i64 |
| define <2 x i64> @test6_sat(<2 x float> %f) { |
| ; CHECK-LABEL: test6_sat: |
| @@ -389,8 +412,8 @@ define <2 x i32> @test8_sat(<2 x float> %f) { |
| define <2 x i32> @test9_sat(<2 x float> %f) { |
| ; CHECK-LABEL: test9_sat: |
| ; CHECK: // %bb.0: |
| -; CHECK-NEXT: adrp x8, .LCPI26_0 |
| -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0] |
| +; CHECK-NEXT: adrp x8, .LCPI27_0 |
| +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0] |
| ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s |
| ; CHECK-NEXT: fcvtzu v0.2s, v0.2s |
| ; CHECK-NEXT: ret |
| -- |
| 2.35.1.1021.g381101b075-goog |
| |