commit bf4dcbd2df009ba71d9032379fe8bc37ba0903ff
Author: Alexey Bataev <a.bataev@outlook.com>
Date:   Mon Jun 27 13:20:50 2022 -0700

[SLP]Fix PR56251: Do not remove the reordering from the root node if it is used as an operand.

If the root node itself does not require reordering, we can safely drop
its reorder mask (e.g., when the root node is a vector of phis). But if
that node is also used as an operand elsewhere in the graph, the
reordering must be kept; otherwise the graph nodes get out of sync with
their operands, which may lead to extra gather instructions or a
compiler crash.
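
To make this concrete, here is a minimal standalone sketch. The types
below are hypothetical simplifications for illustration only, not the
real BoUpSLP data structures:

#include <vector>

// Hypothetical, simplified model of a vectorization tree node; it only
// illustrates when a root node's reorder mask may be dropped.
struct EdgeInfoSketch {
  unsigned UserNode; // index of the node that consumes this entry
  unsigned EdgeIdx;  // which operand slot of that user it feeds
};

struct TreeEntrySketch {
  std::vector<unsigned> ReorderIndices;        // reorder mask; empty = identity
  std::vector<EdgeInfoSketch> UserTreeIndices; // edges from user nodes
};

// The root's reordering is "free" (e.g., a vector of phis) only when
// nothing else in the graph reads the root as an operand; otherwise
// clearing ReorderIndices would leave the users' operands permuted
// differently from the root itself.
bool canDropRootOrder(const TreeEntrySketch &Root) {
  return Root.UserTreeIndices.empty();
}
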
We also need to be careful when selecting gather nodes for reordering:
there may be several gather nodes with the same scalars, and matching by
scalars alone can end up reordering the same node many times instead of
the distinct nodes.
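
The selection can be sketched as follows (again with hypothetical,
simplified types rather than the real SLPVectorizer classes): match the
candidate gather by its (user node, operand index) edge instead of by
its scalars, so that several gathers holding identical scalars are still
told apart.

#include <algorithm>
#include <vector>

struct GatherEdge {
  const void *UserTE; // the user node this gather feeds
  unsigned EdgeIdx;   // which operand of that user
};

struct GatherNodeSketch {
  std::vector<int> Scalars;          // several gathers may hold the same scalars
  std::vector<GatherEdge> UserEdges; // but their user edges differ
};

// Return the gather that actually feeds operand I of UserTE. Matching on
// the edge (UserTE, I) rather than on Scalars distinguishes gathers with
// equal contents, so the same node is not reordered over and over.
GatherNodeSketch *
findOperandGather(const std::vector<GatherNodeSketch *> &Gathers,
                  const void *UserTE, unsigned I) {
  for (GatherNodeSketch *G : Gathers)
    if (std::any_of(G->UserEdges.begin(), G->UserEdges.end(),
                    [&](const GatherEdge &E) {
                      return E.UserTE == UserTE && E.EdgeIdx == I;
                    }))
      return G;
  return nullptr;
}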

Differential Revision: https://reviews.llvm.org/D128680

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 54b54d8a662e..a958c36969d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3946,19 +3946,23 @@ bool BoUpSLP::canReorderOperands(
       GatherOps.push_back(TE);
       continue;
     }
-    ArrayRef<Value *> VL = UserTE->getOperand(I);
     TreeEntry *Gather = nullptr;
     if (count_if(ReorderableGathers,
-                 [VL, &Gather](TreeEntry *TE) {
+                 [&Gather, UserTE, I](TreeEntry *TE) {
                    assert(TE->State != TreeEntry::Vectorize &&
                           "Only non-vectorized nodes are expected.");
-                  if (TE->isSame(VL)) {
+                  if (any_of(TE->UserTreeIndices,
+                             [UserTE, I](const EdgeInfo &EI) {
+                               return EI.UserTE == UserTE && EI.EdgeIdx == I;
+                             })) {
+                    assert(TE->isSame(UserTE->getOperand(I)) &&
+                           "Operand entry does not match operands.");
                      Gather = TE;
                      return true;
                    }
                    return false;
                  }) > 1 &&
-        !all_of(VL, isConstant))
+        !all_of(UserTE->getOperand(I), isConstant))
       return false;
     if (Gather)
       GatherOps.push_back(Gather);
@@ -4053,8 +4057,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
         TreeEntry *OpTE = Op.second;
         if (!VisitedOps.insert(OpTE).second)
           continue;
-        if (!OpTE->ReuseShuffleIndices.empty() ||
-            (IgnoreReorder && OpTE == VectorizableTree.front().get()))
+        if (!OpTE->ReuseShuffleIndices.empty())
           continue;
         const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
           if (OpTE->State == TreeEntry::NeedToGather)
@@ -4174,6 +4177,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
                 TE->ReorderIndices.empty()) &&
                "Non-matching sizes of user/operand entries.");
         reorderOrder(TE->ReorderIndices, Mask);
+        if (IgnoreReorder && TE == VectorizableTree.front().get())
+          IgnoreReorder = false;
       }
       // For gathers just need to reorder its scalars.
       for (TreeEntry *Gather : GatherOps) {
@@ -7853,9 +7858,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
   auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
   switch (ShuffleOrOp) {
     case Instruction::PHI: {
-      assert(
-          (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
-          "PHI reordering is free.");
+      assert((E->ReorderIndices.empty() ||
+              E != VectorizableTree.front().get() ||
+              !E->UserTreeIndices.empty()) &&
+             "PHI reordering is free.");
       auto *PH = cast<PHINode>(VL0);
       Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
       Builder.SetCurrentDebugLocation(PH->getDebugLoc());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
new file mode 100644
index 000000000000..9e136ee516f5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define ptr @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GREEN_I:%.*]] = getelementptr inbounds float, ptr null, i32 6
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GREEN_I]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
+; CHECK-NEXT: br label [[BODY:%.*]]
+; CHECK: body:
+; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP5:%.*]], [[BODY]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr null, align 2
+; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP5]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[SHUFFLE]], <2 x double> [[TMP2]])
+; CHECK-NEXT: br label [[BODY]]
+;
+entry:
+  %green.i = getelementptr inbounds float, ptr null, i32 6
+  %blue.i = getelementptr inbounds float, ptr null, i32 7
+  %0 = load float, ptr %green.i, align 4
+  %conv197 = fpext float %0 to double
+  %1 = load float, ptr %blue.i, align 8
+  %conv199 = fpext float %1 to double
+  br label %body
+
+body:
+  %phi1 = phi double [ %3, %body ], [ %conv197, %entry ]
+  %phi2 = phi double [ %5, %body ], [ %conv199, %entry ]
+  %green = getelementptr inbounds i16, ptr null, i32 1
+  %2 = load i16, ptr %green, align 2
+  %conv1 = uitofp i16 %2 to double
+  %3 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv1, double %phi1)
+  %4 = load i16, ptr null, align 2
+  %conv2 = uitofp i16 %4 to double
+  %5 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv2, double %phi2)
+  br label %body
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+define void @test1(ptr %agg.result, ptr %this) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK: lor.lhs.false:
+; CHECK-NEXT: br i1 false, label [[RETURN]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[B_I:%.*]] = getelementptr inbounds float, ptr [[THIS:%.*]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[B_I]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], zeroinitializer
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_END]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[LOR_LHS_FALSE]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[C_I_I_I:%.*]] = getelementptr inbounds float, ptr [[AGG_RESULT:%.*]], i32 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: store <2 x float> [[SHUFFLE]], ptr [[C_I_I_I]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  br i1 false, label %return, label %lor.lhs.false
+
+lor.lhs.false:
+  br i1 false, label %return, label %if.end
+
+if.end:
+  %c.i = getelementptr inbounds float, ptr %this, i32 2
+  %0 = load float, ptr %c.i, align 4
+  %add.i = fadd float %0, 0.000000e+00
+  %b.i = getelementptr inbounds float, ptr %this, i32 1
+  %1 = load float, ptr %b.i, align 4
+  %add2.i = fadd float %1, 0.000000e+00
+  br label %return
+
+return:
+  %add.i.sink = phi float [ %add.i, %if.end ], [ 0.000000e+00, %lor.lhs.false ], [ 0.000000e+00, %entry ]
+  %add2.i.sink = phi float [ %add2.i, %if.end ], [ 1.000000e+00, %lor.lhs.false ], [ 1.000000e+00, %entry ]
+  %c.i.i.i = getelementptr inbounds float, ptr %agg.result, i32 2
+  store float %add.i.sink, ptr %c.i.i.i, align 4
+  %d.i.i.i = getelementptr inbounds float, ptr %agg.result, i32 3
+  store float %add2.i.sink, ptr %d.i.i.i, align 4
+  ret void
+}