diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4053,8 +4053,7 @@ TreeEntry *OpTE = Op.second; if (!VisitedOps.insert(OpTE).second) continue; - if (!OpTE->ReuseShuffleIndices.empty() || - (IgnoreReorder && OpTE == VectorizableTree.front().get())) + if (!OpTE->ReuseShuffleIndices.empty()) continue; const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather) @@ -4174,6 +4173,8 @@ TE->ReorderIndices.empty()) && "Non-matching sizes of user/operand entries."); reorderOrder(TE->ReorderIndices, Mask); + if (IgnoreReorder && TE == VectorizableTree.front().get()) + IgnoreReorder = false; } // For gathers just need to reorder its scalars. for (TreeEntry *Gather : GatherOps) { @@ -7853,9 +7854,10 @@ auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size()); switch (ShuffleOrOp) { case Instruction::PHI: { - assert( - (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) && - "PHI reordering is free."); + assert((E->ReorderIndices.empty() || + E != VectorizableTree.front().get() || + !E->UserTreeIndices.empty()) && + "PHI reordering is free."); auto *PH = cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reoder-phi-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/reoder-phi-operand.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reoder-phi-operand.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define ptr @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GREEN_I:%.*]] = getelementptr inbounds float, ptr null, i32 6 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GREEN_I]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP5:%.*]], [[BODY]] ], [ [[TMP1]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr null, align 2 +; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[SHUFFLE]], <2 x double> [[TMP2]]) +; CHECK-NEXT: br label [[BODY]] +; +entry: + %green.i = getelementptr inbounds float, ptr null, i32 6 + %blue.i = getelementptr inbounds float, ptr null, i32 7 + %0 = load float, ptr %green.i, align 4 + %conv197 = fpext float %0 to double + %1 = load float, ptr %blue.i, align 8 + %conv199 = fpext float %1 to double + br label %body + +body: + %phi1 = phi double [ %3, %body ], [ %conv197, %entry ] + %phi2 = phi double [ %5, %body ], [ %conv199, %entry ] + %green = getelementptr inbounds i16, ptr null, i32 1 + %2 = load i16, ptr %green, align 2 + %conv1 = uitofp i16 %2 to double + %3 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv1, double %phi1) + %4 = load i16, ptr null, align 2 + %conv2 = uitofp i16 %4 to double + %5 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv2, double %phi2) + br label %body +} + +declare double @llvm.fmuladd.f64(double, double, double)