// Review notes (commentary only; every character of the patch text below is unchanged).
//
// This is a unified git diff whose line breaks have been collapsed. It modifies
// llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp and adds the new test file
// llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll.
//
// Hunk "@@ -7674,13 +7674,34 @@" (FindFirstInst lambda):
//   * The lambda now captures `this` in addition to E and Front; the added
//     lines read the member DT.
//   * If a scalar instruction I is in the same basic block as the current
//     FirstInst, the pre-existing comesBefore() comparison still decides and
//     the loop moves on to the next scalar.
//   * Otherwise both instructions are asserted to satisfy
//     isVectorLikeInstWithConstOps. If FirstInst's block is not reachable from
//     entry according to DT, I replaces it; if I's block is not reachable, I
//     is skipped.
//   * For two reachable blocks in different basic blocks, the DFS-in numbers of
//     their dominator-tree nodes are compared, and I becomes FirstInst when
//     FirstInst's node has the greater number.
//
// Hunk "@@ -7689,9 +7710,12 @@" (insert-point selection):
//   * The branch guarded by `E->State != TreeEntry::NeedToGather` is now also
//     taken when every scalar satisfies isVectorLikeInstWithConstOps, not only
//     when doesNotNeedToSchedule(E->Scalars) holds.
//   * FindLastInst() is chosen only when every scalar is both NOT
//     vector-like-with-const-ops and isUsedOutsideBlock; in all other cases
//     FindFirstInst() is used.
//
// Hunk "@@ -8116,7 +8140,7 @@" (case Instruction::ExtractElement):
//   * Builder.SetInsertPoint(VL0) is replaced by setInsertPointAfterBundle(E).
//
// New test file: defines @crash and checks the `opt -slp-vectorizer` output
// with FileCheck (CHECK lines are marked as autogenerated by
// utils/update_test_checks.py).
//
// NOTE(review): besides the lost line breaks, some angle-bracket content looks
// stripped by whatever produced this text -- `dyn_cast(V)` has no template
// argument, and the two shufflevector CHECK lines end at `<2 x i32>` with no
// mask operand. Presumably the upstream patch has these; confirm against the
// original before applying, since this text is not usable as a patch as-is.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7674,13 +7674,34 @@ return LastInst; }; - auto &&FindFirstInst = [E, Front]() { + auto &&FindFirstInst = [E, Front, this]() { Instruction *FirstInst = Front; for (Value *V : E->Scalars) { auto *I = dyn_cast(V); if (!I) continue; - if (I->comesBefore(FirstInst)) + if (FirstInst->getParent() == I->getParent()) { + if (I->comesBefore(FirstInst)) + FirstInst = I; + continue; + } + assert(isVectorLikeInstWithConstOps(FirstInst) && + isVectorLikeInstWithConstOps(I) && + "Expected vector-like insts only."); + if (!DT->isReachableFromEntry(FirstInst->getParent())) { + FirstInst = I; + continue; + } + if (!DT->isReachableFromEntry(I->getParent())) + continue; + auto *NodeA = DT->getNode(FirstInst->getParent()); + auto *NodeB = DT->getNode(I->getParent()); + assert(NodeA && "Should only process reachable instructions"); + assert(NodeB && "Should only process reachable instructions"); + assert((NodeA == NodeB) == + (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) && + "Different nodes should have different DFS numbers"); + if (NodeA->getDFSNumIn() > NodeB->getDFSNumIn()) FirstInst = I; } return FirstInst; @@ -7689,9 +7710,12 @@ // Set the insert point to the beginning of the basic block if the entry // should not be scheduled. 
if (E->State != TreeEntry::NeedToGather && - doesNotNeedToSchedule(E->Scalars)) { + (doesNotNeedToSchedule(E->Scalars) || + all_of(E->Scalars, isVectorLikeInstWithConstOps))) { Instruction *InsertInst; - if (all_of(E->Scalars, isUsedOutsideBlock)) + if (all_of(E->Scalars, [](Value *V) { + return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V); + })) InsertInst = FindLastInst(); else InsertInst = FindFirstInst(); @@ -8116,7 +8140,7 @@ case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); - Builder.SetInsertPoint(VL0); + setInsertPointAfterBundle(E); ShuffleBuilder.addInversedMask(E->ReorderIndices); ShuffleBuilder.addMask(E->ReuseShuffleIndices); V = ShuffleBuilder.finalize(V); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-grtev4-linux-gnu -o - < %s | FileCheck %s + +define i32 @crash() { +; CHECK-LABEL: @crash( +; CHECK-NEXT: label: +; CHECK-NEXT: [[ADD_I_I_I_I_I_I_I_I_I_I_I_I_I_I:%.*]] = fadd <2 x double> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[ADD_I_I_I_I_I_I_I_I_I_I_I_I_I_I]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[SHUFFLE]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[SHUFFLE]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[ADD_I_I_I_I_I_I_I_I_I_I_I_I_I:%.*]] = fadd double [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[MUL_I_I_I_I_I_I_I_I_I_I_I_I:%.*]] = fmul double [[ADD_I_I_I_I_I_I_I_I_I_I_I_I_I]], 0.000000e+00 
+; CHECK-NEXT: store double [[MUL_I_I_I_I_I_I_I_I_I_I_I_I]], double* null, align 16 +; CHECK-NEXT: ret i32 0 +; +label: + %0 = extractelement <2 x double> zeroinitializer, i64 1 + %1 = extractelement <2 x double> zeroinitializer, i64 0 + %add.i.i.i.i.i.i.i.i.i.i.i.i.i.i = fadd <2 x double> zeroinitializer, zeroinitializer + %sroa.0.8.vec.extract = extractelement <2 x double> %add.i.i.i.i.i.i.i.i.i.i.i.i.i.i, i64 1 + %2 = fmul double %sroa.0.8.vec.extract, %1 + %conj.sroa.3.8.vec.insert = insertelement <2 x double> zeroinitializer, double %sroa.0.8.vec.extract, i64 0 + %sroa.0.0.vec.extract = extractelement <2 x double> %add.i.i.i.i.i.i.i.i.i.i.i.i.i.i, i64 0 + %mul.i.i23.i.i.i.i.i.i.i.i.i.i.i.i.i = fmul double %sroa.0.0.vec.extract, %0 + %add.i.i.i.i.i.i.i.i.i.i.i.i.i = fadd double %2, %mul.i.i23.i.i.i.i.i.i.i.i.i.i.i.i.i + %mul.i.i.i.i.i.i.i.i.i.i.i.i = fmul double %add.i.i.i.i.i.i.i.i.i.i.i.i.i, 0.000000e+00 + store double %mul.i.i.i.i.i.i.i.i.i.i.i.i, double* null, align 16 + ret i32 0 +}