diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -279,9 +279,11 @@
 
   /// Build a VPlan using VPRecipes according to the information gather by
   /// Legal. This method is only used for the legacy inner loop vectorizer.
+  /// \p SinkAfter is Legal's sink-after map with dead instructions removed.
-  VPlanPtr
-  buildVPlanWithVPRecipes(VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
-                          SmallPtrSetImpl<Instruction *> &DeadInstructions);
+  VPlanPtr buildVPlanWithVPRecipes(
+      VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
+      SmallPtrSetImpl<Instruction *> &DeadInstructions,
+      const DenseMap<Instruction *, Instruction *> &SinkAfter);
 
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7130,25 +7130,30 @@
   SmallPtrSet<Instruction *, 4> DeadInstructions;
   collectTriviallyDeadInstructions(DeadInstructions);
 
+  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
+  // Dead instructions do not need sinking. Remove them from SinkAfter once,
+  // up front, so that every VPlan built below is handed the same pruned map.
+  for (Instruction *I : DeadInstructions)
+    SinkAfter.erase(I);
+
   for (unsigned VF = MinVF; VF < MaxVF + 1;) {
     VFRange SubRange = {VF, MaxVF + 1};
-    VPlans.push_back(
-        buildVPlanWithVPRecipes(SubRange, NeedDef, DeadInstructions));
+    VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef,
+                                             DeadInstructions, SinkAfter));
     VF = SubRange.End;
   }
 }
 
 VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
-    SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+    SmallPtrSetImpl<Instruction *> &DeadInstructions,
+    const DenseMap<Instruction *, Instruction *> &SinkAfter) {
 
   // Hold a mapping from predicated instructions to their recipes, in order to
   // fix their AlsoPack behavior if a user is determined to replicate and use a
   // scalar instead of vector value.
   DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
 
-  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
-
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
 
   VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -607,3 +607,41 @@
 for.end12.loopexit:                               ; preds = %cond.end
   ret void
 }
+
+; Dead instructions, like the exit condition, are not part of the actual VPlan
+; and do not need to be sunk. PR44634.
+define void @sink_dead_inst() {
+; SINK-AFTER-LABEL: define void @sink_dead_inst(
+; SINK-AFTER-LABEL: vector.body: ; preds = %vector.body, %vector.ph
+; SINK-AFTER-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; SINK-AFTER-NEXT:    %vec.ind = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, %vector.ph ], [ %vec.ind.next, %vector.body ]
+; SINK-AFTER-NEXT:    %vector.recur = phi <4 x i16> [ <i16 undef, i16 undef, i16 undef, i16 0>, %vector.ph ], [ %3, %vector.body ]
+; SINK-AFTER-NEXT:    %vector.recur2 = phi <4 x i32> [ <i32 undef, i32 undef, i32 undef, i32 -27>, %vector.ph ], [ %1, %vector.body ]
+; SINK-AFTER-NEXT:    %0 = add <4 x i16> %vec.ind, <i16 1, i16 1, i16 1, i16 1>
+; SINK-AFTER-NEXT:    %1 = zext <4 x i16> %0 to <4 x i32>
+; SINK-AFTER-NEXT:    %2 = shufflevector <4 x i32> %vector.recur2, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    %3 = add <4 x i16> %0, <i16 5, i16 5, i16 5, i16 5>
+; SINK-AFTER-NEXT:    %4 = shufflevector <4 x i16> %vector.recur, <4 x i16> %3, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    %5 = sub <4 x i16> %4, <i16 10, i16 10, i16 10, i16 10>
+; SINK-AFTER-NEXT:    %index.next = add i32 %index, 4
+; SINK-AFTER-NEXT:    %vec.ind.next = add <4 x i16> %vec.ind, <i16 4, i16 4, i16 4, i16 4>
+; SINK-AFTER-NEXT:    %6 = icmp eq i32 %index.next, 40
+; SINK-AFTER-NEXT:    br i1 %6, label %middle.block, label %vector.body, !llvm.loop !{{[0-9]+}}
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
+  %rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
+  %rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ]
+  %use.rec.1 = sub i16 %rec.1, 10
+  %cmp = icmp eq i32 %rec.2, 15
+  %iv.next = add i16 %iv, 1
+  %rec.2.prev = zext i16 %iv.next to i32
+  %rec.1.prev = add i16 %iv.next, 5
+  br i1 %cmp, label %for.end, label %for.cond
+
+for.end:
+  ret void
+}