diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -591,7 +591,7 @@ /// Fix a first-order recurrence. This is the second phase of vectorizing /// this phi node. - void fixFirstOrderRecurrence(PHINode *Phi, VPTransformState &State); + void fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, VPTransformState &State); /// Fix a reduction cross-iteration phi. This is the second phase of /// vectorizing this phi node. @@ -4135,11 +4135,11 @@ if (PhiR->getRecurrenceDescriptor()) { fixReduction(PhiR, State); } else if (Legal->isFirstOrderRecurrence(OrigPhi)) - fixFirstOrderRecurrence(OrigPhi, State); + fixFirstOrderRecurrence(PhiR, State); } } -void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi, +void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, VPTransformState &State) { // This is the second phase of vectorizing first-order recurrences. An // overview of the transformation is described below. Suppose we have the @@ -4190,13 +4190,7 @@ // After execution completes the vector loop, we extract the next value of // the recurrence (x) to use as the initial value in the scalar loop. - // Get the original loop preheader and single loop latch. - auto *Preheader = OrigLoop->getLoopPreheader(); - auto *Latch = OrigLoop->getLoopLatch(); - - // Get the initial and previous values of the scalar recurrence. - auto *ScalarInit = Phi->getIncomingValueForBlock(Preheader); - auto *Previous = Phi->getIncomingValueForBlock(Latch); + auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); auto *IdxTy = Builder.getInt32Ty(); auto *One = ConstantInt::get(IdxTy, 1); @@ -4212,11 +4206,10 @@ VectorInit, LastIdx, "vector.recur.init"); } - VPValue *PhiDef = State.Plan->getVPValue(Phi); - VPValue *PreviousDef = State.Plan->getVPValue(Previous); + VPValue *PreviousDef = PhiR->getBackedgeValue(); // We constructed a temporary phi node in the first phase of vectorization. // This phi node will eventually be deleted. - Builder.SetInsertPoint(cast(State.get(PhiDef, 0))); + Builder.SetInsertPoint(cast(State.get(PhiR, 0))); // Create a phi node for the new recurrence. The current value will either be // the initial value inserted into a vector or loop-varying vector value. @@ -4256,13 +4249,13 @@ // Shuffle the current and previous vector and update the vector parts. for (unsigned Part = 0; Part < UF; ++Part) { Value *PreviousPart = State.get(PreviousDef, Part); - Value *PhiPart = State.get(PhiDef, Part); + Value *PhiPart = State.get(PhiR, Part); auto *Shuffle = VF.isVector() ? Builder.CreateVectorSplice(Incoming, PreviousPart, -1) : Incoming; PhiPart->replaceAllUsesWith(Shuffle); cast(PhiPart)->eraseFromParent(); - State.reset(PhiDef, Shuffle, Part); + State.reset(PhiR, Shuffle, Part); Incoming = PreviousPart; } @@ -4297,6 +4290,7 @@ // the second last element when VF > 1. ExtractForPhiUsedOutsideLoop = State.get(PreviousDef, UF - 2); + PHINode *Phi = cast(PhiR->getUnderlyingValue()); // Fix the initial value of the original recurrence in the scalar loop. Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); @@ -4755,64 +4749,72 @@ assert(PN->getParent() == OrigLoop->getHeader() && "Non-header phis should have been handled elsewhere"); - VPValue *StartVPV = PhiR->getStartValue(); - Value *StartV = StartVPV ? StartVPV->getLiveInIRValue() : nullptr; + bool IsOrdered = State.VF.isVector() && + Cost->isInLoopReduction(cast(PN)) && + Cost->useOrderedReductions(*RdxDesc); + // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. if (RdxDesc || Legal->isFirstOrderRecurrence(P)) { - Value *Iden = nullptr; bool ScalarPHI = (State.VF.isScalar()) || Cost->isInLoopReduction(cast(PN)); Type *VecTy = ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); - if (RdxDesc) { - assert(Legal->isReductionVariable(P) && StartV && - "RdxDesc should only be set for reduction variables; in that case " - "a StartV is also required"); - RecurKind RK = RdxDesc->getRecurrenceKind(); - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) { - // MinMax reduction have the start value as their identify. - if (ScalarPHI) { - Iden = StartV; - } else { - IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - StartV = Iden = - Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); - } - } else { - Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity( - RK, VecTy->getScalarType(), RdxDesc->getFastMathFlags()); - Iden = IdenC; - - if (!ScalarPHI) { - Iden = ConstantVector::getSplat(State.VF, IdenC); - IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - Constant *Zero = Builder.getInt32(0); - StartV = Builder.CreateInsertElement(Iden, StartV, Zero); - } - } - } - - bool IsOrdered = State.VF.isVector() && - Cost->isInLoopReduction(cast(PN)) && - Cost->useOrderedReductions(*RdxDesc); unsigned LastPartForNewPhi = IsOrdered ? 1 : State.UF; for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); State.set(PhiR, EntryPart, Part); - if (StartV) { - // Make sure to add the reduction start value only to the - // first unroll part. - Value *StartVal = (Part == 0) ? StartV : Iden; - cast(EntryPart)->addIncoming(StartVal, LoopVectorPreHeader); + } + if (Legal->isFirstOrderRecurrence(P)) + return; + VPValue *StartVPV = PhiR->getStartValue(); + Value *StartV = StartVPV->getLiveInIRValue(); + + Value *Iden = nullptr; + + assert(Legal->isReductionVariable(P) && StartV && + "RdxDesc should only be set for reduction variables; in that case " + "a StartV is also required"); + RecurKind RK = RdxDesc->getRecurrenceKind(); + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) { + // MinMax reduction have the start value as their identify. + if (ScalarPHI) { + Iden = StartV; + } else { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); + StartV = Iden = + Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } + } else { + Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity( + RK, VecTy->getScalarType(), RdxDesc->getFastMathFlags()); + Iden = IdenC; + + if (!ScalarPHI) { + Iden = ConstantVector::getSplat(State.VF, IdenC); + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); + Constant *Zero = Builder.getInt32(0); + StartV = Builder.CreateInsertElement(Iden, StartV, Zero); + } + } + + for (unsigned Part = 0; Part < State.UF; ++Part) { + if (Part > 0 && IsOrdered) + return; + + Value *EntryPart = State.get(PhiR, Part); + // Make sure to add the reduction start value only to the + // first unroll part. + Value *StartVal = (Part == 0) ? StartV : Iden; + cast(EntryPart)->addIncoming(StartVal, LoopVectorPreHeader); } + return; } @@ -8958,22 +8960,32 @@ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) return toVPRecipeResult(Recipe); - if (Legal->isReductionVariable(Phi)) { - RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi]; - assert(RdxDesc.getRecurrenceStartValue() == - Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); + VPWidenPHIRecipe *PhiRecipe = nullptr; + if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) { VPValue *StartV = Operands[0]; + if (Legal->isReductionVariable(Phi)) { + RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi]; + assert(RdxDesc.getRecurrenceStartValue() == + Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); + PhiRecipe = new VPWidenPHIRecipe(Phi, RdxDesc, *StartV); + } else { + PhiRecipe = new VPWidenPHIRecipe(Phi, *StartV); + } - auto *PhiRecipe = new VPWidenPHIRecipe(Phi, RdxDesc, *StartV); - PhisToFix.push_back(PhiRecipe); // Record the incoming value from the backedge, so we can add the incoming // value from the backedge after all recipes have been created. recordRecipeOf(cast( Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); - return toVPRecipeResult(PhiRecipe); + PhisToFix.push_back(PhiRecipe); + } else { + // TODO: record start and backedge value for remaining pointer induction + // phis. + assert(Phi->getType()->isPointerTy() && + "only pointer phis should be handled here"); + PhiRecipe = new VPWidenPHIRecipe(Phi); } - return toVPRecipeResult(new VPWidenPHIRecipe(Phi)); + return toVPRecipeResult(PhiRecipe); } if (isa(Instr) && diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -56,8 +56,9 @@ // marked by having a nullptr entry in this map. DenseMap Ingredient2Recipe; - /// Cross-iteration reduction phis for which we need to add the incoming value - /// from the backedge after all recipes have been created. + /// Cross-iteration reduction & first-order recurrence phis for which we need + /// to add the incoming value from the backedge after all recipes have been + /// created. SmallVector PhisToFix; /// Check if \p I can be widened at the start of \p Range and possibly @@ -170,8 +171,8 @@ Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan); - /// Add the incoming values from the backedge to reduction cross-iteration - /// phis. + /// Add the incoming values from the backedge to reduction & first-order + /// recurrence cross-iteration phis. void fixHeaderPhis(); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1046,10 +1046,10 @@ /// A recipe for handling all phi nodes except for integer and FP inductions. /// For reduction PHIs, RdxDesc must point to the corresponding recurrence -/// descriptor, the start value is the first operand of the recipe and the -/// incoming value from the backedge is the second operand. In the VPlan native -/// path, all incoming VPValues & VPBasicBlock pairs are managed in the recipe -/// directly. +/// descriptor. For reductions and first-order recurrences, the start value is +/// the first operand of the recipe and the incoming value from the backedge is +/// the second operand. In the VPlan native path, all incoming VPValues & +/// VPBasicBlock pairs are managed in the recipe directly. class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { /// Descriptor for a reduction PHI. RecurrenceDescriptor *RdxDesc = nullptr; @@ -1066,6 +1066,11 @@ addOperand(&Start); } + /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. + VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) { + addOperand(&Start); + } + /// Create a VPWidenPHIRecipe for \p Phi VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC, {}), @@ -1089,15 +1094,15 @@ VPSlotTracker &SlotTracker) const override; #endif - /// Returns the start value of the phi, if it is a reduction. + /// Returns the start value of the phi, if it is a reduction or first-order + /// recurrence. VPValue *getStartValue() { return getNumOperands() == 0 ? nullptr : getOperand(0); } - /// Returns the incoming value from the loop backedge, if it is a reduction. + /// Returns the incoming value from the loop backedge, if it is a reduction or + /// first-order recurrence. VPValue *getBackedgeValue() { - assert(RdxDesc && "second incoming value is only guaranteed to be backedge " - "value for reductions"); return getOperand(1); } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: sink_replicate_region_1 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI %0 = phi 0, %conv +; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -83,7 +83,7 @@ ; CHECK-LABEL: sink_replicate_region_2 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -155,7 +155,7 @@ ; CHECK-LABEL: sink_replicate_region_3_reduction ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0> @@ -214,7 +214,7 @@ ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI %0 = phi 0, %conv +; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> @@ -312,7 +312,7 @@ ; CHECK-LABEL: sink_replicate_region_after_replicate_region ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next +; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -849,7 +849,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next -; CHECK-NEXT: WIDEN-PHI %for = phi 0, %lv.a +; CHECK-NEXT: WIDEN-PHI ir<%for> = phi ir<0>, ir<%lv.a> ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> ; CHECK-NEXT: Successor(s): pred.load