diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -508,9 +508,9 @@ /// Widen an integer or floating-point induction variable \p IV. If \p Trunc /// is provided, the integer induction variable will first be truncated to /// the corresponding type. - void widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID, - Value *Start, TruncInst *Trunc, VPValue *Def, - VPTransformState &State); + Instruction *widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID, + Value *Start, TruncInst *Trunc, + VPValue *Def, VPTransformState &State); /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, @@ -628,10 +628,11 @@ /// node, and \p Step is the loop-invariant step. If \p EntryVal is a /// truncate instruction, instead of widening the original IV, we widen a /// version of the IV truncated to \p EntryVal's type. - void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, - Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, - VPTransformState &State); + Instruction *createVectorIntOrFpInductionPHI(const InductionDescriptor &II, + Value *Step, Value *Start, + Instruction *EntryVal, + VPValue *Def, + VPTransformState &State); /// Returns true if an instruction \p I should be scalarized instead of /// vectorized for the chosen vectorization factor. @@ -1161,8 +1162,10 @@ // instruction. Widen memory instructions involved in address computation // will lead to gather/scatter instructions, which don't need to be // handled. + // There is also nothing to drop from VPWidenIntOrFpInductionRecipe. 
if (isa(CurRec) || - isa(CurRec)) + isa(CurRec) || + isa(CurRec)) continue; // This recipe contributes to the address computation of a widen @@ -2332,7 +2335,7 @@ return Shuf; } -void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( +Instruction *InnerLoopVectorizer::createVectorIntOrFpInductionPHI( const InductionDescriptor &II, Value *Step, Value *Start, Instruction *EntryVal, VPValue *Def, VPTransformState &State) { assert((isa(EntryVal) || isa(EntryVal)) && @@ -2402,17 +2405,8 @@ Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); LastInduction->setDebugLoc(EntryVal->getDebugLoc()); } - - // Move the last step to the end of the latch block. This ensures consistent - // placement of all induction updates. - auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - auto *Br = cast(LoopVectorLatch->getTerminator()); - auto *ICmp = cast(Br->getCondition()); - LastInduction->moveBefore(ICmp); - LastInduction->setName("vec.ind.next"); - VecInd->addIncoming(SteppedStart, LoopVectorPreHeader); - VecInd->addIncoming(LastInduction, LoopVectorLatch); + return LastInduction; } bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const { @@ -2430,11 +2424,9 @@ return llvm::any_of(IV->users(), isScalarInst); } -void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, - const InductionDescriptor &ID, - Value *Start, TruncInst *Trunc, - VPValue *Def, - VPTransformState &State) { +Instruction *InnerLoopVectorizer::widenIntOrFpInduction( + PHINode *IV, const InductionDescriptor &ID, Value *Start, TruncInst *Trunc, + VPValue *Def, VPTransformState &State) { assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); @@ -2512,7 +2504,7 @@ if (VF.isZero() || VF.isScalar()) { Value *ScalarIV = CreateScalarIV(Step); CreateSplatIV(ScalarIV, Step); - return; + return nullptr; } // Determine if we 
want a scalar version of the induction variable. This is @@ -2520,22 +2512,23 @@ // least one user in the loop that is not widened. auto NeedsScalarIV = needsScalarInduction(EntryVal); if (!NeedsScalarIV) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - return; + return createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, + State); } // Try to create a new independent vector induction variable. If we can't // create the phi node, we will splat the scalar induction variable in each // loop iteration. if (!shouldScalarizeInstruction(EntryVal)) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); + Instruction *LastInduction = + createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); Value *ScalarIV = CreateScalarIV(Step); // Create scalar steps that can be used by instructions we will later // scalarize. Note that the addition of the scalar steps will not increase // the number of instructions in the loop in the common case prior to // InstCombine. We will be trading one vector extract for each scalar step. 
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - return; + return LastInduction; } // All IV users are scalar instructions, so only emit a scalar IV, not a @@ -2545,6 +2538,7 @@ if (!Cost->isScalarEpilogueAllowed()) CreateSplatIV(ScalarIV, Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); + return nullptr; } Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx, @@ -8687,7 +8681,7 @@ auto *PN = cast(R->getUnderlyingValue()); VPRecipeBase *IncR = getRecipe(cast(PN->getIncomingValueForBlock(OrigLatch))); - R->addOperand(IncR->getVPSingleValue()); + R->addOperand(IncR->getVPValue(0)); } } @@ -9636,9 +9630,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - State.ILV->widenIntOrFpInduction(IV, getInductionDescriptor(), - getStartValue()->getLiveInIRValue(), - getTruncInst(), getVPValue(0), State); + LastInduction = State.ILV->widenIntOrFpInduction( + IV, getInductionDescriptor(), getStartValue()->getLiveInIRValue(), + getTruncInst(), getVPValue(0), State); } void VPWidenPHIRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1008,6 +1008,12 @@ PHINode *IV; const InductionDescriptor &IndDesc; + /// Pointer to the last vector induction increment generated for the recipe. + /// Null if no vector values have been generated. Note that this is a + /// temporary measure and only introduced to facilitate the transition to + /// modeling the increment explicitly in VPlan. + Instruction *LastInduction = nullptr; + public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc) @@ -1051,6 +1057,9 @@ /// Returns the induction descriptor for the recipe. 
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } + + /// Return the last vector induction increment generated for the recipe. + Instruction *getLastInduction() { return LastInduction; } }; /// A recipe for handling first order recurrences and pointer inductions. For diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -832,7 +832,34 @@ // Fix the latch value of reduction and first-order recurrences phis in the // vector loop. VPBasicBlock *Header = Entry->getEntryBasicBlock(); + if (Header->empty()) { + assert(EnableVPlanNativePath); + Header = cast(Header->getSingleSuccessor()); + } for (VPRecipeBase &R : Header->phis()) { + if (auto *IV = dyn_cast(&R)) { + if (!IV->getLastInduction()) + continue; + Instruction *LastInduction = IV->getLastInduction(); + Instruction *Cur = LastInduction; + // Starting from LastInduction, look through the operands until we find + // the starting phi. + while (!isa(Cur)) + Cur = cast(Cur->getOperand(0)); + PHINode *VecInd = cast(Cur); + // Move the last step to the end of the latch block. This ensures + // consistent + // placement of all induction updates. + auto *LoopVectorLatch = + State->LI->getLoopFor(State->CFG.PrevBB)->getLoopLatch(); + auto *Br = cast(LoopVectorLatch->getTerminator()); + auto *ICmp = cast(Br->getCondition()); + LastInduction->moveBefore(ICmp); + LastInduction->setName("vec.ind.next"); + + VecInd->addIncoming(LastInduction, LoopVectorLatch); + continue; + } auto *PhiR = dyn_cast(&R); if (!PhiR || !(isa(&R) || isa(&R)))