diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -508,9 +508,11 @@ /// Widen an integer or floating-point induction variable \p IV. If \p Trunc /// is provided, the integer induction variable will first be truncated to /// the corresponding type. - void widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID, - Value *Start, TruncInst *Trunc, VPValue *Def, - VPTransformState &State); + /// Returns the last increment instruction of the vector induction, if one has + /// been created or nullptr otherwise. + Instruction *widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID, + Value *Start, TruncInst *Trunc, + VPValue *Def, VPTransformState &State); /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, @@ -628,10 +630,11 @@ /// node, and \p Step is the loop-invariant step. If \p EntryVal is a /// truncate instruction, instead of widening the original IV, we widen a /// version of the IV truncated to \p EntryVal's type. - void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, - Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, - VPTransformState &State); + Instruction *createVectorIntOrFpInductionPHI(const InductionDescriptor &II, + Value *Step, Value *Start, + Instruction *EntryVal, + VPValue *Def, + VPTransformState &State); /// Returns true if an instruction \p I should be scalarized instead of /// vectorized for the chosen vectorization factor. 
@@ -2334,7 +2337,7 @@ return Shuf; } -void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( +Instruction *InnerLoopVectorizer::createVectorIntOrFpInductionPHI( const InductionDescriptor &II, Value *Step, Value *Start, Instruction *EntryVal, VPValue *Def, VPTransformState &State) { IRBuilder<> &Builder = State.Builder; @@ -2394,28 +2397,21 @@ PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", &*LoopVectorBody->getFirstInsertionPt()); VecInd->setDebugLoc(EntryVal->getDebugLoc()); - Instruction *LastInduction = VecInd; + Instruction *LastIncrementOfVectorIV = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { - State.set(Def, LastInduction, Part); + State.set(Def, LastIncrementOfVectorIV, Part); if (isa(EntryVal)) - addMetadata(LastInduction, EntryVal); + addMetadata(LastIncrementOfVectorIV, EntryVal); - LastInduction = cast( - Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); - LastInduction->setDebugLoc(EntryVal->getDebugLoc()); + LastIncrementOfVectorIV = cast(Builder.CreateBinOp( + AddOp, LastIncrementOfVectorIV, SplatVF, "step.add")); + LastIncrementOfVectorIV->setDebugLoc(EntryVal->getDebugLoc()); } - // Move the last step to the end of the latch block. This ensures consistent - // placement of all induction updates. 
- auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - auto *Br = cast(LoopVectorLatch->getTerminator()); - auto *ICmp = cast(Br->getCondition()); - LastInduction->moveBefore(ICmp); - LastInduction->setName("vec.ind.next"); - + LastIncrementOfVectorIV->setName("vec.ind.next"); VecInd->addIncoming(SteppedStart, LoopVectorPreHeader); - VecInd->addIncoming(LastInduction, LoopVectorLatch); + return LastIncrementOfVectorIV; } bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const { @@ -2433,11 +2429,9 @@ return llvm::any_of(IV->users(), isScalarInst); } -void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, - const InductionDescriptor &ID, - Value *Start, TruncInst *Trunc, - VPValue *Def, - VPTransformState &State) { +Instruction *InnerLoopVectorizer::widenIntOrFpInduction( + PHINode *IV, const InductionDescriptor &ID, Value *Start, TruncInst *Trunc, + VPValue *Def, VPTransformState &State) { IRBuilder<> &Builder = State.Builder; assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); @@ -2517,30 +2511,30 @@ if (State.VF.isZero() || State.VF.isScalar()) { Value *ScalarIV = CreateScalarIV(Step); CreateSplatIV(ScalarIV, Step); - return; + return nullptr; } // Determine if we want a scalar version of the induction variable. This is // true if the induction variable itself is not widened, or if it has at // least one user in the loop that is not widened. auto NeedsScalarIV = needsScalarInduction(EntryVal); - if (!NeedsScalarIV) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - return; - } + if (!NeedsScalarIV) + return createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, + State); // Try to create a new independent vector induction variable. If we can't // create the phi node, we will splat the scalar induction variable in each // loop iteration. 
if (!shouldScalarizeInstruction(EntryVal)) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); + Instruction *LastIncrementOfVectorIV = + createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); Value *ScalarIV = CreateScalarIV(Step); // Create scalar steps that can be used by instructions we will later // scalarize. Note that the addition of the scalar steps will not increase // the number of instructions in the loop in the common case prior to // InstCombine. We will be trading one vector extract for each scalar step. buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - return; + return LastIncrementOfVectorIV; } // All IV users are scalar instructions, so only emit a scalar IV, not a @@ -2550,6 +2544,7 @@ if (!Cost->isScalarEpilogueAllowed()) CreateSplatIV(ScalarIV, Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); + return nullptr; } Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx, @@ -9666,9 +9661,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - State.ILV->widenIntOrFpInduction(IV, getInductionDescriptor(), - getStartValue()->getLiveInIRValue(), - getTruncInst(), getVPValue(0), State); + LastIncrementOfVectorIV = State.ILV->widenIntOrFpInduction( + IV, getInductionDescriptor(), getStartValue()->getLiveInIRValue(), + getTruncInst(), getVPValue(0), State); } void VPWidenPHIRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1008,6 +1008,12 @@ PHINode *IV; const InductionDescriptor &IndDesc; + /// Pointer to the last vector induction increment generated for the recipe. + /// Null if no vector values have been generated. 
Note that this is a + /// temporary measure and only introduced to facilitate the transition to + /// modeling the increment explicitly in VPlan. + Instruction *LastIncrementOfVectorIV = nullptr; + public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc) @@ -1051,6 +1057,9 @@ /// Returns the induction descriptor for the recipe. const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } + + /// Return the last vector induction increment generated for the recipe. + Instruction *getLastIncrementOfVectorIV() { return LastIncrementOfVectorIV; } }; /// A recipe for handling first order recurrences and pointer inductions. For diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -832,7 +832,33 @@ // Fix the latch value of reduction and first-order recurrences phis in the // vector loop. VPBasicBlock *Header = Entry->getEntryBasicBlock(); + if (Header->empty()) { + assert(EnableVPlanNativePath); + Header = cast(Header->getSingleSuccessor()); + } for (VPRecipeBase &R : Header->phis()) { + if (auto *IV = dyn_cast(&R)) { + if (!IV->getLastIncrementOfVectorIV()) + continue; + Instruction *LastIncrementOfVectorIV = IV->getLastIncrementOfVectorIV(); + Instruction *Cur = LastIncrementOfVectorIV; + // Starting from LastIncrementOfVectorIV, look through the operands until + // we find the starting phi. + while (!isa(Cur)) + Cur = cast(Cur->getOperand(0)); + PHINode *VecInd = cast(Cur); + // Move the last step to the end of the latch block. This ensures + // consistent + // placement of all induction updates. 
+ auto *LoopVectorLatch = + State->LI->getLoopFor(State->CFG.PrevBB)->getLoopLatch(); + auto *Br = cast(LoopVectorLatch->getTerminator()); + auto *ICmp = cast(Br->getCondition()); + LastIncrementOfVectorIV->moveBefore(ICmp); + + VecInd->addIncoming(LastIncrementOfVectorIV, LoopVectorLatch); + continue; + } auto *PhiR = dyn_cast(&R); if (!PhiR || !(isa(&R) || isa(&R)))