diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -632,13 +632,6 @@ Instruction *EntryVal, VPValue *Def, VPTransformState &State); - /// Returns true if an instruction \p I should be scalarized instead of - /// vectorized for the chosen vectorization factor. - bool shouldScalarizeInstruction(Instruction *I) const; - - /// Returns true if we should generate a scalar version of \p IV. - bool needsScalarInduction(Instruction *IV) const; - /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); @@ -2479,21 +2472,6 @@ VecInd->addIncoming(LastInduction, LoopVectorLatch); } -bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const { - return Cost->isScalarAfterVectorization(I, VF) || - Cost->isProfitableToScalarize(I, VF); -} - -bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { - if (shouldScalarizeInstruction(IV)) - return true; - auto isScalarInst = [&](User *U) -> bool { - auto *I = cast<Instruction>(U); - return (OrigLoop->contains(I) && shouldScalarizeInstruction(I)); - }; - return llvm::any_of(IV->users(), isScalarInst); -} - void InnerLoopVectorizer::widenIntOrFpInduction( PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, Value *CanonicalIV) { @@ -2608,8 +2586,7 @@ // Determine if we want a scalar version of the induction variable. This is // true if the induction variable itself is not widened, or if it has at // least one user in the loop that is not widened. - auto NeedsScalarIV = needsScalarInduction(EntryVal); - if (!NeedsScalarIV) { + if (Def->needsVectorIV() && !Def->needsScalarIV()) { createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); return; } @@ -2617,7 +2594,7 @@ // Try to create a new independent vector induction variable. 
If we can't // create the phi node, we will splat the scalar induction variable in each // loop iteration. - if (!shouldScalarizeInstruction(EntryVal)) { + if (Def->needsVectorIV() && Def->needsScalarIV()) { createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); Value *ScalarIV = CreateScalarIV(Step); // Create scalar steps that can be used by instructions we will later @@ -8546,15 +8523,48 @@ Mask, Consecutive, Reverse); } -VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, - ArrayRef<VPValue *> Operands) const { +/// Returns true if an instruction \p I should be scalarized instead of +/// vectorized for the chosen vectorization factor. +static bool shouldScalarizeInstruction(LoopVectorizationCostModel &Cost, + Instruction *I, ElementCount VF) { + return Cost.isScalarAfterVectorization(I, VF) || + Cost.isProfitableToScalarize(I, VF); +} + +/// Returns true if we should generate a scalar version of \p IV. +static bool needsScalarInduction(LoopVectorizationCostModel &Cost, + Loop &OrigLoop, Instruction *IV, + ElementCount VF) { + if (shouldScalarizeInstruction(Cost, IV, VF)) + return true; + auto isScalarInst = [&Cost, &OrigLoop, VF](User *U) -> bool { + auto *I = cast<Instruction>(U); + return OrigLoop.contains(I) && shouldScalarizeInstruction(Cost, I, VF); + }; + return any_of(IV->users(), isScalarInst); +} + +VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( + PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const { + // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. 
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) { + bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return needsScalarInduction(CM, *OrigLoop, Phi, VF); + }, + Range); + bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return shouldScalarizeInstruction(CM, Phi, VF); + }, + Range); assert(II->getStartValue() == Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); - return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II); + return new VPWidenIntOrFpInductionRecipe( + Phi, Operands[0], *II, NeedsScalarIV, + !NeedsScalarIV || !NeedsScalarIVOnly); } return nullptr; @@ -8581,9 +8591,19 @@ isOptimizableIVTruncate(I), Range)) { auto *Phi = cast<PHINode>(I->getOperand(0)); + bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return needsScalarInduction(CM, *OrigLoop, I, VF); + }, + Range); + bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { return shouldScalarizeInstruction(CM, I, VF); }, + Range); + const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi); VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I); + return new VPWidenIntOrFpInductionRecipe( + Phi, Start, II, I, NeedsScalarIV, !NeedsScalarIV || !NeedsScalarIVOnly); } return nullptr; } @@ -8865,7 +8885,7 @@ if (auto Phi = dyn_cast<PHINode>(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) return tryToBlend(Phi, Operands, Plan); - if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) return toVPRecipeResult(Recipe); VPHeaderPHIRecipe *PhiRecipe = nullptr; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ 
b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -75,7 +75,8 @@ /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const; + tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands, + VFRange &Range) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1027,18 +1027,24 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { PHINode *IV; const InductionDescriptor &IndDesc; + bool NeedsScalarIV; + bool NeedsVectorIV; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, - const InductionDescriptor &IndDesc) + const InductionDescriptor &IndDesc, + bool NeedsScalarIV, bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc, - TruncInst *Trunc) + TruncInst *Trunc, bool NeedsScalarIV, + bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1082,6 +1088,12 @@ const TruncInst *TruncI = getTruncInst(); return TruncI ? TruncI->getType() : IV->getType(); } + + /// Returns true if a scalar phi needs to be created for the induction. + bool needsScalarIV() const { return NeedsScalarIV; } + + /// Returns true if a vector phi needs to be created for the induction. 
+ bool needsVectorIV() const { return NeedsVectorIV; } }; /// A pure virtual base class for all recipes modeling header phis, including diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -47,7 +47,8 @@ auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); - NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II); + NewRecipe = + new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true); } else { Plan->addVPValue(Phi, VPPhi); continue;