// Review summary of the patch below (unified diff over four LLVM vectorizer
// files: LoopVectorize.cpp, VPRecipeBuilder.h, VPlan.h, VPlanTransforms.cpp).
//
// - LoopVectorize.cpp: deletes the declarations and definitions of
//   InnerLoopVectorizer::shouldScalarizeInstruction and
//   InnerLoopVectorizer::needsScalarInduction.
// - LoopVectorize.cpp: the later hunks replace needsScalarInduction(EntryVal)
//   with Def->needsScalarIV() and !shouldScalarizeInstruction(EntryVal) with
//   Def->needsVectorIV(), so the decision is read from the recipe.
// - LoopVectorize.cpp: adds the static helper createWidenInductionRecipe. It
//   evaluates the same two cost-model predicates per ElementCount through
//   LoopVectorizationPlanner::getDecisionAndClampRange (called twice on the
//   same Range) and passes NeedsScalarIV and !NeedsScalarIVOnly to the
//   VPWidenIntOrFpInductionRecipe constructor.
// - tryToOptimizeInductionPHI gains a VFRange & parameter (definition,
//   declaration in VPRecipeBuilder.h, and call site); the path that
//   previously built the recipe with a truncate instruction now calls the
//   helper as well.
// - VPlan.h: VPWidenIntOrFpInductionRecipe stores NeedsScalarIV and
//   NeedsVectorIV, both constructors take them, and the accessors
//   needsScalarIV() / needsVectorIV() are added.
// - VPlanTransforms.cpp: the constructor call there passes false for
//   NeedsScalarIV and true for NeedsVectorIV.
//
// NOTE(review): cast(U), dyn_cast(PhiOrTrunc), isa(PhiOrTrunc), cast(...) and
// "ArrayRef Operands" carry no template arguments in this copy -- presumably
// stripped during extraction; confirm against the original patch.
// NOTE(review): line breaks inside the hunks are collapsed in this copy, so it
// presumably does not apply as-is -- verify before feeding it to a patch tool.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -632,13 +632,6 @@ Instruction *EntryVal, VPValue *Def, VPTransformState &State); - /// Returns true if an instruction \p I should be scalarized instead of - /// vectorized for the chosen vectorization factor. - bool shouldScalarizeInstruction(Instruction *I) const; - - /// Returns true if we should generate a scalar version of \p IV. - bool needsScalarInduction(Instruction *IV) const; - /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); @@ -2479,21 +2472,6 @@ VecInd->addIncoming(LastInduction, LoopVectorLatch); } -bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const { - return Cost->isScalarAfterVectorization(I, VF) || - Cost->isProfitableToScalarize(I, VF); -} - -bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { - if (shouldScalarizeInstruction(IV)) - return true; - auto isScalarInst = [&](User *U) -> bool { - auto *I = cast(U); - return (OrigLoop->contains(I) && shouldScalarizeInstruction(I)); - }; - return llvm::any_of(IV->users(), isScalarInst); -} - void InnerLoopVectorizer::widenIntOrFpInduction( PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, Value *CanonicalIV) { @@ -2605,11 +2583,8 @@ return; } - // Determine if we want a scalar version of the induction variable. This is - // true if the induction variable itself is not widened, or if it has at - // least one user in the loop that is not widened. - auto NeedsScalarIV = needsScalarInduction(EntryVal); - if (!NeedsScalarIV) { + // If only a vector induction is needed, create it and return. 
+ if (!Def->needsScalarIV()) { createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); return; } @@ -2617,7 +2592,7 @@ // Try to create a new independent vector induction variable. If we can't // create the phi node, we will splat the scalar induction variable in each // loop iteration. - if (!shouldScalarizeInstruction(EntryVal)) { + if (Def->needsVectorIV()) { createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); Value *ScalarIV = CreateScalarIV(Step); // Create scalar steps that can be used by instructions we will later @@ -8546,16 +8521,54 @@ Mask, Consecutive, Reverse); } -VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, - ArrayRef Operands) const { +static VPWidenIntOrFpInductionRecipe * +createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, + VPValue *Start, const InductionDescriptor &IndDesc, + LoopVectorizationCostModel &CM, Loop &OrigLoop, + VFRange &Range) { + // Returns true if an instruction \p I should be scalarized instead of + // vectorized for the chosen vectorization factor. + auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) { + return CM.isScalarAfterVectorization(I, VF) || + CM.isProfitableToScalarize(I, VF); + }; + + bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + // Returns true if we should generate a scalar version of \p IV. 
+ if (ShouldScalarizeInstruction(PhiOrTrunc, VF)) + return true; + auto isScalarInst = [&](User *U) -> bool { + auto *I = cast(U); + return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF); + }; + return any_of(PhiOrTrunc->users(), isScalarInst); + }, + Range); + bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return ShouldScalarizeInstruction(PhiOrTrunc, VF); + }, + Range); + assert(IndDesc.getStartValue() == + Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader())); + if (auto *TruncI = dyn_cast(PhiOrTrunc)) { + return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI, + NeedsScalarIV, !NeedsScalarIVOnly); + } + assert(isa(PhiOrTrunc) && "must be a phi node here"); + return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV, + !NeedsScalarIVOnly); +} + +VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( + PHINode *Phi, ArrayRef Operands, VFRange &Range) const { + // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. 
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) { - assert(II->getStartValue() == - Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); - return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II); - } + if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) + return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop, + Range); return nullptr; } @@ -8583,7 +8596,7 @@ auto *Phi = cast(I->getOperand(0)); const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi); VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I); + return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range); } return nullptr; } @@ -8865,7 +8878,7 @@ if (auto Phi = dyn_cast(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) return tryToBlend(Phi, Operands, Plan); - if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) return toVPRecipeResult(Recipe); VPHeaderPHIRecipe *PhiRecipe = nullptr; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -75,7 +75,8 @@ /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands) const; + tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands, + VFRange &Range) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1027,18 +1027,24 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { PHINode *IV; const InductionDescriptor &IndDesc; + bool NeedsScalarIV; + bool NeedsVectorIV; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, - const InductionDescriptor &IndDesc) + const InductionDescriptor &IndDesc, + bool NeedsScalarIV, bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc, - TruncInst *Trunc) + TruncInst *Trunc, bool NeedsScalarIV, + bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1082,6 +1088,12 @@ const TruncInst *TruncI = getTruncInst(); return TruncI ? TruncI->getType() : IV->getType(); } + + /// Returns true if a scalar phi needs to be created for the induction. + bool needsScalarIV() const { return NeedsScalarIV; } + + /// Returns true if a vector phi needs to be created for the induction. 
+ bool needsVectorIV() const { return NeedsVectorIV; } }; /// A pure virtual base class for all recipes modeling header phis, including diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -47,7 +47,8 @@ auto *Phi = cast(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); - NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II); + NewRecipe = + new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true); } else { Plan->addVPValue(Phi, VPPhi); continue;