diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8165,11 +8165,10 @@ Mask, Consecutive, Reverse); } -static VPWidenIntOrFpInductionRecipe * -createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, - VPValue *Start, const InductionDescriptor &IndDesc, - LoopVectorizationCostModel &CM, ScalarEvolution &SE, - Loop &OrigLoop, VFRange &Range) { +static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipe( + PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, + const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM, + VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) { // Returns true if an instruction \p I should be scalarized instead of // vectorized for the chosen vectorization factor. auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) { @@ -8198,22 +8197,26 @@ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader())); assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) && "step must be loop invariant"); + + VPValue *Step = vputils::expandSCEVExpr(Plan, IndDesc.getStep(), SE); if (auto *TruncI = dyn_cast(PhiOrTrunc)) { - return new VPWidenIntOrFpInductionRecipe( - Phi, Start, IndDesc, TruncI, NeedsScalarIV, !NeedsScalarIVOnly, SE); + return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI, + NeedsScalarIV, !NeedsScalarIVOnly); } assert(isa(PhiOrTrunc) && "must be a phi node here"); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV, - !NeedsScalarIVOnly, SE); + return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, + NeedsScalarIV, !NeedsScalarIVOnly); } -VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI( - PHINode *Phi, ArrayRef Operands, VFRange &Range) const { +VPRecipeBase * +VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, + ArrayRef Operands, + VPlan &Plan, VFRange &Range) const { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) - return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, + return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, Plan, *PSE.getSE(), *OrigLoop, Range); // Check if this is pointer induction. If so, build the recipe for it. @@ -8246,7 +8249,7 @@ auto *Phi = cast(I->getOperand(0)); const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi); VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return createWidenInductionRecipe(Phi, I, Start, II, CM, *PSE.getSE(), + return createWidenInductionRecipe(Phi, I, Start, II, CM, Plan, *PSE.getSE(), *OrigLoop, Range); } return nullptr; @@ -8546,7 +8549,7 @@ if (auto Phi = dyn_cast(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) return tryToBlend(Phi, Operands, Plan); - if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) return toVPRecipeResult(Recipe); VPHeaderPHIRecipe *PhiRecipe = nullptr; @@ -9465,33 +9468,20 @@ // variable. Instruction *EntryVal = Trunc ? cast(Trunc) : IV; - auto &DL = EntryVal->getModule()->getDataLayout(); - - BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - // Generate code for the induction step. Note that induction steps are - // required to be loop-invariant - auto CreateStepValue = [&](const SCEV *Step) -> Value * { - if (SE.isSCEVable(IV->getType())) { - SCEVExpander Exp(SE, DL, "induction"); - return Exp.expandCodeFor(Step, Step->getType(), - VectorPH->getTerminator()); - } - return cast(Step)->getValue(); - }; - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); - // Now do the actual transformations, and start with creating the step value. - Value *Step = CreateStepValue(ID.getStep()); + // Now do the actual transformations, and start with fetching the step value. + Value *Step = State.get(getOperand(1), VPIteration(0, 0)); assert((isa(EntryVal) || isa(EntryVal)) && "Expected either an induction phi-node or a truncate of it!"); // Construct the initial value of the vector IV in the vector loop preheader auto CurrIP = Builder.saveIP(); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); Builder.SetInsertPoint(VectorPH->getTerminator()); if (isa(EntryVal)) { assert(Start->getType()->isIntegerTy() && diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -76,7 +76,7 @@ /// return it. If not, return null. VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands, - VFRange &Range) const; + VPlan &Plan, VFRange &Range) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1066,27 +1066,21 @@ bool NeedsScalarIV; bool NeedsVectorIV; - /// SCEV used to expand step. - /// FIXME: move expansion of step to the pre-header, once it is modeled - /// explicitly. - ScalarEvolution &SE; - public: - VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, + VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, - bool NeedsScalarIV, bool NeedsVectorIV, - ScalarEvolution &SE) - : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), - IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), - NeedsVectorIV(NeedsVectorIV), SE(SE) {} + bool NeedsScalarIV, bool NeedsVectorIV) + : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}), + VPValue(IV, this), IV(IV), IndDesc(IndDesc), + NeedsScalarIV(NeedsScalarIV), NeedsVectorIV(NeedsVectorIV) {} - VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, + VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc, bool NeedsScalarIV, - bool NeedsVectorIV, ScalarEvolution &SE) - : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), - IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), - NeedsVectorIV(NeedsVectorIV), SE(SE) {} + bool NeedsVectorIV) + : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}), + VPValue(Trunc, this), IV(IV), IndDesc(IndDesc), + NeedsScalarIV(NeedsScalarIV), NeedsVectorIV(NeedsVectorIV) {} ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1829,6 +1823,8 @@ void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + const SCEV *getSCEV() const { return Expr; } }; /// Canonical scalar induction phi of the vector loop. Starting at the specified @@ -3036,6 +3032,13 @@ /// Returns true if only the first lane of \p Def is used. bool onlyFirstLaneUsed(VPValue *Def); +/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p +/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in +/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's +/// pre-header already contains a recipe expanding \p Expr, return it. If not, +/// create a new one. +VPValue *expandSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE); + } // end namespace vputils } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1295,6 +1295,9 @@ getVPValue(0)->printAsOperand(O, SlotTracker); } else O << " " << VPlanIngredient(IV); + + O << ", "; + getOperand(1)->printAsOperand(O, SlotTracker); } void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, @@ -1750,3 +1753,30 @@ return cast(U)->onlyFirstLaneUsed(Def); }); } + +VPValue *vputils::expandSCEVExpr(VPlan &Plan, const SCEV *Expr, + ScalarEvolution &SE) { + if (auto *E = dyn_cast(Expr)) { + VPValue *Step = new VPValue(E->getValue()); + Plan.addExternalDef(Step); + return Step; + } + + if (auto *E = dyn_cast(Expr)) { + VPValue *Step = new VPValue(E->getValue()); + Plan.addExternalDef(Step); + return Step; + } + + VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock(); + for (VPRecipeBase &R : *Preheader) { + if (auto *ExpandR = dyn_cast(&R)) { + if (ExpandR->getSCEV() == Expr) + return ExpandR; + } + } + + VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE); + Preheader->appendRecipe(Step); + return Step; +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -49,8 +49,9 @@ auto *Phi = cast(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); - NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, - true, SE); + VPValue *Step = vputils::expandSCEVExpr(*Plan, II->getStep(), SE); + NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, + false, true); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -419,27 +420,12 @@ continue; const InductionDescriptor &ID = IV->getInductionDescriptor(); - const SCEV *StepSCEV = ID.getStep(); - VPValue *Step = nullptr; - if (auto *E = dyn_cast(StepSCEV)) { - Step = new VPValue(E->getValue()); - Plan.addExternalDef(Step); - } else if (auto *E = dyn_cast(StepSCEV)) { - Step = new VPValue(E->getValue()); - Plan.addExternalDef(Step); - } else { - Step = new VPExpandSCEVRecipe(StepSCEV, SE); - } - + VPValue *Step = vputils::expandSCEVExpr(Plan, ID.getStep(), SE); Instruction *TruncI = IV->getTruncInst(); VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(), IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr); - HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi()); - if (Step->getDef()) - Plan.getEntry()->getEntryBasicBlock()->appendRecipe( - cast(Step->getDef())); // If there are no vector users of IV, simply update all users to use Step // instead. diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -426,7 +426,7 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION\l" + ; CHECK-NEXT: " %iv = phi %iv.next, 0\l" + -; CHECK-NEXT: " ir<%v2> +; CHECK-NEXT: " ir<%v2>, vp<[[EXP_SCEV]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, vp<[[EXP_SCEV]]> ; CHECK-NEXT: WIDEN ir<%v3> = add ir<%v2>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>