diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4216,7 +4216,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR, VPTransformState &State) { - PHINode *P = cast(PN); if (EnableVPlanNativePath) { // Currently we enter here in the VPlan-native path for non-induction // PHIs where all control flow is uniform. We simply widen these PHIs. @@ -4227,128 +4226,12 @@ : VectorType::get(PN->getType(), State.VF); Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); State.set(PhiR, VecPhi, 0); - OrigPHIsToFix.push_back(P); + OrigPHIsToFix.push_back(cast(PN)); return; } - - assert(PN->getParent() == OrigLoop->getHeader() && - "Non-header phis should have been handled elsewhere"); - - // In order to support recurrences we need to be able to vectorize Phi nodes. - // Phi nodes have cycles, so we need to vectorize them in two stages. This is - // stage #1: We create a new vector PHI node with no incoming edges. We'll use - // this value when we vectorize all of the instructions that use the PHI. - - assert(!Legal->isReductionVariable(P) && - "reductions should be handled elsewhere"); - - setDebugLocFromInst(P); - - // This PHINode must be an induction variable. - // Make sure that we know about it. - assert(Legal->getInductionVars().count(P) && "Not an induction variable"); - - InductionDescriptor II = Legal->getInductionVars().lookup(P); - const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); - - auto *IVR = PhiR->getParent()->getPlan()->getCanonicalIV(); - PHINode *CanonicalIV = cast(State.get(IVR, 0)); - - // FIXME: The newly created binary instructions should contain nsw/nuw flags, - // which can be found from the original scalar operations. - switch (II.getKind()) { - case InductionDescriptor::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case InductionDescriptor::IK_IntInduction: - case InductionDescriptor::IK_FpInduction: - llvm_unreachable("Integer/fp induction is handled elsewhere."); - case InductionDescriptor::IK_PtrInduction: { - // Handle the pointer induction variable case. - assert(P->getType()->isPointerTy() && "Unexpected type."); - - if (all_of(PhiR->users(), [PhiR](const VPUser *U) { - return cast(U)->usesScalars(PhiR); - })) { - // This is the normalized GEP that starts counting at zero. - Value *PtrInd = - Builder.CreateSExtOrTrunc(CanonicalIV, II.getStep()->getType()); - // Determine the number of scalars we need to generate for each unroll - // iteration. If the instruction is uniform, we only need to generate the - // first lane. Otherwise, we generate all VF values. - bool IsUniform = vputils::onlyFirstLaneUsed(PhiR); - assert((IsUniform || !State.VF.isScalable()) && - "Cannot scalarize a scalable VF"); - unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *PartStart = - createStepForVF(Builder, PtrInd->getType(), VF, Part); - - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Value *Idx = Builder.CreateAdd( - PartStart, ConstantInt::get(PtrInd->getType(), Lane)); - Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - - Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(), - State.CFG.PrevBB->getTerminator()); - Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, - II.getStartValue(), Step, II); - SclrGep->setName("next.gep"); - State.set(PhiR, SclrGep, VPIteration(Part, Lane)); - } - } - return; - } - assert(isa(II.getStep()) && - "Induction step not a SCEV constant!"); - Type *PhiType = II.getStep()->getType(); - - // Build a pointer phi - Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue(); - Type *ScStValueType = ScalarStartValue->getType(); - PHINode *NewPointerPhi = - PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); - NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader); - - // A pointer induction, performed by using a gep - BasicBlock *LoopLatch = LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); - Instruction *InductionLoc = LoopLatch->getTerminator(); - const SCEV *ScalarStep = II.getStep(); - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - Value *ScalarStepValue = - Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); - Value *RuntimeVF = getRuntimeVF(Builder, PhiType, VF); - Value *NumUnrolledElems = - Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF)); - Value *InductionGEP = GetElementPtrInst::Create( - II.getElementType(), NewPointerPhi, - Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", - InductionLoc); - NewPointerPhi->addIncoming(InductionGEP, LoopLatch); - - // Create UF many actual address geps that use the pointer - // phi as base and a vectorized version of the step value - // () as offset. - for (unsigned Part = 0; Part < State.UF; ++Part) { - Type *VecPhiType = VectorType::get(PhiType, State.VF); - Value *StartOffsetScalar = - Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part)); - Value *StartOffset = - Builder.CreateVectorSplat(State.VF, StartOffsetScalar); - // Create a vector of consecutive numbers from zero to VF. - StartOffset = - Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType)); - - Value *GEP = Builder.CreateGEP( - II.getElementType(), NewPointerPhi, - Builder.CreateMul( - StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue), - "vector.gep")); - State.set(PhiR, GEP, Part); - } - } - } + llvm_unreachable( + "Non-native vplans are not expected to have VPWidenPHIRecipes."); } /// A helper function for checking whether an integer division-related @@ -8327,7 +8210,7 @@ !NeedsScalarIVOnly, SE); } -VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( +VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI( PHINode *Phi, ArrayRef Operands, VFRange &Range) const { // Check if this is an integer or fp induction. If so, build the recipe that @@ -8336,6 +8219,11 @@ return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *PSE.getSE(), *OrigLoop, Range); + auto I = Legal->getInductionVars().find(Phi); + if (I != Legal->getInductionVars().end()) + return new VPWidenPointerInductionRecipe(Phi, Operands[0], I->second, + *PSE.getSE()); + return nullptr; } @@ -9651,6 +9539,101 @@ VecInd->addIncoming(LastInduction, LoopVectorLatch); } +void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { + assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction); + + PHINode *Phi = cast(getUnderlyingInstr()); + assert(Phi->getType()->isPointerTy() && "Unexpected type."); + + auto *IVR = getParent()->getPlan()->getCanonicalIV(); + PHINode *CanonicalIV = cast(State.get(IVR, 0)); + + if (all_of(users(), [this](const VPUser *U) { + return cast(U)->usesScalars(this); + })) { + // This is the normalized GEP that starts counting at zero. + Value *PtrInd = State.Builder.CreateSExtOrTrunc( + CanonicalIV, IndDesc.getStep()->getType()); + // Determine the number of scalars we need to generate for each unroll + // iteration. If the instruction is uniform, we only need to generate the + // first lane. Otherwise, we generate all VF values. + bool IsUniform = vputils::onlyFirstLaneUsed(this); + assert((IsUniform || !State.VF.isScalable()) && + "Cannot scalarize a scalable VF"); + unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *PartStart = + createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part); + + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + Value *Idx = State.Builder.CreateAdd( + PartStart, ConstantInt::get(PtrInd->getType(), Lane)); + Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx); + + Value *Step = CreateStepValue(IndDesc.getStep(), SE, + State.CFG.PrevBB->getTerminator()); + Value *SclrGep = emitTransformedIndex( + State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc); + SclrGep->setName("next.gep"); + State.set(this, SclrGep, VPIteration(Part, Lane)); + } + } + return; + } + + assert(isa(IndDesc.getStep()) && + "Induction step not a SCEV constant!"); + Type *PhiType = IndDesc.getStep()->getType(); + + // Build a pointer phi + Value *ScalarStartValue = getStartValue()->getLiveInIRValue(); + Type *ScStValueType = ScalarStartValue->getType(); + PHINode *NewPointerPhi = + PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); + NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader); + + // A pointer induction, performed by using a gep + BasicBlock *LoopLatch = + State.LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); + + const DataLayout &DL = LoopLatch->getModule()->getDataLayout(); + Instruction *InductionLoc = LoopLatch->getTerminator(); + const SCEV *ScalarStep = IndDesc.getStep(); + SCEVExpander Exp(SE, DL, "induction"); + Value *ScalarStepValue = Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); + Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); + Value *NumUnrolledElems = + State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF)); + Value *InductionGEP = GetElementPtrInst::Create( + IndDesc.getElementType(), NewPointerPhi, + State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", + InductionLoc); + NewPointerPhi->addIncoming(InductionGEP, LoopLatch); + + // Create UF many actual address geps that use the pointer + // phi as base and a vectorized version of the step value + // () as offset. + for (unsigned Part = 0; Part < State.UF; ++Part) { + Type *VecPhiType = VectorType::get(PhiType, State.VF); + Value *StartOffsetScalar = + State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part)); + Value *StartOffset = + State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar); + // Create a vector of consecutive numbers from zero to VF. + StartOffset = State.Builder.CreateAdd( + StartOffset, State.Builder.CreateStepVector(VecPhiType)); + + Value *GEP = State.Builder.CreateGEP( + IndDesc.getElementType(), NewPointerPhi, + State.Builder.CreateMul( + StartOffset, + State.Builder.CreateVectorSplat(State.VF, ScalarStepValue), + "vector.gep")); + State.set(this, GEP, Part); + } +} + void VPScalarIVStepsRecipe::execute(VPTransformState &State) { assert(!State.Instance && "VPScalarIVStepsRecipe being replicated."); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -74,9 +74,9 @@ /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. - VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands, - VFRange &Range) const; + VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi, + ArrayRef Operands, + VFRange &Range) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1201,6 +1201,48 @@ } }; +class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { + const InductionDescriptor &IndDesc; + + /// SCEV used to expand step. + /// FIXME: move expansion of step to the pre-header, once it is modeled + /// explicitly. + ScalarEvolution &SE; + +public: + /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. + VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, + const InductionDescriptor &IndDesc, + ScalarEvolution &SE) + : VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC, + Phi), + IndDesc(IndDesc), SE(SE) { + addOperand(Start); + } + + ~VPWidenPointerInductionRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *B) { + return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC; + } + static inline bool classof(const VPHeaderPHIRecipe *R) { + return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC; + } + + /// Generate + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling header phis that are widened in the vector loop. /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are /// managed in the recipe directly. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -577,6 +577,7 @@ case VPBranchOnMaskSC: return false; case VPWidenIntOrFpInductionSC: + case VPWidenPointerInductionSC: case VPWidenCanonicalIVSC: case VPWidenPHISC: case VPBlendSC: @@ -983,7 +984,8 @@ for (VPRecipeBase &R : Header->phis()) { // Skip phi-like recipes that generate their backedege values themselves. // TODO: Model their backedge values explicitly. - if (isa(&R) || isa(&R)) + if (isa(&R) || isa(&R) || + isa(&R)) continue; auto *PhiR = cast(&R); @@ -1274,6 +1276,17 @@ } else O << " " << VPlanIngredient(IV); } + +void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = WIDEN-POINTER-INDUCTION "; + getOperand(0)->printAsOperand(O, SlotTracker); + O << ", "; + O << *IndDesc.getStep(); +} + #endif bool VPWidenIntOrFpInductionRecipe::isCanonical() const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -106,6 +106,7 @@ VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, VPVWidenIntOrFpInductionSC, + VPVWidenPointerInductionSC, VPVPredInstPHI, VPVReductionPHISC, }; @@ -346,6 +347,7 @@ VPFirstOrderRecurrencePHISC, VPWidenPHISC, VPWidenIntOrFpInductionSC, + VPWidenPointerInductionSC, VPPredInstPHISC, VPReductionPHISC, VPFirstPHISC = VPBlendSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next -; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next +; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1 +; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1 ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1> ; CHECK-NEXT: WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2>