diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -315,6 +315,10 @@
   /// floating point induction.
   const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
 
+  /// Returns a pointer to the induction descriptor, if \p Phi is pointer
+  /// induction.
+  const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
+
   /// Returns True if V is a cast that is part of an induction def-use chain,
   /// and had been proven to be redundant under a runtime guard (in other
   /// words, the cast has the same SCEV expression as the induction phi).
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -971,6 +971,16 @@
   return nullptr;
 }
 
+const InductionDescriptor *
+LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
+  if (!isInductionPhi(Phi))
+    return nullptr;
+  auto &ID = getInductionVars().find(Phi)->second;
+  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
+    return &ID;
+  return nullptr;
+}
+
 bool LoopVectorizationLegality::isCastedInductionVariable(
     const Value *V) const {
   auto *Inst = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -490,9 +490,8 @@
   /// new unrolled loop, where UF is the unroll factor.
   using VectorParts = SmallVector<Value *, 2>;
 
-  /// Vectorize a single first-order recurrence or pointer induction PHINode in
-  /// a block. This method handles the induction variable canonicalization. It
-  /// supports both VF = 1 for unrolled loops and arbitrary length vectors.
+  /// Vectorize a single vector PHINode in a block in the VPlan-native path
+  /// only.
   void widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR,
                            VPTransformState &State);
 
@@ -4217,139 +4216,18 @@
 void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
                                               VPWidenPHIRecipe *PhiR,
                                               VPTransformState &State) {
-  PHINode *P = cast<PHINode>(PN);
-  if (EnableVPlanNativePath) {
-    // Currently we enter here in the VPlan-native path for non-induction
-    // PHIs where all control flow is uniform. We simply widen these PHIs.
-    // Create a vector phi with no operands - the vector phi operands will be
-    // set at the end of vector code generation.
-    Type *VecTy = (State.VF.isScalar())
-                      ? PN->getType()
-                      : VectorType::get(PN->getType(), State.VF);
-    Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
-    State.set(PhiR, VecPhi, 0);
-    OrigPHIsToFix.push_back(P);
-
-    return;
-  }
-
-  assert(PN->getParent() == OrigLoop->getHeader() &&
-         "Non-header phis should have been handled elsewhere");
-
-  // In order to support recurrences we need to be able to vectorize Phi nodes.
-  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
-  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
-  // this value when we vectorize all of the instructions that use the PHI.
-
-  assert(!Legal->isReductionVariable(P) &&
-         "reductions should be handled elsewhere");
-
-  setDebugLocFromInst(P);
-
-  // This PHINode must be an induction variable.
-  // Make sure that we know about it.
-  assert(Legal->getInductionVars().count(P) && "Not an induction variable");
-
-  InductionDescriptor II = Legal->getInductionVars().lookup(P);
-  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
-
-  auto *IVR = PhiR->getParent()->getPlan()->getCanonicalIV();
-  PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
-
-  // FIXME: The newly created binary instructions should contain nsw/nuw flags,
-  // which can be found from the original scalar operations.
-  switch (II.getKind()) {
-  case InductionDescriptor::IK_NoInduction:
-    llvm_unreachable("Unknown induction");
-  case InductionDescriptor::IK_IntInduction:
-  case InductionDescriptor::IK_FpInduction:
-    llvm_unreachable("Integer/fp induction is handled elsewhere.");
-  case InductionDescriptor::IK_PtrInduction: {
-    // Handle the pointer induction variable case.
-    assert(P->getType()->isPointerTy() && "Unexpected type.");
-
-    if (all_of(PhiR->users(), [PhiR](const VPUser *U) {
-          return cast<VPRecipeBase>(U)->usesScalars(PhiR);
-        })) {
-      // This is the normalized GEP that starts counting at zero.
-      Value *PtrInd =
-          Builder.CreateSExtOrTrunc(CanonicalIV, II.getStep()->getType());
-      // Determine the number of scalars we need to generate for each unroll
-      // iteration. If the instruction is uniform, we only need to generate the
-      // first lane. Otherwise, we generate all VF values.
-      bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
-      assert((IsUniform || !State.VF.isScalable()) &&
-             "Cannot scalarize a scalable VF");
-      unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
-
-      for (unsigned Part = 0; Part < UF; ++Part) {
-        Value *PartStart =
-            createStepForVF(Builder, PtrInd->getType(), VF, Part);
-
-        for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
-          Value *Idx = Builder.CreateAdd(
-              PartStart, ConstantInt::get(PtrInd->getType(), Lane));
-          Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-
-          Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(),
-                                        State.CFG.PrevBB->getTerminator());
-          Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx,
-                                                II.getStartValue(), Step, II);
-          SclrGep->setName("next.gep");
-          State.set(PhiR, SclrGep, VPIteration(Part, Lane));
-        }
-      }
-      return;
-    }
-    assert(isa<SCEVConstant>(II.getStep()) &&
-           "Induction step not a SCEV constant!");
-    Type *PhiType = II.getStep()->getType();
-
-    // Build a pointer phi
-    Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue();
-    Type *ScStValueType = ScalarStartValue->getType();
-    PHINode *NewPointerPhi =
-        PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
-    NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader);
-
-    // A pointer induction, performed by using a gep
-    BasicBlock *LoopLatch = LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch();
-    Instruction *InductionLoc = LoopLatch->getTerminator();
-    const SCEV *ScalarStep = II.getStep();
-    SCEVExpander Exp(*PSE.getSE(), DL, "induction");
-    Value *ScalarStepValue =
-        Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
-    Value *RuntimeVF = getRuntimeVF(Builder, PhiType, VF);
-    Value *NumUnrolledElems =
-        Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
-    Value *InductionGEP = GetElementPtrInst::Create(
-        II.getElementType(), NewPointerPhi,
-        Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
-        InductionLoc);
-    NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
-
-    // Create UF many actual address geps that use the pointer
-    // phi as base and a vectorized version of the step value
-    // (<step*0, step*1, ...>) as offset.
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Type *VecPhiType = VectorType::get(PhiType, State.VF);
-      Value *StartOffsetScalar =
-          Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
-      Value *StartOffset =
-          Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
-      // Create a vector of consecutive numbers from zero to VF.
-      StartOffset =
-          Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType));
-
-      Value *GEP = Builder.CreateGEP(
-          II.getElementType(), NewPointerPhi,
-          Builder.CreateMul(
-              StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue),
-              "vector.gep"));
-      State.set(PhiR, GEP, Part);
-    }
-  }
-  }
+  assert(EnableVPlanNativePath &&
+         "Non-native vplans are not expected to have VPWidenPHIRecipes.");
+  // Currently we enter here in the VPlan-native path for non-induction
+  // PHIs where all control flow is uniform. We simply widen these PHIs.
+  // Create a vector phi with no operands - the vector phi operands will be
+  // set at the end of vector code generation.
+  Type *VecTy = (State.VF.isScalar())
+                    ? PN->getType()
+                    : VectorType::get(PN->getType(), State.VF);
+  Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
+  State.set(PhiR, VecPhi, 0);
+  OrigPHIsToFix.push_back(cast<PHINode>(PN));
 }
 
 /// A helper function for checking whether an integer division-related
@@ -8336,7 +8214,7 @@
                                 !NeedsScalarIVOnly, SE);
 }
 
-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
     PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
 
   // Check if this is an integer or fp induction. If so, build the recipe that
@@ -8345,6 +8223,10 @@
     return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM,
                                       *PSE.getSE(), *OrigLoop, Range);
 
+  // Check if this is pointer induction. If so, build the recipe for it.
+  if (auto *II = Legal->getPointerInductionDescriptor(Phi))
+    return new VPWidenPointerInductionRecipe(Phi, Operands[0], *II,
+                                             *PSE.getSE());
   return nullptr;
 }
 
@@ -9660,6 +9542,102 @@
   VecInd->addIncoming(LastInduction, LoopVectorLatch);
 }
 
+void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
+  assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
+         "Not a pointer induction according to InductionDescriptor!");
+
+  PHINode *Phi = cast<PHINode>(getUnderlyingInstr());
+  assert(Phi->getType()->isPointerTy() && "Unexpected type.");
+
+  auto *IVR = getParent()->getPlan()->getCanonicalIV();
+  PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
+
+  if (all_of(users(), [this](const VPUser *U) {
+        return cast<VPRecipeBase>(U)->usesScalars(this);
+      })) {
+    // This is the normalized GEP that starts counting at zero.
+    Value *PtrInd = State.Builder.CreateSExtOrTrunc(
+        CanonicalIV, IndDesc.getStep()->getType());
+    // Determine the number of scalars we need to generate for each unroll
+    // iteration. If the instruction is uniform, we only need to generate the
+    // first lane. Otherwise, we generate all VF values.
+    bool IsUniform = vputils::onlyFirstLaneUsed(this);
+    assert((IsUniform || !State.VF.isScalable()) &&
+           "Cannot scalarize a scalable VF");
+    unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
+
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Value *PartStart =
+          createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
+
+      for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+        Value *Idx = State.Builder.CreateAdd(
+            PartStart, ConstantInt::get(PtrInd->getType(), Lane));
+        Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
+
+        Value *Step = CreateStepValue(IndDesc.getStep(), SE,
+                                      State.CFG.PrevBB->getTerminator());
+        Value *SclrGep = emitTransformedIndex(
+            State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
+        SclrGep->setName("next.gep");
+        State.set(this, SclrGep, VPIteration(Part, Lane));
+      }
+    }
+    return;
+  }
+
+  assert(isa<SCEVConstant>(IndDesc.getStep()) &&
+         "Induction step not a SCEV constant!");
+  Type *PhiType = IndDesc.getStep()->getType();
+
+  // Build a pointer phi
+  Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
+  Type *ScStValueType = ScalarStartValue->getType();
+  PHINode *NewPointerPhi =
+      PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
+  NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader);
+
+  // A pointer induction, performed by using a gep
+  BasicBlock *LoopLatch =
+      State.LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch();
+
+  const DataLayout &DL = LoopLatch->getModule()->getDataLayout();
+  Instruction *InductionLoc = LoopLatch->getTerminator();
+  const SCEV *ScalarStep = IndDesc.getStep();
+  SCEVExpander Exp(SE, DL, "induction");
+  Value *ScalarStepValue = Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
+  Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
+  Value *NumUnrolledElems =
+      State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
+  Value *InductionGEP = GetElementPtrInst::Create(
+      IndDesc.getElementType(), NewPointerPhi,
+      State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
+      InductionLoc);
+  NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
+
+  // Create UF many actual address geps that use the pointer
+  // phi as base and a vectorized version of the step value
+  // (<step*0, step*1, ...>) as offset.
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Type *VecPhiType = VectorType::get(PhiType, State.VF);
+    Value *StartOffsetScalar =
+        State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
+    Value *StartOffset =
+        State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
+    // Create a vector of consecutive numbers from zero to VF.
+    StartOffset = State.Builder.CreateAdd(
+        StartOffset, State.Builder.CreateStepVector(VecPhiType));
+
+    Value *GEP = State.Builder.CreateGEP(
+        IndDesc.getElementType(), NewPointerPhi,
+        State.Builder.CreateMul(
+            StartOffset,
+            State.Builder.CreateVectorSplat(State.VF, ScalarStepValue),
+            "vector.gep"));
+    State.set(this, GEP, Part);
+  }
+}
+
 void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");
 
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -74,9 +74,9 @@
 
   /// Check if an induction recipe should be constructed for \I. If so build
   /// and return it. If not, return null.
-  VPWidenIntOrFpInductionRecipe *
-  tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
-                            VFRange &Range) const;
+  VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
+                                          ArrayRef<VPValue *> Operands,
+                                          VFRange &Range) const;
 
   /// Optimize the special case where the operand of \p I is a constant integer
   /// induction variable.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1185,6 +1185,9 @@
   VPValue *getStartValue() {
     return getNumOperands() == 0 ? nullptr : getOperand(0);
   }
+  VPValue *getStartValue() const {
+    return getNumOperands() == 0 ? nullptr : getOperand(0);
+  }
 
   /// Returns the incoming value from the loop backedge.
   VPValue *getBackedgeValue() {
@@ -1198,6 +1201,49 @@
   }
 };
 
+class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
+  const InductionDescriptor &IndDesc;
+
+  /// SCEV used to expand step.
+  /// FIXME: move expansion of step to the pre-header, once it is modeled
+  /// explicitly.
+  ScalarEvolution &SE;
+
+public:
+  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
+  /// Start.
+  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start,
+                                const InductionDescriptor &IndDesc,
+                                ScalarEvolution &SE)
+      : VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
+                          Phi),
+        IndDesc(IndDesc), SE(SE) {
+    addOperand(Start);
+  }
+
+  ~VPWidenPointerInductionRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPRecipeBase *B) {
+    return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
+  }
+  static inline bool classof(const VPHeaderPHIRecipe *R) {
+    return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
+  }
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC;
+  }
+
+  /// Generate vector values for the pointer induction.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe for handling header phis that are widened in the vector loop.
 /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
 /// managed in the recipe directly.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -575,6 +575,7 @@
   case VPBranchOnMaskSC:
     return false;
   case VPWidenIntOrFpInductionSC:
+  case VPWidenPointerInductionSC:
   case VPWidenCanonicalIVSC:
   case VPWidenPHISC:
   case VPBlendSC:
@@ -981,7 +982,8 @@
   for (VPRecipeBase &R : Header->phis()) {
     // Skip phi-like recipes that generate their backedege values themselves.
     // TODO: Model their backedge values explicitly.
-    if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
+    if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R) ||
+        isa<VPWidenPointerInductionRecipe>(&R))
       continue;
 
     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
@@ -1272,6 +1274,16 @@
   } else
     O << " " << VPlanIngredient(IV);
 }
+
+void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
+                                          VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  printAsOperand(O, SlotTracker);
+  O << " = WIDEN-POINTER-INDUCTION ";
+  getStartValue()->printAsOperand(O, SlotTracker);
+  O << ", " << *IndDesc.getStep();
+}
+
 #endif
 
 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -106,6 +106,7 @@
     VPVFirstOrderRecurrencePHISC,
     VPVWidenPHISC,
     VPVWidenIntOrFpInductionSC,
+    VPVWidenPointerInductionSC,
     VPVPredInstPHI,
     VPVReductionPHISC,
   };
@@ -346,6 +347,7 @@
     VPFirstOrderRecurrencePHISC,
     VPWidenPHISC,
     VPWidenIntOrFpInductionSC,
+    VPWidenPointerInductionSC,
     VPPredInstPHISC,
     VPReductionPHISC,
     VPFirstPHISC = VPBlendSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -13,8 +13,8 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   loop.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next
-; CHECK-NEXT:     WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next
+; CHECK-NEXT:     EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1
+; CHECK-NEXT:     EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
 ; CHECK-NEXT:     WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1>
 ; CHECK-NEXT:     WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next>
 ; CHECK-NEXT:     WIDEN ir<%lv> = load ir<%ptr.iv.2>