Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -538,9 +538,7 @@ /// Query the target whether it would be prefered to create a predicated /// vector loop, which can avoid the need to emit a scalar epilogue loop. - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const; @@ -1651,11 +1649,9 @@ AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) = 0; - virtual bool - preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) = 0; + virtual bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, + LoopVectorizationLegality *LVL, + InterleavedAccessInfo *IAI) = 0; virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0; virtual std::optional instCombineIntrinsic( @@ -2051,12 +2047,10 @@ HardwareLoopInfo &HWLoopInfo) override { return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) override { - return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + return Impl.preferPredicateOverEpilogue(TLI, LVL, IAI); } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override { Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -163,9 +163,7 @@ return false; } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const { return false; Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -622,12 +622,10 @@ return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) { - return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + return BaseT::preferPredicateOverEpilogue(TLI, LVL, IAI); } TailFoldingStyle Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -388,6 +388,16 @@ return ConditionalAssumes; } + Loop *getLoop() const { return TheLoop; } + + LoopInfo *getLoopInfo() const { return LI; } + + AssumptionCache *getAssumptionCache() const { return AC; } + + ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); } + + DominatorTree *getDominatorTree() const { return DT; } + private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -306,10 +306,9 @@ } bool TargetTransformInfo::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, + TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const { - return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + return TTIImpl->preferPredicateOverEpilogue(TLI, LVL, IAI); } TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle( Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -356,9 +356,7 @@ return TailFoldingStyle::DataWithoutLaneMask; } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI); Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3379,10 +3379,9 @@ return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); } -bool AArch64TTIImpl::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) { +bool AArch64TTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI, + LoopVectorizationLegality *LVL, + InterleavedAccessInfo *IAI) { if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) return false; Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -303,9 +303,7 @@ AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo); - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, + bool preferPredicateOverEpilogue(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2238,10 +2238,9 @@ return true; } -bool ARMTTIImpl::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) { +bool ARMTTIImpl::preferPredicateOverEpilogue(TargetLibraryInfo *TLI, + LoopVectorizationLegality *LVL, + InterleavedAccessInfo *IAI) { if (!EnableTailPredication) { LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); return false; @@ -2253,6 +2252,8 @@ if (!ST->hasMVEIntegerOps()) return false; + Loop *L = LVL->getLoop(); + // For now, restrict this to single block loops. if (L->getNumBlocks() > 1) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block " @@ -2262,6 +2263,7 @@ assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected"); + LoopInfo *LI = LVL->getLoopInfo(); HardwareLoopInfo HWLoopInfo(L); if (!HWLoopInfo.canAnalyze(*LI)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " @@ -2269,21 +2271,25 @@ return false; } + AssumptionCache *AC = LVL->getAssumptionCache(); + ScalarEvolution *SE = LVL->getScalarEvolution(); + // This checks if we have the low-overhead branch architecture // extension, and if we will create a hardware-loop: - if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) { + if (!isHardwareLoopProfitable(L, *SE, *AC, TLI, HWLoopInfo)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " "profitable.\n"); return false; } - if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) { + DominatorTree *DT = LVL->getDominatorTree(); + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " "a candidate.\n"); return false; } - return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI()); + return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI()); } TailFoldingStyle Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9757,7 +9757,6 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, - AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) { // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. @@ -9793,7 +9792,7 @@ }; // 4) if the TTI hook indicates this is profitable, request predication. - if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL, IAI)) + if (TTI->preferPredicateOverEpilogue(TLI, &LVL, IAI)) return CM_ScalarEpilogueNotNeededUsePredicate; return CM_ScalarEpilogueAllowed; @@ -9886,8 +9885,8 @@ Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL, &IAI); + ScalarEpilogueLowering SEL = + getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); @@ -10155,8 +10154,8 @@ // Check the function attributes and profiles to find out if this function // should be optimized for size. - ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL, &IAI); + ScalarEpilogueLowering SEL = + getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI); // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads.