diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -192,6 +192,15 @@ DataAndControlFlowWithoutRuntimeCheck }; +struct TailFoldingInfo { + TargetLibraryInfo *TLI; + LoopVectorizationLegality *LVL; + InterleavedAccessInfo *IAI; + TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, + InterleavedAccessInfo *IAI) + : TLI(TLI), LVL(LVL), IAI(IAI) {} +}; + class TargetTransformInfo; typedef TargetTransformInfo TTI; @@ -583,11 +592,7 @@ /// Query the target whether it would be prefered to create a predicated /// vector loop, which can avoid the need to emit a scalar epilogue loop. - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) const; + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const; /// Query the target what the preferred style of tail folding is. /// \param IVUpdateMayOverflow Tells whether it is known if the IV update @@ -1703,11 +1708,7 @@ AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) = 0; - virtual bool - preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) = 0; + virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0; virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0; virtual std::optional instCombineIntrinsic( @@ -2110,12 +2111,8 @@ HardwareLoopInfo &HWLoopInfo) override { return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) override { - return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override { + return Impl.preferPredicateOverEpilogue(TFI); } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -163,13 +163,7 @@ return false; } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) const { - return false; - } + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -622,12 +622,8 @@ return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) { - return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) { + return BaseT::preferPredicateOverEpilogue(TFI); } TailFoldingStyle diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -392,6 +392,16 @@ return &PSE; } + Loop *getLoop() const { return TheLoop; } + + LoopInfo *getLoopInfo() const { return LI; } + + AssumptionCache *getAssumptionCache() const { return AC; } + + ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); } + + DominatorTree *getDominatorTree() const { return DT; } + private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -322,10 +322,8 @@ } bool TargetTransformInfo::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) const { - return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); + TailFoldingInfo *TFI) const { + return TTIImpl->preferPredicateOverEpilogue(TFI); } TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle( diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -358,11 +358,7 @@ return TailFoldingStyle::DataWithoutLaneMask; } - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI); + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI); bool supportsScalableVectors() const { return ST->hasSVE(); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3410,29 +3410,27 @@ return false; } -bool AArch64TTIImpl::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) { +bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) return false; // We don't currently support vectorisation with interleaving for SVE - with // such loops we're better off not using tail-folding. This gives us a chance // to fall back on fixed-width vectorisation using NEON's ld2/st2/etc. - if (IAI->hasGroups()) + if (TFI->IAI->hasGroups()) return false; TailFoldingKind Required; // Defaults to 0. - if (LVL->getReductionVars().size()) + if (TFI->LVL->getReductionVars().size()) Required.add(TailFoldingKind::TFReductions); - if (LVL->getFixedOrderRecurrences().size()) + if (TFI->LVL->getFixedOrderRecurrences().size()) Required.add(TailFoldingKind::TFRecurrences); // We call this to discover whether any load/store pointers in the loop have // negative strides. This will require extra work to reverse the loop // predicate, which may be expensive. - if (containsDecreasingPointers(L, LVL->getPredicatedScalarEvolution())) + if (containsDecreasingPointers(TFI->LVL->getLoop(), + TFI->LVL->getPredicatedScalarEvolution())) Required.add(TailFoldingKind::TFReverse); if (!Required) Required.add(TailFoldingKind::TFSimple); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -303,11 +303,7 @@ AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo); - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, - LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI); + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2238,10 +2238,7 @@ return true; } -bool ARMTTIImpl::preferPredicateOverEpilogue( - Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, - InterleavedAccessInfo *IAI) { +bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { if (!EnableTailPredication) { LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); return false; @@ -2253,6 +2250,9 @@ if (!ST->hasMVEIntegerOps()) return false; + LoopVectorizationLegality *LVL = TFI->LVL; + Loop *L = LVL->getLoop(); + // For now, restrict this to single block loops. if (L->getNumBlocks() > 1) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block " @@ -2262,6 +2262,7 @@ assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected"); + LoopInfo *LI = LVL->getLoopInfo(); HardwareLoopInfo HWLoopInfo(L); if (!HWLoopInfo.canAnalyze(*LI)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " @@ -2269,21 +2270,25 @@ return false; } + AssumptionCache *AC = LVL->getAssumptionCache(); + ScalarEvolution *SE = LVL->getScalarEvolution(); + // This checks if we have the low-overhead branch architecture // extension, and if we will create a hardware-loop: - if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) { + if (!isHardwareLoopProfitable(L, *SE, *AC, TFI->TLI, HWLoopInfo)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " "profitable.\n"); return false; } - if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) { + DominatorTree *DT = LVL->getDominatorTree(); + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) { LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " "a candidate.\n"); return false; } - return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI()); + return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI()); } TailFoldingStyle diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9723,7 +9723,6 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, - AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) { // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. @@ -9759,7 +9758,8 @@ }; // 4) if the TTI hook indicates this is profitable, request predication. - if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL, IAI)) + TailFoldingInfo TFI(TLI, &LVL, IAI); + if (TTI->preferPredicateOverEpilogue(&TFI)) return CM_ScalarEpilogueNotNeededUsePredicate; return CM_ScalarEpilogueAllowed; @@ -9852,8 +9852,8 @@ Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL, &IAI); + ScalarEpilogueLowering SEL = + getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); @@ -10121,8 +10121,8 @@ // Check the function attributes and profiles to find out if this function // should be optimized for size. - ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL, &IAI); + ScalarEpilogueLowering SEL = + getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI); // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads.