[LoopUnroll] Replace the boolean AllowPeeling preference with a PeelLevel.

TargetTransformInfo::UnrollingPreferences::AllowPeeling (bool) is replaced by
PeelingLevel, which takes one of PL_None, PL_NonProfileBased or PL_All.
gatherUnrollingPreferences defaults it to PL_All. With PL_None peeling is
skipped entirely; with PL_NonProfileBased the peel-count computation in
LoopUnrollPeel.cpp returns before the profile-data based heuristic.

The hidden option -unroll-allow-peeling is replaced by
-unroll-peeling-level=(PL_None|PL_NonProfileBased|PL_All), and
LoopUnrollOptions::setPeeling(bool) by setPeelingLevel(PeelLevel).
Boolean callers (the "peeling" pass parameter and createLoopUnrollPass) map
true to PL_All and false to PL_None.

Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -103,6 +103,16 @@
   bool canAnalyze(LoopInfo &LI);
 };
 
+/// Levels of loop peeling that the loop unroller may be allowed to perform.
+enum PeelLevel {
+  /// Loop peeling is disabled.
+  PL_None,
+  /// Peeling is enabled, but only with heuristics that need no profile data.
+  PL_NonProfileBased,
+  /// All peeling heuristics are enabled.
+  PL_All
+};
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -436,6 +446,8 @@
     /// unrolling transformation will select a peeling factor based on profile
     /// information and other factors.
     unsigned PeelCount;
+    /// Allow different levels of peeling off iterations from a loop.
+    PeelLevel PeelingLevel;
     /// Default unroll count for loops with run-time trip count.
     unsigned DefaultUnrollRuntimeCount;
     // Set the maximum unrolling factor. The unrolling factor may be selected
@@ -469,8 +481,6 @@
     bool Force;
     /// Allow using trip count upper bound to unroll loops.
     bool UpperBound;
-    /// Allow peeling off loop iterations for loops with low dynamic tripcount.
-    bool AllowPeeling;
     /// Allow unrolling of all the iterations of the runtime loop remainder.
     bool UnrollRemainder;
     /// Allow unroll and jam. Used to enable unroll and jam for the target.
Index: llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -59,7 +59,7 @@
 ///
 struct LoopUnrollOptions {
   Optional<bool> AllowPartial;
-  Optional<bool> AllowPeeling;
+  Optional<PeelLevel> PeelingLevel;
   Optional<bool> AllowRuntime;
   Optional<bool> AllowUpperBound;
   int OptLevel;
@@ -92,9 +92,9 @@
     return *this;
   }
 
-  /// Enables or disables loop peeling.
-  LoopUnrollOptions &setPeeling(bool Peeling) {
-    AllowPeeling = Peeling;
+  /// Sets the level of loop peeling to allow.
+  LoopUnrollOptions &setPeelingLevel(PeelLevel PeelingLevel) {
+    this->PeelingLevel = PeelingLevel;
     return *this;
   }
 
Index: llvm/include/llvm/Transforms/Utils/UnrollLoop.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -132,7 +132,7 @@
     BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
-    Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
+    Optional<bool> UserUpperBound, Optional<PeelLevel> UserPeelingLevel);
 
 unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
                              bool &NotDuplicatable, bool &Convergent,
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -1428,7 +1428,7 @@
     if (ParamName == "partial") {
       UnrollOpts.setPartial(Enable);
     } else if (ParamName == "peeling") {
-      UnrollOpts.setPeeling(Enable);
+      UnrollOpts.setPeelingLevel(Enable ? PL_All : PL_None);
     } else if (ParamName == "runtime") {
       UnrollOpts.setRuntime(Enable);
     } else if (ParamName == "upperbound") {
Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -148,10 +148,12 @@
              "threshold, the loop is considered as flat and will be less "
              "aggressively unrolled."));
 
-static cl::opt<bool>
-    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
-                       cl::desc("Allows loops to be peeled when the dynamic "
-                                "trip count is known to be low."));
+static cl::opt<PeelLevel> UnrollPeelingLevel(
+    "unroll-peeling-level", cl::Hidden, cl::desc("Sets the level for peeling"),
+    cl::values(clEnumVal(PL_None, "disable loop peeling"),
+               clEnumVal(PL_NonProfileBased,
+                         "enable peeling with heuristics without profile"),
+               clEnumVal(PL_All, "enable all peeling heuristics")));
 
 static cl::opt<bool> UnrollUnrollRemainder(
   "unroll-remainder", cl::Hidden,
@@ -178,7 +180,7 @@
     BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
-    Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
+    Optional<bool> UserUpperBound, Optional<PeelLevel> UserPeelingLevel) {
   TargetTransformInfo::UnrollingPreferences UP;
 
   // Set up the defaults
@@ -200,7 +202,7 @@
   UP.AllowExpensiveTripCount = false;
   UP.Force = false;
   UP.UpperBound = false;
-  UP.AllowPeeling = true;
+  UP.PeelingLevel = PL_All;
   UP.UnrollAndJam = false;
   UP.UnrollAndJamInnerLoopThreshold = 60;
 
@@ -237,8 +239,8 @@
     UP.Runtime = UnrollRuntime;
   if (UnrollMaxUpperBound == 0)
     UP.UpperBound = false;
-  if (UnrollAllowPeeling.getNumOccurrences() > 0)
-    UP.AllowPeeling = UnrollAllowPeeling;
+  if (UnrollPeelingLevel.getNumOccurrences() > 0)
+    UP.PeelingLevel = UnrollPeelingLevel;
   if (UnrollUnrollRemainder.getNumOccurrences() > 0)
     UP.UnrollRemainder = UnrollUnrollRemainder;
 
@@ -255,8 +257,8 @@
     UP.Runtime = *UserRuntime;
   if (UserUpperBound.hasValue())
     UP.UpperBound = *UserUpperBound;
-  if (UserAllowPeeling.hasValue())
-    UP.AllowPeeling = *UserAllowPeeling;
+  if (UserPeelingLevel.hasValue())
+    UP.PeelingLevel = *UserPeelingLevel;
 
   return UP;
 }
@@ -976,13 +978,12 @@
 static LoopUnrollResult tryToUnrollLoop(
     Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     const TargetTransformInfo &TTI, AssumptionCache &AC,
-    OptimizationRemarkEmitter &ORE,
-    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
-    bool PreserveLCSSA, int OptLevel,
+    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
     bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
     Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
     Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
-    Optional<bool> ProvidedAllowPeeling) {
+    Optional<PeelLevel> ProvidedPeelingLevel) {
   LLVM_DEBUG(dbgs() << "Loop Unroll: F["
                     << L->getHeader()->getParent()->getName() << "] Loop %"
                     << L->getHeader()->getName() << "\n");
@@ -1007,7 +1008,7 @@
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
       L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
       ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
-      ProvidedAllowPeeling);
+      ProvidedPeelingLevel);
 
   // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
   // as threshold later on.
@@ -1168,19 +1169,19 @@
   Optional<bool> ProvidedAllowPartial;
   Optional<bool> ProvidedRuntime;
   Optional<bool> ProvidedUpperBound;
-  Optional<bool> ProvidedAllowPeeling;
+  Optional<PeelLevel> ProvidedPeelingLevel;
 
   LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
              bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
              Optional<unsigned> Count = None,
              Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
              Optional<bool> UpperBound = None,
-             Optional<bool> AllowPeeling = None)
+             Optional<PeelLevel> PeelingLevel = None)
       : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
         ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
         ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
-        ProvidedAllowPeeling(AllowPeeling) {
+        ProvidedPeelingLevel(PeelingLevel) {
     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
 
@@ -1203,10 +1204,10 @@
     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
     LoopUnrollResult Result = tryToUnrollLoop(
-        L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
-        PreserveLCSSA, OptLevel, OnlyWhenForced,
-        ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
-        ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
+        L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
+        OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
+        ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
+        ProvidedPeelingLevel);
 
     if (Result == LoopUnrollResult::FullyUnrolled)
       LPM.markLoopAsDeleted(*L);
@@ -1249,7 +1250,8 @@
       AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
       Runtime == -1 ? None : Optional<bool>(Runtime),
       UpperBound == -1 ? None : Optional<bool>(UpperBound),
-      AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
+      AllowPeeling == -1 ? None : Optional<PeelLevel>(AllowPeeling ? PL_All
+                                                                   : PL_None));
 }
 
 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
@@ -1290,7 +1292,7 @@
                       ForgetSCEV, /*Count*/ None,
                       /*Threshold*/ None, /*AllowPartial*/ false,
                       /*Runtime*/ false, /*UpperBound*/ false,
-                      /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
+                      /*PeelingLevel*/ PL_None) != LoopUnrollResult::Unmodified;
   if (!Changed)
     return PreservedAnalyses::all();
 
@@ -1419,9 +1421,9 @@
     // Check if the profile summary indicates that the profiled application
     // has a huge working set size, in which case we disable peeling to avoid
     // bloating it further.
-    Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
+    Optional<PeelLevel> LocalAllowPeeling = UnrollOpts.PeelingLevel;
     if (PSI && PSI->hasHugeWorkingSetSize())
-      LocalAllowPeeling = false;
+      LocalAllowPeeling = PL_None;
     std::string LoopName = L.getName();
     // The API here is quite complex to call and we allow to select some
     // flavors of unrolling during construction time (by setting UnrollOpts).
Index: llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -279,7 +279,7 @@
   }
 
   // Skip peeling if it's disabled.
-  if (!UP.AllowPeeling)
+  if (UP.PeelingLevel == PL_None)
     return;
 
   // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
@@ -333,6 +333,8 @@
   // If we don't know the trip count, but have reason to believe the average
   // trip count is low, peeling should be beneficial, since we will usually
   // hit the peeled section.
+  if (UP.PeelingLevel == PL_NonProfileBased)
+    return;
   // We only do this in the presence of profile information, since otherwise
   // our estimates of the trip count are not reliable enough.
   if (L->getHeader()->getParent()->hasProfileData()) {
Index: llvm/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
+++ llvm/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 -unroll-allow-peeling=false | FileCheck %s --check-prefix=DISABLE
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 -unroll-peeling-level=PL_None | FileCheck %s --check-prefix=DISABLE
 
 define i32 @invariant_backedge_1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @invariant_backedge_1