Index: llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -62,6 +62,7 @@ Optional AllowPeeling; Optional AllowRuntime; Optional AllowUpperBound; + Optional FullPeeling; int OptLevel; /// If false, use a cost model to determine whether unrolling of a loop is @@ -110,6 +111,12 @@ OptLevel = O; return *this; } + + // Sets the FullPeeling flag that is forwarded to the unrolling preferences. + LoopUnrollOptions &setFullPeeling(bool Enable) { + FullPeeling = Enable; + return *this; + } }; /// Loop unroll pass that will support both full and partial unrolling. Index: llvm/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -132,7 +132,8 @@ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling); + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowFullPeeling); unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1428,6 +1428,8 @@ UnrollOpts.setPartial(Enable); } else if (ParamName == "peeling") { UnrollOpts.setPeeling(Enable); + } else if (ParamName == "full-peeling") { + UnrollOpts.setFullPeeling(Enable); } else if (ParamName == "runtime") { UnrollOpts.setRuntime(Enable); } else if (ParamName == "upperbound") { Index: llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp =================================================================== --- 
llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -293,9 +293,9 @@ if (Latch != Exit || SubLoopLatch != SubLoopExit) return LoopUnrollResult::Unmodified; - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, nullptr, nullptr, OptLevel, - None, None, None, None, None, None); + TargetTransformInfo::UnrollingPreferences UP = + gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, + None, None, None, None, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -178,7 +178,8 @@ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling) { + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowFullPeeling) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -258,6 +259,8 @@ UP.UpperBound = *UserUpperBound; if (UserAllowPeeling.hasValue()) UP.AllowPeeling = *UserAllowPeeling; + if (UserAllowFullPeeling.hasValue()) + UP.FullPeeling = *UserAllowFullPeeling; return UP; } @@ -977,13 +980,13 @@ static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional ProvidedCount, Optional 
ProvidedThreshold, Optional ProvidedAllowPartial, Optional ProvidedRuntime, Optional ProvidedUpperBound, - Optional ProvidedAllowPeeling) { + Optional ProvidedAllowPeeling, + Optional ProvidedAllowFullPeeling) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); @@ -1008,7 +1011,7 @@ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling); + ProvidedAllowPeeling, ProvidedAllowFullPeeling); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. @@ -1170,18 +1173,21 @@ Optional ProvidedRuntime; Optional ProvidedUpperBound; Optional ProvidedAllowPeeling; + Optional ProvidedAllowFullPeeling; LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false, Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, Optional UpperBound = None, - Optional AllowPeeling = None) + Optional AllowPeeling = None, + Optional AllowFullPeeling = None) : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + ProvidedAllowPeeling(AllowPeeling), + ProvidedAllowFullPeeling(AllowFullPeeling) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1204,10 +1210,10 @@ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, - PreserveLCSSA, OptLevel, OnlyWhenForced, - ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound, 
ProvidedAllowPeeling); + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, + OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, + ProvidedAllowPeeling, ProvidedAllowFullPeeling); if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1291,7 +1297,8 @@ ForgetSCEV, /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; + /*AllowPeeling*/ false, /*AllowFullPeeling*/ false) != + LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1431,7 +1438,7 @@ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, - UnrollOpts.AllowUpperBound, LocalAllowPeeling); + UnrollOpts.AllowUpperBound, LocalAllowPeeling, UnrollOpts.FullPeeling); Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! Index: llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -1,6 +1,7 @@ ; REQUIRES: asserts ; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s ; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,unroll)' 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,unroll)' 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-PEEL ; Make sure we use the profile information correctly to peel-off 3 iterations ; from the loop, and update the branch weights for the peeled loop properly. 
@@ -8,7 +9,7 @@ ; CHECK: Loop Unroll: F[basic] ; CHECK: PEELING loop %for.body with iteration count 3! - +; CHECK-NO-PEEL-NOT: PEELING loop %for.body ; CHECK-LABEL: @basic ; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 ; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16