diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1399,8 +1399,10 @@ return Err; // Add the nested pass manager with the appropriate adaptor. bool UseMemorySSA = (Name == "loop-mssa"); - bool UseBFI = llvm::any_of( - InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; }); + bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) { + return Pipeline.Name.contains("licm") || + Pipeline.Name.contains("simple-loop-unswitch"); + }); bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "loop-predication"; }); diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/GuardUtils.h" @@ -26,6 +27,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3044,6 +3046,7 @@ bool NonTrivial, function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref DestroyLoopCB) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); @@ -3080,6 +3083,14 @@ if (L.getHeader()->getParent()->hasOptSize()) return false; + // Skip cold loops, as unswitching them brings little benefit + // but increases the code size + if (PSI && PSI->hasProfileSummary() && BFI && + PSI->isColdBlock(L.getHeader(), BFI)) { + LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n"); + return false; + } + // Skip non-trivial unswitching for loops that cannot be cloned. if (!L.isSafeToClone()) return false; @@ -3105,7 +3116,11 @@ LPMUpdater &U) { Function &F = *L.getHeader()->getParent(); (void)F; - + ProfileSummaryInfo *PSI = nullptr; + if (auto OuterProxy = + AM.getResult(L, AR) + .getCachedResult(F)) + PSI = OuterProxy->getCachedResult(*F.getParent()); LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); @@ -3152,7 +3167,7 @@ } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr, - DestroyLoopCB)) + PSI, AR.BFI, DestroyLoopCB)) return PreservedAnalyses::all(); if (AR.MSSA && VerifyMemorySSA) @@ -3214,7 +3229,6 @@ LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); - auto &DT = getAnalysis().getDomTree(); auto &LI = getAnalysis().getLoopInfo(); auto &AC = getAnalysis().getAssumptionCache(F); @@ -3251,9 +3265,9 @@ if (VerifyMemorySSA) MSSA->verifyMemorySSA(); - - bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, - UnswitchCB, SE, &MSSAU, DestroyLoopCB); + bool Changed = + unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE, + &MSSAU, nullptr, nullptr, DestroyLoopCB); if (VerifyMemorySSA) MSSA->verifyMemorySSA(); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -174,6 +174,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -137,6 +137,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -110,6 +110,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -119,6 +119,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -148,6 +148,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -114,6 +114,7 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll b/llvm/test/Transforms/LoopPredication/preserve-bpi.ll --- a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll +++ b/llvm/test/Transforms/LoopPredication/preserve-bpi.ll @@ -10,6 +10,7 @@ ; CHECK: Running pass: LoopPredicationPass on Loop at depth 1 ; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1 ; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 +; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-NEXT: Running pass: LoopPredicationPass on Loop at depth 1 ; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1 ; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll @@ -46,31 +46,18 @@ ; CHECK: entry_cold_loop: ; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]] ; CHECK: cold_loop_begin.preheader: -; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]] -; CHECK: cold_loop_begin.preheader.split.us: -; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]] -; CHECK: cold_loop_begin.us: -; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]] -; CHECK: cold_loop_a.us: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]] -; CHECK: cold_loop_latch.us: -; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 -; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] -; CHECK: cold_loop_exit.loopexit.split.us: -; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]] -; CHECK: cold_loop_begin.preheader.split: ; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]] ; CHECK: cold_loop_begin: -; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]] +; CHECK: cold_loop_a: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]] ; CHECK: cold_loop_b: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]] +; CHECK-NEXT: br label [[COLD_LOOP_LATCH]] ; CHECK: cold_loop_latch: ; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 -; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]] -; CHECK: cold_loop_exit.loopexit.split: -; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]] +; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: cold_loop_exit.loopexit: ; CHECK-NEXT: br label [[COLD_LOOP_EXIT]] ; CHECK: cold_loop_exit: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll @@ -18,6 +18,7 @@ ; the analysis caches. ; ; CHECK: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin
,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch +; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-NEXT: Clearing all analysis results for: loop_a_inner