Index: lib/Transforms/Scalar/LoopUnswitch.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnswitch.cpp
+++ lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
@@ -47,6 +48,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -70,6 +72,20 @@
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
           cl::init(100), cl::Hidden);
 
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+    cl::init(false), cl::Hidden,
+    cl::desc("Enable the use of the block frequency analysis to access PGO "
+             "heuristics to minimize code growth in cold regions."));
+
+// Expressed as a percentage of the function entry frequency. The default of
+// 20 was chosen based on LoopVectorize's choice of cold frequency.
+static cl::opt<unsigned>
+ColdFrequency("loop-unswitch-cold-block-frequency", cl::init(20), cl::Hidden,
+    cl::desc("Percentage of the function entry frequency below which a loop "
+             "is considered cold. Non-trivial unswitches are not applied to "
+             "cold loops."));
+
 namespace {
 
   class LUAnalysisCache {
@@ -153,6 +169,9 @@
     std::vector LoopProcessWorklist;
 
     LUAnalysisCache BranchesInfo;
+    BlockFrequencyInfo *BFI;
+
+    BlockFrequency ColdEntryFreq;
 
     bool OptimizeForSize;
     bool redoLoop;
@@ -195,6 +214,7 @@
       AU.addPreserved();
       AU.addPreserved();
       AU.addRequired();
+      AU.addRequired<BlockFrequencyInfoWrapperPass>();
     }
 
   private:
@@ -359,6 +379,7 @@
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
                       false, false)
 
@@ -406,6 +427,14 @@
   if (skipOptnoneFunction(L))
     return false;
 
+  BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+
+  // Use BranchProbability to compute a minimum frequency based on
+  // entry baseline frequency. Blocks below this frequency are considered
+  // as cold.
+  const BranchProbability ColdProb(ColdFrequency, 100);
+  ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
+
   AC = &getAnalysis().getAssumptionCache(
       *L->getHeader()->getParent());
   LI = &getAnalysis().getLoopInfo();
@@ -464,6 +493,14 @@
     return true;
   }
 
+  // Compute the weighted frequency of this loop being executed and see if it
+  // is less than ColdFrequency% of the function entry baseline frequency.
+  if (LoopUnswitchWithBlockFrequency) {
+    BlockFrequency LoopEntryFreq = BFI->getBlockFreq(loopPreheader);
+    if (LoopEntryFreq < ColdEntryFreq)
+      return false;
+  }
+
   // Loop over all of the basic blocks in the loop.  If we find an interior
   // block that is branching on a loop-invariant condition, we can unswitch this
   // loop.
Index: test/Transforms/LoopUnswitch/cold-loop.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnswitch/cold-loop.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+; This test contains a cold loop where the first condition is trivial
+; and the second condition is non-trivial. LoopUnswitch pass should be
+; able to unswitch the trivial one but not the non-trivial one.
+
+define i32 @test(i1 %cond1, i1 %cond2, i1 %cond3) {
+  br i1 %cond1, label %loop_preheader, label %loop_exit, !prof !0
+
+; CHECK: loop_preheader:
+; CHECK: br i1 %cond2, label %loop_preheader.loop_preheader.split_crit_edge, label %loop_preheader.loop_exit.loopexit.split_crit_edge
+loop_preheader:
+  br label %loop_begin
+
+loop_begin:
+  br i1 %cond2, label %continue, label %loop_exit ; trivial condition
+
+; CHECK: continue:
+; CHECK: br i1 %cond3, label %do_something, label %do_something_else
+continue:
+  br i1 %cond3, label %do_something, label %do_something_else ; non-trivial condition
+
+do_something:
+  call void @some_func1() noreturn nounwind
+  br label %joint
+
+do_something_else:
+  call void @some_func2() noreturn nounwind
+  br label %joint
+
+joint:
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 10}