Index: lib/Transforms/Scalar/LoopUnswitch.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnswitch.cpp
+++ lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -37,6 +37,10 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
@@ -70,6 +74,21 @@
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
           cl::init(100), cl::Hidden);
 
+// FIXME: This is false by default due to pervasive problems with
+// exactly what block frequency models.
+// See bug: https://llvm.org/bugs/show_bug.cgi?id=20316
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+    cl::init(false), cl::Hidden,
+    cl::desc("Enable the use of the block frequency analysis to access PGO "
+             "heuristics to minimize code growth in cold regions and be more "
+             "aggressive in hot regions."));
+
+static cl::opt<unsigned>
+ColdFrequency("loop-unswitch-cold-block-frequency", cl::init(20), cl::Hidden,
+    cl::desc("Block frequency to be considered as cold. Non-trivial "
+             "unswitches are not applied to cold blocks."));
+
 namespace {
 
   class LUAnalysisCache {
@@ -154,6 +173,12 @@
 
     LUAnalysisCache BranchesInfo;
 
+    // Block frequencies of the enclosing function; only calculated when
+    // -loop-unswitch-with-block-frequency is set.
+    BlockFrequencyInfo BFI;
+    // Loop headers with a frequency below this are treated as cold.
+    BlockFrequency ColdEntryFreq;
+
     bool OptimizeForSize;
     bool redoLoop;
 
@@ -415,6 +440,18 @@
   DT = DTWP ? &DTWP->getDomTree() : nullptr;
   currentLoop = L;
   Function *F = currentLoop->getHeader()->getParent();
+
+  if (LoopUnswitchWithBlockFrequency) {
+    BranchProbabilityInfo BPI(*F, *LI);
+    BFI.calculate(*F, BPI, *LI);
+
+    // Use BranchProbability to compute a minimum frequency based on
+    // function entry baseline frequency. Loops with headers below this
+    // frequency are considered as cold.
+    const BranchProbability ColdProb(ColdFrequency, 100);
+    ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+  }
+
   bool Changed = false;
   do {
     assert(currentLoop->isLCSSAForm(*DT));
@@ -469,6 +506,16 @@
       loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
     return false;
 
+  if (LoopUnswitchWithBlockFrequency) {
+    // Compute the weighted frequency of the hottest block in the
+    // loop (loopHeader in this case since inner loops should be
+    // processed before outer loop). If it is less than ColdEntryFreq,
+    // we should not unswitch.
+    BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+    if (LoopEntryFreq < ColdEntryFreq)
+      return false;
+  }
+
   // Loop over all of the basic blocks in the loop. If we find an interior
   // block that is branching on a loop-invariant condition, we can unswitch this
   // loop.
Index: test/Transforms/LoopUnswitch/cold-loop.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnswitch/cold-loop.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+; trivial condition should be unswitched regardless of coldness.
+define i32 @test1(i1 %cond1, i1 %cond2) {
+  br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+
+; CHECK: br i1 true, label %continue, label %loop_exit.loopexit
+  br i1 %cond2, label %continue, label %loop_exit ; trivial condition
+
+continue:
+  call void @some_func1() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+; cold non-trivial condition should not be unswitched.
+define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) {
+  br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+  store i32 1, i32* %var
+
+; CHECK: br i1 %cond2, label %continue1, label %continue2
+  br i1 %cond2, label %continue1, label %continue2 ; non-trivial condition
+
+continue1:
+  call void @some_func1() noreturn nounwind
+  br label %joint
+
+continue2:
+  call void @some_func2() noreturn nounwind
+  br label %joint
+
+joint:
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 100000}