Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -38,6 +38,10 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -71,6 +75,20 @@ Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); +static cl::opt<bool> +LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency", + cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to access PGO " + "heuristics to minimize code growth in cold regions.")); + +static cl::opt<unsigned> +ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden, + cl::desc("Coldness threshold in percentage. The loop header frequency " + "(relative to the entry frequency) is compared with this " + "threshold to determine if non-trivial unswitching should be " + "enabled.")); + + namespace { class LUAnalysisCache { @@ -155,6 +173,13 @@ LUAnalysisCache BranchesInfo; + bool EnabledPGO; + + // BFI and ColdEntryFreq are only used when PGO and + // LoopUnswitchWithBlockFrequency are enabled. + BlockFrequencyInfo BFI; + BlockFrequency ColdEntryFreq; + bool OptimizeForSize; bool redoLoop; @@ -417,6 +442,20 @@ DT = DTWP ? 
&DTWP->getDomTree() : nullptr; currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); + + EnabledPGO = F->getEntryCount().hasValue(); + + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + BranchProbabilityInfo BPI(*F, *LI); + BFI.calculate(*L->getHeader()->getParent(), BPI, *LI); + + // Use BranchProbability to compute a minimum frequency based on + // function entry baseline frequency. Loops with headers below this + // frequency are considered as cold. + const BranchProbability ColdProb(ColdnessThreshold, 100); + ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb; + } + bool Changed = false; do { assert(currentLoop->isLCSSAForm(*DT)); @@ -471,6 +510,16 @@ loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) return false; + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + // Compute the weighted frequency of the hottest block in the + // loop (loopHeader in this case since inner loops should be + // processed before outer loops). If it is less than ColdEntryFreq, + // we should not unswitch. + BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader); + if (LoopEntryFreq < ColdEntryFreq) + return false; + } + // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. Index: test/Transforms/LoopUnswitch/cold-loop.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnswitch/cold-loop.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s + +;; trivial condition should be unswitched regardless of coldness. 
+define i32 @test1(i1 %cond1, i1 %cond2) !prof !1 { + br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0 + +loop_begin: +; CHECK: br i1 true, label %continue, label %loop_exit.loopexit + br i1 %cond2, label %continue, label %loop_exit ; trivial condition + +continue: + call void @some_func1() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +;; cold non-trivial condition should not be unswitched. +define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) !prof !1 { + br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0 + +loop_begin: + store i32 1, i32* %var +; CHECK: br i1 %cond2, label %continue1, label %continue2 + br i1 %cond2, label %continue1, label %continue2 ; non-trivial condition + +continue1: + call void @some_func1() noreturn nounwind + br label %joint + +continue2: + call void @some_func2() noreturn nounwind + br label %joint + +joint: +;; unswitching will duplicate these calls. + call void @some_func3() noreturn nounwind + call void @some_func4() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +declare void @some_func1() noreturn +declare void @some_func2() noreturn +declare void @some_func3() noreturn +declare void @some_func4() noreturn + +!0 = !{!"branch_weights", i32 1, i32 100000000} +!1 = !{!"function_entry_count", i64 100}