Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -25,6 +25,7 @@ class AliasSetTracker; class AssumptionCache; class BasicBlock; +class BlockFrequencyInfo; class DataLayout; class DominatorTree; class Loop; @@ -369,12 +370,13 @@ /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. -/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout, +/// Takes DomTreeNode, AliasAnalysis, LoopInfo, BlockFrequencyInfo, +/// DominatorTree, DataLayout, /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the /// loop and loop safety information as arguments. It returns changed status. -bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, - LICMSafetyInfo *); +bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, + BlockFrequencyInfo *, DominatorTree *, TargetLibraryInfo *, + Loop *, AliasSetTracker *, LICMSafetyInfo *); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -34,6 +34,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -121,6 +122,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -135,6 +137,7 @@ AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. + BlockFrequencyInfo *BFI; TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. @@ -164,6 +167,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } @@ -182,6 +186,7 @@ LI = &getAnalysis().getLoopInfo(); AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); + BFI = &getAnalysis().getBFI(); TLI = &getAnalysis().getTLI(); @@ -212,7 +217,7 @@ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, CurAST, &SafetyInfo); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, BFI, DT, TLI, CurLoop, CurAST, &SafetyInfo); // Now that all loop invariants have been removed from the loop, promote any @@ -328,7 +333,8 @@ /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + BlockFrequencyInfo *BFI, DominatorTree *DT, + TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -344,7 +350,9 @@ // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). bool Changed = false; - if (!inSubLoop(BB, CurLoop, LI)) + if (!inSubLoop(BB, CurLoop, LI)) { + bool ShouldHoist = + BFI->getBlockFreq(BB) >= BFI->getBlockFreq(CurLoop->getLoopPreheader()); for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { Instruction &I = *II++; // Try constant folding this instruction. If all the operands are @@ -359,6 +367,8 @@ I.eraseFromParent(); continue; } + if (!ShouldHoist) + continue; // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it @@ -371,10 +381,12 @@ CurLoop->getLoopPreheader()->getTerminator())) Changed |= hoist(I, DT, CurLoop, SafetyInfo); } + } const std::vector &Children = N->getChildren(); for (DomTreeNode *Child : Children) - Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + Changed |= + hoistRegion(Child, AA, LI, BFI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -37,6 +37,12 @@ ; CHECK-O2-NEXT: FunctionPass Manager ; CHECK-O2-NOT: Manager ; CHECK-O2: Loop Pass Manager +; CHECK-O2: Branch Probability Analysis +; CHECK-O2: Block Frequency Analysis +; CHECK-O2: Loop Pass Manager +; CHECK-O2: Loop Invariant Code Motion +; CHECK-O2: Loop Pass Manager +; CHECK-O2: Unswitch loops ; CHECK-O2-NOT: Manager ; FIXME: We shouldn't be pulling out to simplify-cfg and instcombine and ; causing new loop pass managers. Index: test/Transforms/LICM/not-hoist-low-freq.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/not-hoist-low-freq.ll @@ -0,0 +1,67 @@ +; RUN: opt -S -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Original source code: +; int g; +; int foo(int p, int x) { +; for (int i = 0; i != x; i++) +; if (__builtin_expect(i == p, 0)) { +; x += g; x *= g; +; } +; return x; +; } +; +; Load of global value g should not be hoisted to preheader. + +@g = global i32 0, align 4 + +; Function Attrs: norecurse nounwind readonly uwtable +define i32 @_Z3fooii(i32, i32) #0 { + %3 = icmp eq i32 %1, 0 + br i1 %3, label %._crit_edge, label %.lr.ph.preheader + +.lr.ph.preheader: ; preds = %2 + br label %.lr.ph + +.lr.ph: ; preds = %.lr.ph.preheader, %9 + %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ] + %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ] + %4 = icmp eq i32 %.03, %0 + br i1 %4, label %.then, label %.combine, !prof !1 + +.then: ; preds = %.lr.ph + %5 = load i32, i32* @g, align 4, !tbaa !2 + %6 = add nsw i32 %5, %.012 + %7 = mul nsw i32 %6, %5 + br label %.combine + +; CHECK: .then: +; CHECK: load i32, i32* @g +; CHECK: br label %.combine + +.combine: ; preds = %.lr.ph, %.then + %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ] + %8 = add nuw nsw i32 %.03, 1 + %9 = icmp eq i32 %8, %.1 + br i1 %9, label %._crit_edge.loopexit, label %.lr.ph + +._crit_edge.loopexit: ; preds = %.combine + %.1.lcssa = phi i32 [ %.1, %.combine ] + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %2 + %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ] + ret i32 %.01.lcssa +} + +attributes #0 = { norecurse nounwind readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (trunk 268689)"} +!1 = !{!"branch_weights", i32 1, i32 2000} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"}