Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -353,6 +353,7 @@ const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% + auto L = MLI->getLoopFor(BB); MachineBasicBlock *BestSucc = nullptr; // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we manually compute probabilities using the edge @@ -380,10 +381,29 @@ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // We should compute the probability of this successor by only considering + // edges in the current loop but not its peer or inner loops. This happens + // when BB belongs to a loop but we are building chains for another loop + // (either outer or peer). In this case, the edge from BB to the successor + // outside of the inner loop usually has a small probability, due to edges + // from BB to successors inside of the inner loop. Then when we compute the + // sum of weights on BB, we need to exclude weights on edges belonging to + // the inner loop. + auto SuccLoop = MLI->getLoopFor(Succ); + if (L && L != SuccLoop && !(SuccLoop && L->contains(SuccLoop))) { + for (auto SuccBB : BB->successors()) { + if (MLI->getLoopFor(SuccBB) == L) { + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, SuccBB); + SumWeight -= SuccWeight / WeightScale; + } + } + } + BranchProbability SuccProbInLoop(SuccWeight / WeightScale, SumWeight); + // If we outline optional branches, look whether Succ is unavoidable, i.e. // dominates all terminators of the MachineFunction. If it does, other // successors must be optional. Don't do this for cold branches. - if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + if (OutlineOptionalBranches && SuccProbInLoop > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) { auto HasShortOptionalBranch = [&]() { for (MachineBasicBlock *Pred : Succ->predecessors()) { @@ -407,8 +427,8 @@ // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { - if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + if (SuccProbInLoop < HotProb) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProbInLoop << " (prob) (CFG conflict)\n"); continue; } @@ -430,13 +450,13 @@ } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProbInLoop << " (prob) (non-cold CFG conflict)\n"); continue; } } - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProbInLoop << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); @@ -1224,4 +1244,3 @@ return false; } - Index: test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -0,0 +1,50 @@ +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK + +define void @foo() { +; Test that when determining the edge probability from a node in an inner loop +; to a node in an outloop, the weights on edges in the inner loop should be +; ignored. +; +; CHECK-LABEL: foo: +; CHECK: callq c +; CHECK: callq b + +entry: + %call = call zeroext i1 @a() + br i1 %call, label %if.then, label %if.else, !prof !1 + +if.then: + %call1 = call zeroext i1 @a() + br i1 %call1, label %while.body, label %if.end.1, !prof !1 + +while.body: + %call2 = call zeroext i1 @a() + br i1 %call2, label %if.then.1, label %while.cond + +if.then.1: + call void @d() + br label %while.cond + +while.cond: + %call3 = call zeroext i1 @a() + br i1 %call3, label %while.body, label %if.end + +if.end.1: + call void @d() + br label %if.end + +if.else: + call void @b() + br label %if.end + +if.end: + call void @c() + ret void +} + +declare zeroext i1 @a() +declare void @b() +declare void @c() +declare void @d() + +!1 = !{!"branch_weights", i32 10, i32 1}