Index: include/llvm/Transforms/Utils/BasicBlockUtils.h =================================================================== --- include/llvm/Transforms/Utils/BasicBlockUtils.h +++ include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -18,6 +18,7 @@ // FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/InstrTypes.h" @@ -25,6 +26,8 @@ namespace llvm { +class BlockFrequencyInfo; +class BranchProbabilityInfo; class DominatorTree; class Function; class Instruction; @@ -301,7 +304,15 @@ // and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and // create the following structure: // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 -bool SplitIndirectBrCriticalEdges(Function &F); +// If BPI and BFI are non-null, BPI/BFI will be updated accordingly. +// If IndirectBrTargets is non-null, the critical edges to those target blocks +// are split. Otherwise, we search for all critical edge targets in the +// function. +bool SplitIndirectBrCriticalEdges( + Function &F, + SmallSetVector *IndirectBrTargets = nullptr, + BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr); } // end namespace llvm Index: lib/Transforms/Instrumentation/CFGMST.h =================================================================== --- lib/Transforms/Instrumentation/CFGMST.h +++ lib/Transforms/Instrumentation/CFGMST.h @@ -37,7 +37,7 @@ /// for a given CFG. template class CFGMST { public: - Function &F; + Function *F; // Store all the edges in CFG. It may contain some stale edges // when Removed is set. @@ -93,9 +93,9 @@ // Edges with large weight will be put into MST first so they are less likely // to be instrumented. void buildEdges() { - DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); + DEBUG(dbgs() << "Build Edge on " << F->getName() << "\n"); - const BasicBlock *BB = &(F.getEntryBlock()); + const BasicBlock *BB = &(F->getEntryBlock()); uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); // Add a fake edge to the entry. addEdge(nullptr, BB, EntryWeight); @@ -108,7 +108,7 @@ static const uint32_t CriticalEdgeMultiplier = 1000; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); uint64_t BBWeight = (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); @@ -214,7 +214,7 @@ BlockFrequencyInfo *BFI; public: - CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, + CFGMST(Function *Func, BranchProbabilityInfo *BPI_ = nullptr, BlockFrequencyInfo *BFI_ = nullptr) : F(Func), BPI(BPI_), BFI(BFI_) { buildEdges(); Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -532,7 +532,22 @@ bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + SIVisitor(Func), MIVisitor(Func), MST(&F, BPI, BFI) { + // Split indirectbr critical edges that are to be instrumented, if any, (and + // recompute the MST) here rather than later in getInstrBB() to avoid + // invalidating the MST. + SmallSetVector IndirectBrTargets; + for (auto &E : MST.AllEdges) { + if (!E->InMST && E->IsCritical && E->SrcBB && E->DestBB && + isa(E->SrcBB->getTerminator())) { + IndirectBrTargets.insert(const_cast(E->DestBB)); + } + } + if (!IndirectBrTargets.empty()) { + SplitIndirectBrCriticalEdges(F, &IndirectBrTargets, BPI, BFI); + MST = CFGMST(&F, BPI, BFI); + } + // This should be done before CFG hash computation. SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); Index: lib/Transforms/Utils/BreakCriticalEdges.cpp =================================================================== --- lib/Transforms/Utils/BreakCriticalEdges.cpp +++ lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" @@ -331,23 +333,31 @@ return IBB; } -bool llvm::SplitIndirectBrCriticalEdges(Function &F) { +bool llvm::SplitIndirectBrCriticalEdges( + Function &F, + SmallSetVector *IndirectBrTargets, + BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI) { + SmallSetVector Targets; + if (IndirectBrTargets) + Targets = *IndirectBrTargets; + else // Check whether the function has any indirectbrs, and collect which blocks // they may jump to. Since most functions don't have indirect branches, // this lowers the common case's overhead to O(Blocks) instead of O(Edges). - SmallSetVector Targets; - for (auto &BB : F) { - auto *IBI = dyn_cast(BB.getTerminator()); - if (!IBI) - continue; + for (auto &BB : F) { + auto *IBI = dyn_cast(BB.getTerminator()); + if (!IBI) + continue; - for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) - Targets.insert(IBI->getSuccessor(Succ)); - } + for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) + Targets.insert(IBI->getSuccessor(Succ)); + } if (Targets.empty()) return false; + bool ShouldUpdateAnalysis = BPI && BFI; bool Changed = false; for (BasicBlock *Target : Targets) { SmallVector OtherPreds; @@ -363,6 +373,14 @@ continue; BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); + if (ShouldUpdateAnalysis) { + // Copy the BFI/BPI from Target to BodyBlock. + for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors(); + I < E; ++I) + BPI->setEdgeProbability(BodyBlock, I, + BPI->getEdgeProbability(Target, I)); + BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency()); + } // It's possible Target was its own successor through an indirectbr. // In this case, the indirectbr now comes from BodyBlock. if (IBRPred == Target) @@ -374,13 +392,22 @@ ValueToValueMapTy VMap; BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); + BlockFrequency BlockFreqForDirectSucc; for (BasicBlock *Pred : OtherPreds) { // If the target is a loop to itself, then the terminator of the split - // block needs to be updated. - if (Pred == Target) - BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - else - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + // block (BodyBlock) needs to be updated. + BasicBlock *Src = Pred != Target ? Pred : BodyBlock; + Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + if (ShouldUpdateAnalysis) + BlockFreqForDirectSucc += BFI->getBlockFreq(Src) * + BPI->getEdgeProbability(Src, DirectSucc); + } + if (ShouldUpdateAnalysis) { + BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency()); + BlockFrequency NewBlockFreqForTarget = + BFI->getBlockFreq(Target) - BlockFreqForDirectSucc; + BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency()); + BPI->eraseBlock(Target); } // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that Index: test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s + +; Function Attrs: norecurse nounwind readonly uwtable +define i32 @foo(i8* nocapture readonly %p) #1 { +entry: + %targets = alloca [256 x i8*], align 16 + %arrayidx1 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 93 + store i8* blockaddress(@foo, %if.end), i8** %arrayidx1, align 8 + br label %for.cond2 + +for.cond2: ; preds = %if.end, %for.cond2, %entry + %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %for.cond2 ] + %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1 + %0 = load i8, i8* %p.addr.0, align 1 + %cond = icmp eq i8 %0, 93 + br i1 %cond, label %if.end.preheader, label %for.cond2 + +if.end.preheader: ; preds = %for.cond2 + br label %if.end + +if.end: ; preds = %if.end.preheader, %if.end +; CHECK: if.end: ; preds = %.split + %p.addr.1 = phi i8* [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %if.end.preheader ] + %incdec.ptr5 = getelementptr inbounds i8, i8* %p.addr.1, i64 1 + %1 = load i8, i8* %p.addr.1, align 1 + %idxprom6 = zext i8 %1 to i64 + %arrayidx7 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 %idxprom6 + %2 = load i8*, i8** %arrayidx7, align 8 + indirectbr i8* %2, [label %for.cond2, label %if.end] +; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end] +}