Index: include/llvm/Transforms/Utils/BasicBlockUtils.h =================================================================== --- include/llvm/Transforms/Utils/BasicBlockUtils.h +++ include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -25,6 +25,8 @@ namespace llvm { +class BlockFrequencyInfo; +class BranchProbabilityInfo; class DominatorTree; class Function; class Instruction; @@ -301,7 +303,10 @@ // and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and // create the following structure: // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 -bool SplitIndirectBrCriticalEdges(Function &F); +// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly. +bool SplitIndirectBrCriticalEdges(Function &F, + BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr); } // end namespace llvm Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -716,6 +716,9 @@ static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap &ComdatMembers) { + // Split indirectbr critical edges here before computing the MST rather than + // later in getInstrBB() to avoid invalidating it. + SplitIndirectBrCriticalEdges(F, BPI, BFI); FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, BFI); unsigned NumCounters = FuncInfo.getNumCounters(); @@ -1463,6 +1466,9 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); + // Split indirectbr critical edges here before computing the MST rather than + // later in getInstrBB() to avoid invalidating it. + SplitIndirectBrCriticalEdges(F, BPI, BFI); PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); if (!Func.readCounters(PGOReader.get())) continue; Index: lib/Transforms/Utils/BreakCriticalEdges.cpp =================================================================== --- lib/Transforms/Utils/BreakCriticalEdges.cpp +++ lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" @@ -331,7 +333,9 @@ return IBB; } -bool llvm::SplitIndirectBrCriticalEdges(Function &F) { +bool llvm::SplitIndirectBrCriticalEdges(Function &F, + BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI) { // Check whether the function has any indirectbrs, and collect which blocks // they may jump to. Since most functions don't have indirect branches, // this lowers the common case's overhead to O(Blocks) instead of O(Edges). @@ -348,6 +352,7 @@ if (Targets.empty()) return false; + bool ShouldUpdateAnalysis = BPI && BFI; bool Changed = false; for (BasicBlock *Target : Targets) { SmallVector OtherPreds; @@ -363,6 +368,14 @@ continue; BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); + if (ShouldUpdateAnalysis) { + // Copy the BFI/BPI from Target to BodyBlock. + for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors(); + I < E; ++I) + BPI->setEdgeProbability(BodyBlock, I, + BPI->getEdgeProbability(Target, I)); + BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency()); + } // It's possible Target was its own successor through an indirectbr. // In this case, the indirectbr now comes from BodyBlock. if (IBRPred == Target) @@ -374,13 +387,22 @@ ValueToValueMapTy VMap; BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); + BlockFrequency BlockFreqForDirectSucc; for (BasicBlock *Pred : OtherPreds) { // If the target is a loop to itself, then the terminator of the split - // block needs to be updated. - if (Pred == Target) - BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - else - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + // block (BodyBlock) needs to be updated. + BasicBlock *Src = Pred != Target ? Pred : BodyBlock; + Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + if (ShouldUpdateAnalysis) + BlockFreqForDirectSucc += BFI->getBlockFreq(Src) * + BPI->getEdgeProbability(Src, DirectSucc); + } + if (ShouldUpdateAnalysis) { + BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency()); + BlockFrequency NewBlockFreqForTarget = + BFI->getBlockFreq(Target) - BlockFreqForDirectSucc; + BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency()); + BPI->eraseBlock(Target); } // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that Index: test/Transforms/PGOProfile/Inputs/indirectbr.proftext =================================================================== --- test/Transforms/PGOProfile/Inputs/indirectbr.proftext +++ test/Transforms/PGOProfile/Inputs/indirectbr.proftext @@ -2,12 +2,11 @@ :ir foo # Func Hash: -40197883220 +47485104005 # Num Counters: 4 # Counter Values: -202 -88 +139 20 5 - +63 Index: test/Transforms/PGOProfile/indirectbr.ll =================================================================== --- test/Transforms/PGOProfile/indirectbr.ll +++ test/Transforms/PGOProfile/indirectbr.ll @@ -37,12 +37,14 @@ ; BRANCHPROB: Printing analysis 'Branch Probability Analysis' for function 'foo': ; BRANCHPROB:---- Branch Probabilities ---- ; BRANCHPROB: edge entry -> if.then probability is 0x37c32b17 / 0x80000000 = 43.56% -; BRANCHPROB: edge entry -> return probability is 0x483cd4e9 / 0x80000000 = 56.44% +; BRANCHPROB: edge entry -> return.clone probability is 0x483cd4e9 / 0x80000000 = 56.44% ; BRANCHPROB: edge if.then -> return probability is 0x5ba2e8ba / 0x80000000 = 71.59% ; BRANCHPROB: edge if.then -> label2 probability is 0x1d1745d1 / 0x80000000 = 22.73% ; BRANCHPROB: edge if.then -> label3 probability is 0x0745d174 / 0x80000000 = 5.68% -; BRANCHPROB: edge label2 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] -; BRANCHPROB: edge label3 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; BRANCHPROB: edge label2 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; BRANCHPROB: edge label3 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; BRANCHPROB: edge return -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; BRANCHPROB: edge return.clone -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] Index: test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @bar(i32 %v) local_unnamed_addr #0 { +entry: + %mul = shl nsw i32 %v, 1 + ret i32 %mul +} + +; Function Attrs: norecurse nounwind readonly uwtable +define i32 @foo(i8* nocapture readonly %p) #1 { +entry: + %targets = alloca [256 x i8*], align 16 + %arrayidx1 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 93 + store i8* blockaddress(@foo, %if.end), i8** %arrayidx1, align 8 + br label %for.cond2 + +for.cond2: ; preds = %if.end, %for.cond2, %entry +; CHECK: for.cond2: ; preds = %.split1 + %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %for.cond2 ] + %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1 + %0 = load i8, i8* %p.addr.0, align 1 + %cond = icmp eq i8 %0, 93 + br i1 %cond, label %if.end.preheader, label %for.cond2 + +if.end.preheader: ; preds = %for.cond2 + br label %if.end + +if.end: ; preds = %if.end.preheader, %if.end +; CHECK: if.end: ; preds = %.split1 + %p.addr.1 = phi i8* [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %if.end.preheader ] + %incdec.ptr5 = getelementptr inbounds i8, i8* %p.addr.1, i64 1 + %1 = load i8, i8* %p.addr.1, align 1 + %idxprom6 = zext i8 %1 to i64 + %arrayidx7 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 %idxprom6 + %2 = load i8*, i8** %arrayidx7, align 8 + indirectbr i8* %2, [label %for.cond2, label %if.end] +; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end] +}