Skip to content

Commit f3bda1d

Browse files
committedDec 12, 2017
Split IndirectBr critical edges before PGO gen/use passes.
Summary: The PGO gen/use passes currently fail with an assert failure if there's a critical edge whose source is an IndirectBr instruction and that edge needs to be instrumented. To avoid this in certain cases, split IndirectBr critical edges in the PGO gen/use passes. This works for blocks with single indirectbr predecessors, but not for those with multiple indirectbr predecessors (splitting an IndirectBr critical edge isn't always possible.) Reviewers: davidxl, xur Reviewed By: davidxl Subscribers: efriedma, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D40699 llvm-svn: 320511
1 parent 195c97e commit f3bda1d

File tree

6 files changed

+87
-14
lines changed

6 files changed

+87
-14
lines changed
 

‎llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
namespace llvm {
2727

28+
class BlockFrequencyInfo;
29+
class BranchProbabilityInfo;
2830
class DominatorTree;
2931
class Function;
3032
class Instruction;
@@ -301,7 +303,10 @@ Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
301303
// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
302304
// create the following structure:
303305
// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
304-
bool SplitIndirectBrCriticalEdges(Function &F);
306+
// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly.
307+
bool SplitIndirectBrCriticalEdges(Function &F,
308+
BranchProbabilityInfo *BPI = nullptr,
309+
BlockFrequencyInfo *BFI = nullptr);
305310

306311
} // end namespace llvm
307312

‎llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
716716
static void instrumentOneFunc(
717717
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
718718
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
719+
// Split indirectbr critical edges here before computing the MST rather than
720+
// later in getInstrBB() to avoid invalidating it.
721+
SplitIndirectBrCriticalEdges(F, BPI, BFI);
719722
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
720723
BFI);
721724
unsigned NumCounters = FuncInfo.getNumCounters();
@@ -1463,6 +1466,9 @@ static bool annotateAllFunctions(
14631466
continue;
14641467
auto *BPI = LookupBPI(F);
14651468
auto *BFI = LookupBFI(F);
1469+
// Split indirectbr critical edges here before computing the MST rather than
1470+
// later in getInstrBB() to avoid invalidating it.
1471+
SplitIndirectBrCriticalEdges(F, BPI, BFI);
14661472
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
14671473
if (!Func.readCounters(PGOReader.get()))
14681474
continue;

‎llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp

+28-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include "llvm/ADT/SmallVector.h"
2121
#include "llvm/ADT/Statistic.h"
2222
#include "llvm/Analysis/AliasAnalysis.h"
23+
#include "llvm/Analysis/BlockFrequencyInfo.h"
24+
#include "llvm/Analysis/BranchProbabilityInfo.h"
2325
#include "llvm/Analysis/CFG.h"
2426
#include "llvm/Analysis/LoopInfo.h"
2527
#include "llvm/IR/CFG.h"
@@ -331,7 +333,9 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
331333
return IBB;
332334
}
333335

334-
bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
336+
bool llvm::SplitIndirectBrCriticalEdges(Function &F,
337+
BranchProbabilityInfo *BPI,
338+
BlockFrequencyInfo *BFI) {
335339
// Check whether the function has any indirectbrs, and collect which blocks
336340
// they may jump to. Since most functions don't have indirect branches,
337341
// this lowers the common case's overhead to O(Blocks) instead of O(Edges).
@@ -348,6 +352,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
348352
if (Targets.empty())
349353
return false;
350354

355+
bool ShouldUpdateAnalysis = BPI && BFI;
351356
bool Changed = false;
352357
for (BasicBlock *Target : Targets) {
353358
SmallVector<BasicBlock *, 16> OtherPreds;
@@ -363,6 +368,14 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
363368
continue;
364369

365370
BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
371+
if (ShouldUpdateAnalysis) {
372+
// Copy the BFI/BPI from Target to BodyBlock.
373+
for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
374+
I < E; ++I)
375+
BPI->setEdgeProbability(BodyBlock, I,
376+
BPI->getEdgeProbability(Target, I));
377+
BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
378+
}
366379
// It's possible Target was its own successor through an indirectbr.
367380
// In this case, the indirectbr now comes from BodyBlock.
368381
if (IBRPred == Target)
@@ -374,13 +387,22 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
374387
ValueToValueMapTy VMap;
375388
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
376389

390+
BlockFrequency BlockFreqForDirectSucc;
377391
for (BasicBlock *Pred : OtherPreds) {
378392
// If the target is a loop to itself, then the terminator of the split
379-
// block needs to be updated.
380-
if (Pred == Target)
381-
BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
382-
else
383-
Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
393+
// block (BodyBlock) needs to be updated.
394+
BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
395+
Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
396+
if (ShouldUpdateAnalysis)
397+
BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
398+
BPI->getEdgeProbability(Src, DirectSucc);
399+
}
400+
if (ShouldUpdateAnalysis) {
401+
BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
402+
BlockFrequency NewBlockFreqForTarget =
403+
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
404+
BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
405+
BPI->eraseBlock(Target);
384406
}
385407

386408
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that

‎llvm/test/Transforms/PGOProfile/Inputs/indirectbr.proftext

+3-4
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
:ir
33
foo
44
# Func Hash:
5-
40197883220
5+
47485104005
66
# Num Counters:
77
4
88
# Counter Values:
9-
202
10-
88
9+
139
1110
20
1211
5
13-
12+
63

‎llvm/test/Transforms/PGOProfile/indirectbr.ll

+5-3
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,14 @@ return:
3737
; BRANCHPROB: Printing analysis 'Branch Probability Analysis' for function 'foo':
3838
; BRANCHPROB:---- Branch Probabilities ----
3939
; BRANCHPROB: edge entry -> if.then probability is 0x37c32b17 / 0x80000000 = 43.56%
40-
; BRANCHPROB: edge entry -> return probability is 0x483cd4e9 / 0x80000000 = 56.44%
40+
; BRANCHPROB: edge entry -> return.clone probability is 0x483cd4e9 / 0x80000000 = 56.44%
4141
; BRANCHPROB: edge if.then -> return probability is 0x5ba2e8ba / 0x80000000 = 71.59%
4242
; BRANCHPROB: edge if.then -> label2 probability is 0x1d1745d1 / 0x80000000 = 22.73%
4343
; BRANCHPROB: edge if.then -> label3 probability is 0x0745d174 / 0x80000000 = 5.68%
44-
; BRANCHPROB: edge label2 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
45-
; BRANCHPROB: edge label3 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
44+
; BRANCHPROB: edge label2 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
45+
; BRANCHPROB: edge label3 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
46+
; BRANCHPROB: edge return -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
47+
; BRANCHPROB: edge return.clone -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
4648

4749

4850

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
2+
3+
; Function Attrs: norecurse nounwind readnone uwtable
4+
define i32 @bar(i32 %v) local_unnamed_addr #0 {
5+
entry:
6+
%mul = shl nsw i32 %v, 1
7+
ret i32 %mul
8+
}
9+
10+
; Function Attrs: norecurse nounwind readonly uwtable
11+
define i32 @foo(i8* nocapture readonly %p) #1 {
12+
entry:
13+
%targets = alloca [256 x i8*], align 16
14+
%arrayidx1 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 93
15+
store i8* blockaddress(@foo, %if.end), i8** %arrayidx1, align 8
16+
br label %for.cond2
17+
18+
for.cond2: ; preds = %if.end, %for.cond2, %entry
19+
; CHECK: for.cond2: ; preds = %.split1
20+
%p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %for.cond2 ]
21+
%incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
22+
%0 = load i8, i8* %p.addr.0, align 1
23+
%cond = icmp eq i8 %0, 93
24+
br i1 %cond, label %if.end.preheader, label %for.cond2
25+
26+
if.end.preheader: ; preds = %for.cond2
27+
br label %if.end
28+
29+
if.end: ; preds = %if.end.preheader, %if.end
30+
; CHECK: if.end: ; preds = %.split1
31+
%p.addr.1 = phi i8* [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %if.end.preheader ]
32+
%incdec.ptr5 = getelementptr inbounds i8, i8* %p.addr.1, i64 1
33+
%1 = load i8, i8* %p.addr.1, align 1
34+
%idxprom6 = zext i8 %1 to i64
35+
%arrayidx7 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 %idxprom6
36+
%2 = load i8*, i8** %arrayidx7, align 8
37+
indirectbr i8* %2, [label %for.cond2, label %if.end]
38+
; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end]
39+
}

0 commit comments

Comments
 (0)
Please sign in to comment.