20
20
#include " llvm/ADT/Statistic.h"
21
21
#include " llvm/Analysis/GlobalsModRef.h"
22
22
#include " llvm/Analysis/CFG.h"
23
+ #include " llvm/Analysis/BlockFrequencyInfo.h"
24
+ #include " llvm/Analysis/BlockFrequencyInfoImpl.h"
25
+ #include " llvm/Analysis/BranchProbabilityInfo.h"
23
26
#include " llvm/Analysis/ConstantFolding.h"
24
27
#include " llvm/Analysis/InstructionSimplify.h"
25
28
#include " llvm/Analysis/LazyValueInfo.h"
26
29
#include " llvm/Analysis/Loads.h"
30
+ #include " llvm/Analysis/LoopInfo.h"
27
31
#include " llvm/Analysis/TargetLibraryInfo.h"
28
32
#include " llvm/IR/DataLayout.h"
29
33
#include " llvm/IR/IntrinsicInst.h"
30
34
#include " llvm/IR/LLVMContext.h"
35
+ #include " llvm/IR/MDBuilder.h"
31
36
#include " llvm/IR/Metadata.h"
32
37
#include " llvm/IR/ValueHandle.h"
33
38
#include " llvm/Pass.h"
37
42
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
38
43
#include " llvm/Transforms/Utils/Local.h"
39
44
#include " llvm/Transforms/Utils/SSAUpdater.h"
45
+ #include < algorithm>
46
+ #include < memory>
40
47
using namespace llvm ;
41
48
42
49
#define DEBUG_TYPE " jump-threading"
@@ -81,6 +88,9 @@ namespace {
81
88
class JumpThreading : public FunctionPass {
82
89
TargetLibraryInfo *TLI;
83
90
LazyValueInfo *LVI;
91
+ std::unique_ptr<BlockFrequencyInfo> BFI;
92
+ std::unique_ptr<BranchProbabilityInfo> BPI;
93
+ bool HasProfileData;
84
94
#ifdef NDEBUG
85
95
SmallPtrSet<BasicBlock*, 16 > LoopHeaders;
86
96
#else
@@ -119,6 +129,11 @@ namespace {
119
129
AU.addRequired <TargetLibraryInfoWrapperPass>();
120
130
}
121
131
132
+ void releaseMemory () override {
133
+ BFI.reset ();
134
+ BPI.reset ();
135
+ }
136
+
122
137
void FindLoopHeaders (Function &F);
123
138
bool ProcessBlock (BasicBlock *BB);
124
139
bool ThreadEdge (BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
@@ -139,6 +154,12 @@ namespace {
139
154
140
155
bool SimplifyPartiallyRedundantLoad (LoadInst *LI);
141
156
bool TryToUnfoldSelect (CmpInst *CondCmp, BasicBlock *BB);
157
+
158
+ private:
159
+ BasicBlock *SplitBlockPreds (BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
160
+ const char *Suffix);
161
+ void UpdateBlockFreqAndEdgeWeight (BasicBlock *PredBB, BasicBlock *BB,
162
+ BasicBlock *NewBB, BasicBlock *SuccBB);
142
163
};
143
164
}
144
165
@@ -162,6 +183,16 @@ bool JumpThreading::runOnFunction(Function &F) {
162
183
DEBUG (dbgs () << " Jump threading on function '" << F.getName () << " '\n " );
163
184
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI ();
164
185
LVI = &getAnalysis<LazyValueInfo>();
186
+ BFI.reset ();
187
+ BPI.reset ();
188
+ // When profile data is available, we need to update edge weights after
189
+ // successful jump threading, which requires both BPI and BFI being available.
190
+ HasProfileData = F.getEntryCount ().hasValue ();
191
+ if (HasProfileData) {
192
+ LoopInfo LI{DominatorTree (F)};
193
+ BPI.reset (new BranchProbabilityInfo (F, LI));
194
+ BFI.reset (new BlockFrequencyInfo (F, *BPI, LI));
195
+ }
165
196
166
197
// Remove unreachable blocks from function as they may result in infinite
167
198
// loop. We do threading if we found something profitable. Jump threading a
@@ -977,8 +1008,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
977
1008
}
978
1009
979
1010
// Split them out to their own block.
980
- UnavailablePred =
981
- SplitBlockPredecessors (LoadBB, PredsToSplit, " thread-pre-split" );
1011
+ UnavailablePred = SplitBlockPreds (LoadBB, PredsToSplit, " thread-pre-split" );
982
1012
}
983
1013
984
1014
// If the value isn't available in all predecessors, then there will be
@@ -1403,7 +1433,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
1403
1433
else {
1404
1434
DEBUG (dbgs () << " Factoring out " << PredBBs.size ()
1405
1435
<< " common predecessors.\n " );
1406
- PredBB = SplitBlockPredecessors (BB, PredBBs, " .thr_comm" );
1436
+ PredBB = SplitBlockPreds (BB, PredBBs, " .thr_comm" );
1407
1437
}
1408
1438
1409
1439
// And finally, do it!
@@ -1424,6 +1454,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
1424
1454
BB->getParent (), BB);
1425
1455
NewBB->moveAfter (PredBB);
1426
1456
1457
+ // Set the block frequency of NewBB.
1458
+ if (HasProfileData) {
1459
+ auto NewBBFreq =
1460
+ BFI->getBlockFreq (PredBB) * BPI->getEdgeProbability (PredBB, BB);
1461
+ BFI->setBlockFreq (NewBB, NewBBFreq.getFrequency ());
1462
+ }
1463
+
1427
1464
BasicBlock::iterator BI = BB->begin ();
1428
1465
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
1429
1466
ValueMapping[PN] = PN->getIncomingValueForBlock (PredBB);
@@ -1447,7 +1484,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
1447
1484
1448
1485
// We didn't copy the terminator from BB over to NewBB, because there is now
1449
1486
// an unconditional jump to SuccBB. Insert the unconditional jump.
1450
- BranchInst *NewBI =BranchInst::Create (SuccBB, NewBB);
1487
+ BranchInst *NewBI = BranchInst::Create (SuccBB, NewBB);
1451
1488
NewBI->setDebugLoc (BB->getTerminator ()->getDebugLoc ());
1452
1489
1453
1490
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
@@ -1508,11 +1545,85 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
1508
1545
// frequently happens because of phi translation.
1509
1546
SimplifyInstructionsInBlock (NewBB, TLI);
1510
1547
1548
+ // Update the edge weight from BB to SuccBB, which should be less than before.
1549
+ UpdateBlockFreqAndEdgeWeight (PredBB, BB, NewBB, SuccBB);
1550
+
1511
1551
// Threaded an edge!
1512
1552
++NumThreads;
1513
1553
return true ;
1514
1554
}
1515
1555
1556
+ // / Create a new basic block that will be the predecessor of BB and successor of
1557
+ // / all blocks in Preds. When profile data is availble, update the frequency of
1558
+ // / this new block.
1559
+ BasicBlock *JumpThreading::SplitBlockPreds (BasicBlock *BB,
1560
+ ArrayRef<BasicBlock *> Preds,
1561
+ const char *Suffix) {
1562
+ // Collect the frequencies of all predecessors of BB, which will be used to
1563
+ // update the edge weight on BB->SuccBB.
1564
+ BlockFrequency PredBBFreq (0 );
1565
+ if (HasProfileData)
1566
+ for (auto Pred : Preds)
1567
+ PredBBFreq += BFI->getBlockFreq (Pred) * BPI->getEdgeProbability (Pred, BB);
1568
+
1569
+ BasicBlock *PredBB = SplitBlockPredecessors (BB, Preds, Suffix);
1570
+
1571
+ // Set the block frequency of the newly created PredBB, which is the sum of
1572
+ // frequencies of Preds.
1573
+ if (HasProfileData)
1574
+ BFI->setBlockFreq (PredBB, PredBBFreq.getFrequency ());
1575
+ return PredBB;
1576
+ }
1577
+
1578
+ // / Update the block frequency of BB and branch weight and the metadata on the
1579
+ // / edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
1580
+ // / Freq(PredBB->BB) / Freq(BB->SuccBB).
1581
+ void JumpThreading::UpdateBlockFreqAndEdgeWeight (BasicBlock *PredBB,
1582
+ BasicBlock *BB,
1583
+ BasicBlock *NewBB,
1584
+ BasicBlock *SuccBB) {
1585
+ if (!HasProfileData)
1586
+ return ;
1587
+
1588
+ assert (BFI && BPI && " BFI & BPI should have been created here" );
1589
+
1590
+ // As the edge from PredBB to BB is deleted, we have to update the block
1591
+ // frequency of BB.
1592
+ auto BBOrigFreq = BFI->getBlockFreq (BB);
1593
+ auto NewBBFreq = BFI->getBlockFreq (NewBB);
1594
+ auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability (BB, SuccBB);
1595
+ auto BBNewFreq = BBOrigFreq - NewBBFreq;
1596
+ BFI->setBlockFreq (BB, BBNewFreq.getFrequency ());
1597
+
1598
+ // Collect updated outgoing edges' frequencies from BB and use them to update
1599
+ // edge weights.
1600
+ SmallVector<uint64_t , 4 > BBSuccFreq;
1601
+ for (auto I = succ_begin (BB), E = succ_end (BB); I != E; ++I) {
1602
+ auto SuccFreq = (*I == SuccBB)
1603
+ ? BB2SuccBBFreq - NewBBFreq
1604
+ : BBOrigFreq * BPI->getEdgeProbability (BB, *I);
1605
+ BBSuccFreq.push_back (SuccFreq.getFrequency ());
1606
+ }
1607
+
1608
+ // Normalize edge weights in Weights64 so that the sum of them can fit in
1609
+ BranchProbability::normalizeEdgeWeights (BBSuccFreq.begin (), BBSuccFreq.end ());
1610
+
1611
+ SmallVector<uint32_t , 4 > Weights;
1612
+ for (auto Freq : BBSuccFreq)
1613
+ Weights.push_back (static_cast <uint32_t >(Freq));
1614
+
1615
+ // Update edge weights in BPI.
1616
+ for (int I = 0 , E = Weights.size (); I < E; I++)
1617
+ BPI->setEdgeWeight (BB, I, Weights[I]);
1618
+
1619
+ if (Weights.size () >= 2 ) {
1620
+ auto TI = BB->getTerminator ();
1621
+ TI->setMetadata (
1622
+ LLVMContext::MD_prof,
1623
+ MDBuilder (TI->getParent ()->getContext ()).createBranchWeights (Weights));
1624
+ }
1625
+ }
1626
+
1516
1627
// / DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
1517
1628
// / to BB which contains an i1 PHI node and a conditional branch on that PHI.
1518
1629
// / If we can duplicate the contents of BB up into PredBB do so now, this
@@ -1546,7 +1657,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
1546
1657
else {
1547
1658
DEBUG (dbgs () << " Factoring out " << PredBBs.size ()
1548
1659
<< " common predecessors.\n " );
1549
- PredBB = SplitBlockPredecessors (BB, PredBBs, " .thr_comm" );
1660
+ PredBB = SplitBlockPreds (BB, PredBBs, " .thr_comm" );
1550
1661
}
1551
1662
1552
1663
// Okay, we decided to do this! Clone all the instructions in BB onto the end
0 commit comments