Skip to content

Commit b74d3b3

Browse files
author
Cong Hou
committedOct 14, 2015
Update the branch weight metadata in JumpThreading pass.
Currently in JumpThreading pass, the branch weight metadata is not updated after CFG modification. Consider the jump threading on PredBB, BB, and SuccBB. After jump threading, the weight on BB->SuccBB should be adjusted as some of it is contributed by the edge PredBB->BB, which doesn't exist anymore. This patch tries to update the edge weight in metadata on BB->SuccBB by scaling it by 1 - Freq(PredBB->BB) / Freq(BB->SuccBB). This is the third attempt to submit this patch, while the first two led to failures in some FDO tests. After investigation, it is the edge weight normalization that caused those failures. In this patch the edge weight normalization is fixed so that there is no zero weight in the output and the sum of all weights can fit in 32-bit integer. Several unit tests are added. Differential revision: http://reviews.llvm.org/D10979 llvm-svn: 250345
1 parent aae328d commit b74d3b3

File tree

8 files changed

+285
-5
lines changed

8 files changed

+285
-5
lines changed
 

‎llvm/include/llvm/Analysis/BlockFrequencyInfo.h

+3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class BlockFrequencyInfo {
4545
/// floating points.
4646
BlockFrequency getBlockFreq(const BasicBlock *BB) const;
4747

48+
// Set the frequency of the given basic block.
49+
void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
50+
4851
/// calculate - compute block frequency info for the given function.
4952
void calculate(const Function &F, const BranchProbabilityInfo &BPI,
5053
const LoopInfo &LI);

‎llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h

+18
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,8 @@ class BlockFrequencyInfoImplBase {
477477

478478
BlockFrequency getBlockFreq(const BlockNode &Node) const;
479479

480+
void setBlockFreq(const BlockNode &Node, uint64_t Freq);
481+
480482
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const;
481483
raw_ostream &printBlockFreq(raw_ostream &OS,
482484
const BlockFrequency &Freq) const;
@@ -913,6 +915,7 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
913915
BlockFrequency getBlockFreq(const BlockT *BB) const {
914916
return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
915917
}
918+
void setBlockFreq(const BlockT *BB, uint64_t Freq);
916919
Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
917920
return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
918921
}
@@ -965,6 +968,21 @@ void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
965968
finalizeMetrics();
966969
}
967970

971+
template <class BT>
972+
void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) {
973+
if (Nodes.count(BB))
974+
BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq);
975+
else {
976+
// If BB is a newly added block after BFI is done, we need to create a new
977+
// BlockNode for it assigned with a new index. The index can be determined
978+
// by the size of Freqs.
979+
BlockNode NewNode(Freqs.size());
980+
Nodes[BB] = NewNode;
981+
Freqs.emplace_back();
982+
BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq);
983+
}
984+
}
985+
968986
template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
969987
const BlockT *Entry = &F->front();
970988
RPOT.reserve(F->size());

‎llvm/include/llvm/Support/BranchProbability.h

+51
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
#define LLVM_SUPPORT_BRANCHPROBABILITY_H
1616

1717
#include "llvm/Support/DataTypes.h"
18+
#include <algorithm>
1819
#include <cassert>
20+
#include <climits>
21+
#include <numeric>
1922

2023
namespace llvm {
2124

@@ -53,6 +56,11 @@ class BranchProbability {
5356
template <class ProbabilityList>
5457
static void normalizeProbabilities(ProbabilityList &Probs);
5558

59+
// Normalize a list of weights by scaling them down so that the sum of them
60+
// doesn't exceed UINT32_MAX.
61+
template <class WeightListIter>
62+
static void normalizeEdgeWeights(WeightListIter Begin, WeightListIter End);
63+
5664
uint32_t getNumerator() const { return N; }
5765
static uint32_t getDenominator() { return D; }
5866

@@ -135,6 +143,49 @@ void BranchProbability::normalizeProbabilities(ProbabilityList &Probs) {
135143
Prob.N = (Prob.N * uint64_t(D) + Sum / 2) / Sum;
136144
}
137145

146+
template <class WeightListIter>
147+
void BranchProbability::normalizeEdgeWeights(WeightListIter Begin,
148+
WeightListIter End) {
149+
// First we compute the sum with 64-bits of precision.
150+
uint64_t Sum = std::accumulate(Begin, End, uint64_t(0));
151+
152+
if (Sum > UINT32_MAX) {
153+
// Compute the scale necessary to cause the weights to fit, and re-sum with
154+
// that scale applied.
155+
assert(Sum / UINT32_MAX < UINT32_MAX &&
156+
"The sum of weights exceeds UINT32_MAX^2!");
157+
uint32_t Scale = Sum / UINT32_MAX + 1;
158+
for (auto I = Begin; I != End; ++I)
159+
*I /= Scale;
160+
Sum = std::accumulate(Begin, End, uint64_t(0));
161+
}
162+
163+
// Eliminate zero weights.
164+
auto ZeroWeightNum = std::count(Begin, End, 0u);
165+
if (ZeroWeightNum > 0) {
166+
// If all weights are zeros, replace them by 1.
167+
if (Sum == 0)
168+
std::fill(Begin, End, 1u);
169+
else {
170+
// We are converting zeros into ones, and here we need to make sure that
171+
// after this the sum won't exceed UINT32_MAX.
172+
if (Sum + ZeroWeightNum > UINT32_MAX) {
173+
for (auto I = Begin; I != End; ++I)
174+
*I /= 2;
175+
ZeroWeightNum = std::count(Begin, End, 0u);
176+
Sum = std::accumulate(Begin, End, uint64_t(0));
177+
}
178+
// Scale up non-zero weights and turn zero weights into ones.
179+
uint64_t ScalingFactor = (UINT32_MAX - ZeroWeightNum) / Sum;
180+
assert(ScalingFactor >= 1);
181+
if (ScalingFactor > 1)
182+
for (auto I = Begin; I != End; ++I)
183+
*I *= ScalingFactor;
184+
std::replace(Begin, End, 0u, 1u);
185+
}
186+
}
187+
}
188+
138189
}
139190

140191
#endif

‎llvm/lib/Analysis/BlockFrequencyInfo.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
129129
return BFI ? BFI->getBlockFreq(BB) : 0;
130130
}
131131

132+
void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB,
133+
uint64_t Freq) {
134+
assert(BFI && "Expected analysis to be available");
135+
BFI->setBlockFreq(BB, Freq);
136+
}
137+
132138
/// Pop up a ghostview window with the current block frequency propagation
133139
/// rendered using dot.
134140
void BlockFrequencyInfo::view() const {

‎llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
530530
return Freqs[Node.Index].Scaled;
531531
}
532532

533+
void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
534+
uint64_t Freq) {
535+
assert(Node.isValid() && "Expected valid node");
536+
assert(Node.Index < Freqs.size() && "Expected legal index");
537+
Freqs[Node.Index].Integer = Freq;
538+
}
539+
533540
std::string
534541
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
535542
return std::string();

‎llvm/lib/Transforms/Scalar/JumpThreading.cpp

+116-5
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,19 @@
2020
#include "llvm/ADT/Statistic.h"
2121
#include "llvm/Analysis/GlobalsModRef.h"
2222
#include "llvm/Analysis/CFG.h"
23+
#include "llvm/Analysis/BlockFrequencyInfo.h"
24+
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
25+
#include "llvm/Analysis/BranchProbabilityInfo.h"
2326
#include "llvm/Analysis/ConstantFolding.h"
2427
#include "llvm/Analysis/InstructionSimplify.h"
2528
#include "llvm/Analysis/LazyValueInfo.h"
2629
#include "llvm/Analysis/Loads.h"
30+
#include "llvm/Analysis/LoopInfo.h"
2731
#include "llvm/Analysis/TargetLibraryInfo.h"
2832
#include "llvm/IR/DataLayout.h"
2933
#include "llvm/IR/IntrinsicInst.h"
3034
#include "llvm/IR/LLVMContext.h"
35+
#include "llvm/IR/MDBuilder.h"
3136
#include "llvm/IR/Metadata.h"
3237
#include "llvm/IR/ValueHandle.h"
3338
#include "llvm/Pass.h"
@@ -37,6 +42,8 @@
3742
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3843
#include "llvm/Transforms/Utils/Local.h"
3944
#include "llvm/Transforms/Utils/SSAUpdater.h"
45+
#include <algorithm>
46+
#include <memory>
4047
using namespace llvm;
4148

4249
#define DEBUG_TYPE "jump-threading"
@@ -81,6 +88,9 @@ namespace {
8188
class JumpThreading : public FunctionPass {
8289
TargetLibraryInfo *TLI;
8390
LazyValueInfo *LVI;
91+
std::unique_ptr<BlockFrequencyInfo> BFI;
92+
std::unique_ptr<BranchProbabilityInfo> BPI;
93+
bool HasProfileData;
8494
#ifdef NDEBUG
8595
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
8696
#else
@@ -119,6 +129,11 @@ namespace {
119129
AU.addRequired<TargetLibraryInfoWrapperPass>();
120130
}
121131

132+
void releaseMemory() override {
133+
BFI.reset();
134+
BPI.reset();
135+
}
136+
122137
void FindLoopHeaders(Function &F);
123138
bool ProcessBlock(BasicBlock *BB);
124139
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
@@ -139,6 +154,12 @@ namespace {
139154

140155
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
141156
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
157+
158+
private:
159+
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
160+
const char *Suffix);
161+
void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
162+
BasicBlock *NewBB, BasicBlock *SuccBB);
142163
};
143164
}
144165

@@ -162,6 +183,16 @@ bool JumpThreading::runOnFunction(Function &F) {
162183
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
163184
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
164185
LVI = &getAnalysis<LazyValueInfo>();
186+
BFI.reset();
187+
BPI.reset();
188+
// When profile data is available, we need to update edge weights after
189+
// successful jump threading, which requires both BPI and BFI being available.
190+
HasProfileData = F.getEntryCount().hasValue();
191+
if (HasProfileData) {
192+
LoopInfo LI{DominatorTree(F)};
193+
BPI.reset(new BranchProbabilityInfo(F, LI));
194+
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
195+
}
165196

166197
// Remove unreachable blocks from function as they may result in infinite
167198
// loop. We do threading if we found something profitable. Jump threading a
@@ -977,8 +1008,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
9771008
}
9781009

9791010
// Split them out to their own block.
980-
UnavailablePred =
981-
SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
1011+
UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
9821012
}
9831013

9841014
// If the value isn't available in all predecessors, then there will be
@@ -1403,7 +1433,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
14031433
else {
14041434
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
14051435
<< " common predecessors.\n");
1406-
PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
1436+
PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
14071437
}
14081438

14091439
// And finally, do it!
@@ -1424,6 +1454,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
14241454
BB->getParent(), BB);
14251455
NewBB->moveAfter(PredBB);
14261456

1457+
// Set the block frequency of NewBB.
1458+
if (HasProfileData) {
1459+
auto NewBBFreq =
1460+
BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
1461+
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
1462+
}
1463+
14271464
BasicBlock::iterator BI = BB->begin();
14281465
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
14291466
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -1447,7 +1484,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
14471484

14481485
// We didn't copy the terminator from BB over to NewBB, because there is now
14491486
// an unconditional jump to SuccBB. Insert the unconditional jump.
1450-
BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
1487+
BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
14511488
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
14521489

14531490
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
@@ -1508,11 +1545,85 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
15081545
// frequently happens because of phi translation.
15091546
SimplifyInstructionsInBlock(NewBB, TLI);
15101547

1548+
// Update the edge weight from BB to SuccBB, which should be less than before.
1549+
UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
1550+
15111551
// Threaded an edge!
15121552
++NumThreads;
15131553
return true;
15141554
}
15151555

1556+
/// Create a new basic block that will be the predecessor of BB and successor of
1557+
/// all blocks in Preds. When profile data is availble, update the frequency of
1558+
/// this new block.
1559+
BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
1560+
ArrayRef<BasicBlock *> Preds,
1561+
const char *Suffix) {
1562+
// Collect the frequencies of all predecessors of BB, which will be used to
1563+
// update the edge weight on BB->SuccBB.
1564+
BlockFrequency PredBBFreq(0);
1565+
if (HasProfileData)
1566+
for (auto Pred : Preds)
1567+
PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
1568+
1569+
BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
1570+
1571+
// Set the block frequency of the newly created PredBB, which is the sum of
1572+
// frequencies of Preds.
1573+
if (HasProfileData)
1574+
BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
1575+
return PredBB;
1576+
}
1577+
1578+
/// Update the block frequency of BB and branch weight and the metadata on the
1579+
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
1580+
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
1581+
void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
1582+
BasicBlock *BB,
1583+
BasicBlock *NewBB,
1584+
BasicBlock *SuccBB) {
1585+
if (!HasProfileData)
1586+
return;
1587+
1588+
assert(BFI && BPI && "BFI & BPI should have been created here");
1589+
1590+
// As the edge from PredBB to BB is deleted, we have to update the block
1591+
// frequency of BB.
1592+
auto BBOrigFreq = BFI->getBlockFreq(BB);
1593+
auto NewBBFreq = BFI->getBlockFreq(NewBB);
1594+
auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
1595+
auto BBNewFreq = BBOrigFreq - NewBBFreq;
1596+
BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
1597+
1598+
// Collect updated outgoing edges' frequencies from BB and use them to update
1599+
// edge weights.
1600+
SmallVector<uint64_t, 4> BBSuccFreq;
1601+
for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
1602+
auto SuccFreq = (*I == SuccBB)
1603+
? BB2SuccBBFreq - NewBBFreq
1604+
: BBOrigFreq * BPI->getEdgeProbability(BB, *I);
1605+
BBSuccFreq.push_back(SuccFreq.getFrequency());
1606+
}
1607+
1608+
// Normalize edge weights in Weights64 so that the sum of them can fit in
1609+
BranchProbability::normalizeEdgeWeights(BBSuccFreq.begin(), BBSuccFreq.end());
1610+
1611+
SmallVector<uint32_t, 4> Weights;
1612+
for (auto Freq : BBSuccFreq)
1613+
Weights.push_back(static_cast<uint32_t>(Freq));
1614+
1615+
// Update edge weights in BPI.
1616+
for (int I = 0, E = Weights.size(); I < E; I++)
1617+
BPI->setEdgeWeight(BB, I, Weights[I]);
1618+
1619+
if (Weights.size() >= 2) {
1620+
auto TI = BB->getTerminator();
1621+
TI->setMetadata(
1622+
LLVMContext::MD_prof,
1623+
MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
1624+
}
1625+
}
1626+
15161627
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
15171628
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
15181629
/// If we can duplicate the contents of BB up into PredBB do so now, this
@@ -1546,7 +1657,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
15461657
else {
15471658
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
15481659
<< " common predecessors.\n");
1549-
PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
1660+
PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
15501661
}
15511662

15521663
// Okay, we decided to do this! Clone all the instructions in BB onto the end

0 commit comments

Comments
 (0)
Please sign in to comment.