Index: include/llvm/CodeGen/MachineBasicBlock.h =================================================================== --- include/llvm/CodeGen/MachineBasicBlock.h +++ include/llvm/CodeGen/MachineBasicBlock.h @@ -65,6 +65,10 @@ Instructions Insts; const BasicBlock *BB; int Number; + + /// A flag tracking whether the weights of all successors are normalized. + bool AreSuccWeightsNormalized; + MachineFunction *xParent; /// Keep track of the predecessor / successor basic blocks. @@ -129,6 +133,9 @@ const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } + /// Return whether all weights of successors are normalized. + bool areSuccWeightsNormalized() const { return AreSuccWeightsNormalized; } + /// MachineBasicBlock iterator that automatically skips over MIs that are /// inside bundles (i.e. walk top level MIs only). template @@ -384,6 +391,12 @@ /// Set successor weight of a given iterator. void setSuccWeight(succ_iterator I, uint32_t weight); + /// Normalize all succesor weights so that the sum of them does not exceed + /// UINT32_MAX. Return true if the weights are modified and false otherwise. + /// Note that weights that are modified after calling this function are not + /// guaranteed to be normalized. + bool normalizeSuccWeights(); + /// Remove successor from the successors list of this MachineBasicBlock. The /// Predecessors list of succ is automatically updated. void removeSuccessor(MachineBasicBlock *succ); Index: include/llvm/CodeGen/MachineBranchProbabilityInfo.h =================================================================== --- include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -60,6 +60,10 @@ // adjustment. Any edge weights used with the sum should be divided by Scale. uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const; + // Get sum of the block successors' weights, and force normalizing the + // successors' weights of MBB so that their sum fit within 32-bits. + uint32_t getSumForBlock(MachineBasicBlock *MBB) const; + // A 'Hot' edge is an edge which probability is >= 80%. bool isEdgeHot(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const; @@ -83,8 +87,34 @@ raw_ostream &printEdgeProbability(raw_ostream &OS, const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const; + + // Normalize a list of weights by scaling them down so that the sum of them + // doesn't exceed UINT32_MAX. Return the scale. + template + static uint32_t normalizeEdgeWeights(WeightList &Weights); }; +template +uint32_t +MachineBranchProbabilityInfo::normalizeEdgeWeights(WeightList &Weights) { + assert(Weights.size() < UINT32_MAX && "Too many weights in the list!"); + // First we compute the sum with 64-bits of precision. + uint64_t Sum = std::accumulate(Weights.begin(), Weights.end(), uint64_t(0)); + + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return 1; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX && + "The sum of weights exceeds UINT32_MAX^2!"); + uint32_t Scale = (Sum / UINT32_MAX) + 1; + for (auto &W : Weights) + W /= Scale; + return Scale; +} + } Index: lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- lib/Analysis/BranchProbabilityInfo.cpp +++ lib/Analysis/BranchProbabilityInfo.cpp @@ -202,7 +202,9 @@ assert(Weight->getValue().getActiveBits() <= 32 && "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); - WeightSum += Weights.back(); + // We will turn zero weights into one so here we need to take this into + // account when summing up weights. + WeightSum += std::max(Weights.back(), 1); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); @@ -213,7 +215,8 @@ WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - uint32_t W = Weights[i] / ScalingFactor; + // Turn zero weights into one. + uint32_t W = std::max(Weights[i] / ScalingFactor, 1); WeightSum += W; setEdgeWeight(BB, i, W); } Index: lib/CodeGen/IfConversion.cpp =================================================================== --- lib/CodeGen/IfConversion.cpp +++ lib/CodeGen/IfConversion.cpp @@ -1232,15 +1232,17 @@ bool HasEarlyExit = CvtBBI->FalseBB != nullptr; uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; - uint32_t WeightScale = 0; if (HasEarlyExit) { // Get weights before modifying CvtBBI->BB and BBI.BB. + // Explictly normalize the weights of all edges from CvtBBI->BB so that we + // are aware that the edge weights obtained below are normalized. + CvtBBI->BB->normalizeSuccWeights(); CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); - SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + SumWeight = MBPI->getSumForBlock(CvtBBI->BB); } if (CvtBBI->BB->pred_size() > 1) { @@ -1277,8 +1279,8 @@ // New_Weight(BBI.BB, CvtBBI->FalseBB) = // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) - uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; - uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; + uint64_t NewNext = BBNext * SumWeight + BBCvt * CvtNext; + uint64_t NewFalse = BBCvt * CvtFalse; // We need to scale down all weights of BBI.BB to fit uint32_t. // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to // the next block. Index: lib/CodeGen/MachineBasicBlock.cpp =================================================================== --- lib/CodeGen/MachineBasicBlock.cpp +++ lib/CodeGen/MachineBasicBlock.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -39,8 +40,9 @@ #define DEBUG_TYPE "codegen" MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) - : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false), CachedMCSymbol(nullptr) { + : BB(bb), Number(-1), AreSuccWeightsNormalized(false), xParent(&mf), + Alignment(0), IsLandingPad(false), AddressTaken(false), + CachedMCSymbol(nullptr) { Insts.Parent = this; } @@ -481,8 +483,10 @@ if (weight != 0 && Weights.empty()) Weights.resize(Successors.size()); - if (weight != 0 || !Weights.empty()) + if (weight != 0 || !Weights.empty()) { Weights.push_back(weight); + AreSuccWeightsNormalized = false; + } Successors.push_back(succ); succ->addPredecessor(this); @@ -1096,7 +1100,25 @@ void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) { if (Weights.empty()) return; - *getWeightIterator(I) = weight; + auto WeightIter = getWeightIterator(I); + uint32_t OldWeight = *WeightIter; + *WeightIter = weight; + if (weight > OldWeight) + AreSuccWeightsNormalized = false; +} + +/// Normalize all succesor weights so that the sum of them does not exceed +/// UINT32_MAX. Return true if the weights are modified and false otherwise. +/// Note that weights that are modified after calling this function are not +/// guaranteed to be normalized. +bool MachineBasicBlock::normalizeSuccWeights() { + if (!AreSuccWeightsNormalized) { + uint32_t Scale = + MachineBranchProbabilityInfo::normalizeEdgeWeights(Weights); + AreSuccWeightsNormalized = true; + return Scale != 1; + } + return false; } /// getWeightIterator - Return wight iterator corresonding to the I successor Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -361,8 +361,7 @@ // improve the MBPI interface to efficiently support query patterns such as // this. uint32_t BestWeight = 0; - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); + uint32_t SumWeight = MBPI->getSumForBlock(BB); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); for (MachineBasicBlock *Succ : BB->successors()) { if (BlockFilter && !BlockFilter->count(Succ)) @@ -378,7 +377,7 @@ } uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BranchProbability SuccProb(SuccWeight, SumWeight); // If we outline optional branches, look whether Succ is unavoidable, i.e. // dominates all terminators of the MachineFunction. If it does, other @@ -675,8 +674,7 @@ // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we use the internal weights and manually compute the // probabilities to avoid quadratic behavior. - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); + uint32_t SumWeight = MBPI->getSumForBlock(MBB); for (MachineBasicBlock *Succ : MBB->successors()) { if (Succ->isLandingPad()) continue; @@ -705,7 +703,7 @@ BlocksExitingToOuterLoop.insert(MBB); } - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BranchProbability SuccProb(SuccWeight, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; Index: lib/CodeGen/MachineBranchProbabilityInfo.cpp =================================================================== --- lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -28,36 +28,35 @@ void MachineBranchProbabilityInfo::anchor() { } -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { - // First we compute the sum with 64-bits of precision, ensuring that cannot - // overflow by bounding the number of weights considered. Hopefully no one - // actually needs 2^32 successors. - assert(MBB->succ_size() < UINT32_MAX); - uint64_t Sum = 0; - Scale = 1; +uint32_t +MachineBranchProbabilityInfo::getSumForBlock(MachineBasicBlock *MBB) const { + // Normalize the weights of MBB's all successors so that the sum is guaranteed + // to be no greater than UINT32_MAX. + MBB->normalizeSuccWeights(); + + SmallVector Weights; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight; - } + E = MBB->succ_end(); + I != E; ++I) + Weights.push_back(getEdgeWeight(MBB, I)); - // If the computed sum fits in 32-bits, we're done. - if (Sum <= UINT32_MAX) - return Sum; + return std::accumulate(Weights.begin(), Weights.end(), 0u); +} - // Otherwise, compute the scale necessary to cause the weights to fit, and - // re-sum with that scale applied. - assert((Sum / UINT32_MAX) < UINT32_MAX); - Scale = (Sum / UINT32_MAX) + 1; - Sum = 0; +uint32_t +MachineBranchProbabilityInfo::getSumForBlock(const MachineBasicBlock *MBB, + uint32_t &Scale) const { + SmallVector Weights; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight / Scale; - } - assert(Sum <= UINT32_MAX); - return Sum; + E = MBB->succ_end(); + I != E; ++I) + Weights.push_back(getEdgeWeight(MBB, I)); + + if (MBB->areSuccWeightsNormalized()) + Scale = 1; + else + Scale = MachineBranchProbabilityInfo::normalizeEdgeWeights(Weights); + return std::accumulate(Weights.begin(), Weights.end(), 0u); } uint32_t MachineBranchProbabilityInfo:: Index: test/CodeGen/X86/pr24377.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr24377.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=x86_64-linux < %s + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @__kmpc_atomic_cmplx8_mul() #0 { +entry: + br i1 undef, label %if.then.i.43, label %if.end.i.47 + +if.then.i.43: ; preds = %entry + unreachable + +if.end.i.47: ; preds = %entry + br i1 undef, label %if.then.4.i.48, label %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49 + +if.then.4.i.48: ; preds = %if.end.i.47 + unreachable + +_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49: ; preds = %if.end.i.47 + br i1 undef, label %complex_mul_libcall.23, label %complex_mul_cont.25, !prof !1 + +complex_mul_libcall.23: ; preds = %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49 + tail call void @__muldc3() #1 + br label %complex_mul_cont.25 + +complex_mul_cont.25: ; preds = %complex_mul_libcall.23, %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49 + br i1 undef, label %if.then.i.54, label %return + +if.then.i.54: ; preds = %complex_mul_cont.25 + unreachable + +return: ; preds = %complex_mul_cont.25 + ret void +} + +declare void @__muldc3() + +attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0 "} +!1 = !{!"branch_weights", i32 0, i32 -1} Index: test/Transforms/SampleProfile/branch.ll =================================================================== --- test/Transforms/SampleProfile/branch.ll +++ test/Transforms/SampleProfile/branch.ll @@ -36,8 +36,8 @@ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !14, metadata !DIExpression()), !dbg !27 %cmp = icmp slt i32 %argc, 2, !dbg !28 br i1 %cmp, label %return, label %if.end, !dbg !28 -; CHECK: edge entry -> return probability is 0 / 1 = 0% -; CHECK: edge entry -> if.end probability is 1 / 1 = 100% +; CHECK: edge entry -> return probability is 1 / 2 = 50% +; CHECK: edge entry -> if.end probability is 1 / 2 = 50% if.end: ; preds = %entry %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !30 @@ -46,8 +46,8 @@ tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !DIExpression()), !dbg !30 %cmp1 = icmp sgt i32 %call, 100, !dbg !35 br i1 %cmp1, label %for.body, label %if.end6, !dbg !35 -; CHECK: edge if.end -> for.body probability is 0 / 1 = 0% -; CHECK: edge if.end -> if.end6 probability is 1 / 1 = 100% +; CHECK: edge if.end -> for.body probability is 1 / 2 = 50% +; CHECK: edge if.end -> if.end6 probability is 1 / 2 = 50% for.body: ; preds = %if.end, %for.body %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ] @@ -65,8 +65,8 @@ tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !21, metadata !DIExpression()), !dbg !38 %exitcond = icmp eq i32 %inc, %call, !dbg !38 br i1 %exitcond, label %if.end6, label %for.body, !dbg !38 -; CHECK: edge for.body -> if.end6 probability is 0 / 10226 = 0% -; CHECK: edge for.body -> for.body probability is 10226 / 10226 = 100% [HOT edge] +; CHECK: edge for.body -> if.end6 probability is 1 / 10227 = 0.00977804% +; CHECK: edge for.body -> for.body probability is 10226 / 10227 = 99.9902% [HOT edge] if.end6: ; preds = %for.body, %if.end %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ] Index: test/Transforms/SampleProfile/calls.ll =================================================================== --- test/Transforms/SampleProfile/calls.ll +++ test/Transforms/SampleProfile/calls.ll @@ -52,8 +52,8 @@ store i32 %inc, i32* %i, align 4, !dbg !14 %cmp = icmp slt i32 %0, 400000000, !dbg !14 br i1 %cmp, label %while.body, label %while.end, !dbg !14 -; CHECK: edge while.cond -> while.body probability is 5391 / 5391 = 100% [HOT edge] -; CHECK: edge while.cond -> while.end probability is 0 / 5391 = 0% +; CHECK: edge while.cond -> while.body probability is 5391 / 5392 = 99.9815% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 1 / 5392 = 0.018546% while.body: ; preds = %while.cond %1 = load i32, i32* %i, align 4, !dbg !16 @@ -63,8 +63,8 @@ ; both branches out of while.body had the same weight. In reality, ; the edge while.body->if.then is taken most of the time. ; -; CHECK: edge while.body -> if.then probability is 5752 / 5752 = 100% [HOT edge] -; CHECK: edge while.body -> if.else probability is 0 / 5752 = 0% +; CHECK: edge while.body -> if.then probability is 5752 / 5753 = 99.9826% [HOT edge] +; CHECK: edge while.body -> if.else probability is 1 / 5753 = 0.0173822% if.then: ; preds = %while.body Index: test/Transforms/SampleProfile/propagate.ll =================================================================== --- test/Transforms/SampleProfile/propagate.ll +++ test/Transforms/SampleProfile/propagate.ll @@ -73,8 +73,8 @@ %5 = load i64, i64* %N.addr, align 8, !dbg !15 %cmp1 = icmp slt i64 %4, %5, !dbg !15 br i1 %cmp1, label %for.body, label %for.end18, !dbg !15 -; CHECK: edge for.cond -> for.body probability is 10 / 10 = 100% [HOT edge] -; CHECK: edge for.cond -> for.end18 probability is 0 / 10 = 0% +; CHECK: edge for.cond -> for.body probability is 10 / 11 = 90.9091% [HOT edge] +; CHECK: edge for.cond -> for.end18 probability is 1 / 11 = 9.09091% for.body: ; preds = %for.cond %6 = load i64, i64* %i, align 8, !dbg !18 @@ -119,8 +119,8 @@ %14 = load i64, i64* %i, align 8, !dbg !28 %cmp10 = icmp slt i64 %conv9, %14, !dbg !28 br i1 %cmp10, label %for.body11, label %for.end, !dbg !28 -; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16191 = 100% [HOT edge] -; CHECK: edge for.cond8 -> for.end probability is 0 / 16191 = 0% +; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16192 = 99.9938% [HOT edge] +; CHECK: edge for.cond8 -> for.end probability is 1 / 16192 = 0.00617589% for.body11: ; preds = %for.cond8 %15 = load i32, i32* %j, align 4, !dbg !31