Index: lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- lib/Analysis/BranchProbabilityInfo.cpp +++ lib/Analysis/BranchProbabilityInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -115,11 +116,6 @@ // Minimum weight of an edge. Please note, that weight is NEVER 0. static const uint32_t MIN_WEIGHT = 1; -static uint32_t getMaxWeightFor(BasicBlock *BB) { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); -} - - /// \brief Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an @@ -185,15 +181,18 @@ if (!WeightsNode) return false; + // Check that the number of successors is manageable. + assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors"); + // Ensure there are weights for all of the successors. Note that the first // operand to the metadata node is a name, not a weight. if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) return false; - // Build up the final weights that will be used in a temporary buffer, but - // don't add them until all weights are present. Each weight value is clamped - // to [1, getMaxWeightFor(BB)]. - uint32_t WeightLimit = getMaxWeightFor(BB); + // Build up the final weights that will be used in a temporary buffer. + // Compute the sum of all weights to later decide whether they need to + // be scaled to fit in 32 bits. + uint64_t WeightSum = 0; SmallVector Weights; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { @@ -201,11 +200,26 @@ mdconst::dyn_extract(WeightsNode->getOperand(i)); if (!Weight) return false; - Weights.push_back(Weight->getLimitedValue(WeightLimit)); + assert(Weight->getValue().getActiveBits() <= 32 && + "Too many bits for uint32_t"); + Weights.push_back(Weight->getZExtValue()); + WeightSum += Weights.back(); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeWeight(BB, i, Weights[i]); + + // If the sum of weights does not fit in 32 bits, scale every weight down + // accordingly. + uint64_t ScalingFactor = + (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; + + WeightSum = 0; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + uint32_t W = Weights[i] / ScalingFactor; + WeightSum += W; + setEdgeWeight(BB, i, W); + } + assert(WeightSum <= UINT32_MAX && + "Expected weights to scale down to 32 bits"); return true; } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8014,7 +8014,7 @@ // here, use the same flooring mechanism previously used by BPI. Weight = std::max( 1u, BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex())); - assert(Weight <= UINT32_MAX / SI.getNumSuccessors()); + assert(Weight < UINT32_MAX); } Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); } Index: test/Analysis/BranchProbabilityInfo/pr22718.ll =================================================================== --- /dev/null +++ test/Analysis/BranchProbabilityInfo/pr22718.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -analyze -branch-prob | FileCheck %s + +; In this test, the else clause is taken about 90% of the time. This was not +; reflected in the probability computation because the weight is larger than +; the branch weight cap (about 2 billion). +; +; CHECK: edge for.body -> if.then probability is 216661881 / 2166666667 = 9.9 +; CHECK: edge for.body -> if.else probability is 1950004786 / 2166666667 = 90.0 + +@y = common global i64 0, align 8 +@x = common global i64 0, align 8 +@.str = private unnamed_addr constant [17 x i8] c"x = %lu\0Ay = %lu\0A\00", align 1 + +; Function Attrs: inlinehint nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i64, align 8 + store i32 0, i32* %retval + store i64 0, i64* @y, align 8 + store i64 0, i64* @x, align 8 + call void @srand(i32 422304) #3 + store i64 0, i64* %i, align 8 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i64, i64* %i, align 8 + %cmp = icmp ult i64 %0, 13000000000 + br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: ; preds = %for.cond + %call = call i32 @rand() #3 + %conv = sitofp i32 %call to double + %mul = fmul double %conv, 1.000000e+02 + %div = fdiv double %mul, 0x41E0000000000000 + %cmp1 = fcmp ogt double %div, 9.000000e+01 + br i1 %cmp1, label %if.then, label %if.else, !prof !2 + +if.then: ; preds = %for.body + %1 = load i64, i64* @x, align 8 + %inc = add i64 %1, 1 + store i64 %inc, i64* @x, align 8 + br label %if.end + +if.else: ; preds = %for.body + %2 = load i64, i64* @y, align 8 + %inc3 = add i64 %2, 1 + store i64 %inc3, i64* @y, align 8 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %3 = load i64, i64* %i, align 8 + %inc4 = add i64 %3, 1 + store i64 %inc4, i64* %i, align 8 + br label %for.cond + +for.end: ; preds = %for.cond + %4 = load i64, i64* @x, align 8 + %5 = load i64, i64* @y, align 8 + %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), i64 %4, i64 %5) + ret i32 0 +} + +; Function Attrs: nounwind +declare void @srand(i32) #1 + +; Function Attrs: nounwind +declare i32 @rand() #1 + +declare i32 @printf(i8*, ...) #2 + +attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.7.0 (trunk 236218) (llvm/trunk 236235)"} +!1 = !{!"branch_weights", i32 -1044967295, i32 1} +!2 = !{!"branch_weights", i32 433323762, i32 -394957723} Index: test/CodeGen/X86/MachineBranchProb.ll =================================================================== --- test/CodeGen/X86/MachineBranchProb.ll +++ test/CodeGen/X86/MachineBranchProb.ll @@ -18,9 +18,9 @@ %or.cond = or i1 %tobool, %cmp4 br i1 %or.cond, label %for.inc20, label %for.inc, !prof !0 ; CHECK: BB#1: derived from LLVM BB %for.cond2 -; CHECK: Successors according to CFG: BB#3(56008718) BB#4(2203492365) +; CHECK: Successors according to CFG: BB#3(56008718) BB#4(3615818718) ; CHECK: BB#4: derived from LLVM BB %for.cond2 -; CHECK: Successors according to CFG: BB#3(112017436) BB#2(4294967294) +; CHECK: Successors according to CFG: BB#3(56008718) BB#2(3559810000) for.inc: ; preds = %for.cond2 %shl = shl i32 %bit.0, 1