Index: include/llvm/CodeGen/MachineBasicBlock.h
===================================================================
--- include/llvm/CodeGen/MachineBasicBlock.h
+++ include/llvm/CodeGen/MachineBasicBlock.h
@@ -65,6 +65,10 @@
   Instructions Insts;
   const BasicBlock *BB;
   int Number;
+
+  /// A flag tracking whether the weights of all successors are normalized.
+  bool AreSuccWeightsNormalized;
+
   MachineFunction *xParent;
 
   /// Keep track of the predecessor / successor basic blocks.
@@ -129,6 +133,9 @@
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
+  /// Return whether all weights of successors are normalized.
+  bool areSuccWeightsNormalized() const { return AreSuccWeightsNormalized; }
+
   /// MachineBasicBlock iterator that automatically skips over MIs that are
   /// inside bundles (i.e. walk top level MIs only).
   template<typename Ty, typename IterTy>
@@ -384,6 +391,12 @@
   /// Set successor weight of a given iterator.
   void setSuccWeight(succ_iterator I, uint32_t weight);
 
+  /// Normalize all succesor weights so that the sum of them does not exceed
+  /// UINT32_MAX. Return true if the weights are modified and false otherwise.
+  /// Note that weights that are modified after calling this function are not
+  /// guaranteed to be normalized.
+  bool normalizeSuccWeights();
+
   /// Remove successor from the successors list of this MachineBasicBlock. The
   /// Predecessors list of succ is automatically updated.
   void removeSuccessor(MachineBasicBlock *succ);
Index: include/llvm/CodeGen/MachineBranchProbabilityInfo.h
===================================================================
--- include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -60,6 +60,10 @@
   // adjustment. Any edge weights used with the sum should be divided by Scale.
   uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const;
 
+  // Get sum of the block successors' weights, and force normalizing the
+  // successors' weights of MBB so that their sum fit within 32-bits.
+  uint32_t getSumForBlock(MachineBasicBlock *MBB) const;
+
   // A 'Hot' edge is an edge which probability is >= 80%.
   bool isEdgeHot(const MachineBasicBlock *Src,
                  const MachineBasicBlock *Dst) const;
@@ -83,8 +87,34 @@
   raw_ostream &printEdgeProbability(raw_ostream &OS,
                                     const MachineBasicBlock *Src,
                                     const MachineBasicBlock *Dst) const;
+
+  // Normalize a list of weights by scaling them down so that the sum of them
+  // doesn't exceed UINT32_MAX. Return the scale.
+  template <class WeightList>
+  static uint32_t normalizeEdgeWeights(WeightList &Weights);
 };
 
+template <class WeightList>
+uint32_t
+MachineBranchProbabilityInfo::normalizeEdgeWeights(WeightList &Weights) {
+  assert(Weights.size() < UINT32_MAX && "Too many weights in the list!");
+  // First we compute the sum with 64-bits of precision.
+  uint64_t Sum = std::accumulate(Weights.begin(), Weights.end(), uint64_t(0));
+
+  // If the computed sum fits in 32-bits, we're done.
+  if (Sum <= UINT32_MAX)
+    return 1;
+
+  // Otherwise, compute the scale necessary to cause the weights to fit, and
+  // re-sum with that scale applied.
+  assert((Sum / UINT32_MAX) < UINT32_MAX &&
+         "The sum of weights exceeds UINT32_MAX^2!");
+  uint32_t Scale = (Sum / UINT32_MAX) + 1;
+  for (auto &W : Weights)
+    W /= Scale;
+  return Scale;
+}
+
 }
 
 
Index: lib/Analysis/BranchProbabilityInfo.cpp
===================================================================
--- lib/Analysis/BranchProbabilityInfo.cpp
+++ lib/Analysis/BranchProbabilityInfo.cpp
@@ -202,7 +202,9 @@
     assert(Weight->getValue().getActiveBits() <= 32 &&
            "Too many bits for uint32_t");
     Weights.push_back(Weight->getZExtValue());
-    WeightSum += Weights.back();
+    // We will turn zero weights into one so here we need to take this into
+    // account when summing up weights.
+    WeightSum += std::max<uint32_t>(Weights.back(), 1);
   }
   assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
 
@@ -213,7 +215,8 @@
 
   WeightSum = 0;
   for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
-    uint32_t W = Weights[i] / ScalingFactor;
+    // Turn zero weights into one.
+    uint32_t W = std::max<uint32_t>(Weights[i] / ScalingFactor, 1);
     WeightSum += W;
     setEdgeWeight(BB, i, W);
   }
Index: lib/CodeGen/IfConversion.cpp
===================================================================
--- lib/CodeGen/IfConversion.cpp
+++ lib/CodeGen/IfConversion.cpp
@@ -1232,15 +1232,17 @@
 
   bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
   uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
-  uint32_t WeightScale = 0;
 
   if (HasEarlyExit) {
     // Get weights before modifying CvtBBI->BB and BBI.BB.
+    // Explictly normalize the weights of all edges from CvtBBI->BB so that we
+    // are aware that the edge weights obtained below are normalized.
+    CvtBBI->BB->normalizeSuccWeights();
     CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
     CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB);
     BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB);
     BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
-    SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
+    SumWeight = MBPI->getSumForBlock(CvtBBI->BB);
   }
 
   if (CvtBBI->BB->pred_size() > 1) {
@@ -1277,8 +1279,8 @@
     // New_Weight(BBI.BB, CvtBBI->FalseBB) =
     //   Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB)
 
-    uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale;
-    uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale;
+    uint64_t NewNext = BBNext * SumWeight + BBCvt * CvtNext;
+    uint64_t NewFalse = BBCvt * CvtFalse;
     // We need to scale down all weights of BBI.BB to fit uint32_t.
     // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to
     // the next block.
Index: lib/CodeGen/MachineBasicBlock.cpp
===================================================================
--- lib/CodeGen/MachineBasicBlock.cpp
+++ lib/CodeGen/MachineBasicBlock.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -39,8 +40,9 @@
 #define DEBUG_TYPE "codegen"
 
 MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
-  : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
-    AddressTaken(false), CachedMCSymbol(nullptr) {
+    : BB(bb), Number(-1), AreSuccWeightsNormalized(false), xParent(&mf),
+      Alignment(0), IsLandingPad(false), AddressTaken(false),
+      CachedMCSymbol(nullptr) {
   Insts.Parent = this;
 }
 
@@ -481,8 +483,10 @@
   if (weight != 0 && Weights.empty())
     Weights.resize(Successors.size());
 
-  if (weight != 0 || !Weights.empty())
+  if (weight != 0 || !Weights.empty()) {
     Weights.push_back(weight);
+    AreSuccWeightsNormalized = false;
+  }
 
    Successors.push_back(succ);
    succ->addPredecessor(this);
@@ -1096,7 +1100,25 @@
 void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) {
   if (Weights.empty())
     return;
-  *getWeightIterator(I) = weight;
+  auto WeightIter = getWeightIterator(I);
+  uint32_t OldWeight = *WeightIter;
+  *WeightIter = weight;
+  if (weight > OldWeight)
+    AreSuccWeightsNormalized = false;
+}
+
+/// Normalize all succesor weights so that the sum of them does not exceed
+/// UINT32_MAX. Return true if the weights are modified and false otherwise.
+/// Note that weights that are modified after calling this function are not
+/// guaranteed to be normalized.
+bool MachineBasicBlock::normalizeSuccWeights() {
+  if (!AreSuccWeightsNormalized) {
+    uint32_t Scale =
+        MachineBranchProbabilityInfo::normalizeEdgeWeights(Weights);
+    AreSuccWeightsNormalized = true;
+    return Scale != 1;
+  }
+  return false;
 }
 
 /// getWeightIterator - Return wight iterator corresonding to the I successor
Index: lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- lib/CodeGen/MachineBlockPlacement.cpp
+++ lib/CodeGen/MachineBlockPlacement.cpp
@@ -361,8 +361,7 @@
   // improve the MBPI interface to efficiently support query patterns such as
   // this.
   uint32_t BestWeight = 0;
-  uint32_t WeightScale = 0;
-  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+  uint32_t SumWeight = MBPI->getSumForBlock(BB);
   DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
   for (MachineBasicBlock *Succ : BB->successors()) {
     if (BlockFilter && !BlockFilter->count(Succ))
@@ -378,7 +377,7 @@
     }
 
     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
-    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+    BranchProbability SuccProb(SuccWeight, SumWeight);
 
     // If we outline optional branches, look whether Succ is unavoidable, i.e.
     // dominates all terminators of the MachineFunction. If it does, other
@@ -675,8 +674,7 @@
     // FIXME: Due to the performance of the probability and weight routines in
     // the MBPI analysis, we use the internal weights and manually compute the
     // probabilities to avoid quadratic behavior.
-    uint32_t WeightScale = 0;
-    uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
+    uint32_t SumWeight = MBPI->getSumForBlock(MBB);
     for (MachineBasicBlock *Succ : MBB->successors()) {
       if (Succ->isLandingPad())
         continue;
@@ -705,7 +703,7 @@
           BlocksExitingToOuterLoop.insert(MBB);
       }
 
-      BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+      BranchProbability SuccProb(SuccWeight, SumWeight);
       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
       DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> "
                    << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
Index: lib/CodeGen/MachineBranchProbabilityInfo.cpp
===================================================================
--- lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -28,36 +28,35 @@
 
 void MachineBranchProbabilityInfo::anchor() { }
 
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
-  // First we compute the sum with 64-bits of precision, ensuring that cannot
-  // overflow by bounding the number of weights considered. Hopefully no one
-  // actually needs 2^32 successors.
-  assert(MBB->succ_size() < UINT32_MAX);
-  uint64_t Sum = 0;
-  Scale = 1;
+uint32_t
+MachineBranchProbabilityInfo::getSumForBlock(MachineBasicBlock *MBB) const {
+  // Normalize the weights of MBB's all successors so that the sum is guaranteed
+  // to be no greater than UINT32_MAX.
+  MBB->normalizeSuccWeights();
+
+  SmallVector<uint32_t, 8> Weights;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
-       E = MBB->succ_end(); I != E; ++I) {
-    uint32_t Weight = getEdgeWeight(MBB, I);
-    Sum += Weight;
-  }
+                                              E = MBB->succ_end();
+       I != E; ++I)
+    Weights.push_back(getEdgeWeight(MBB, I));
 
-  // If the computed sum fits in 32-bits, we're done.
-  if (Sum <= UINT32_MAX)
-    return Sum;
+  return std::accumulate(Weights.begin(), Weights.end(), 0u);
+}
 
-  // Otherwise, compute the scale necessary to cause the weights to fit, and
-  // re-sum with that scale applied.
-  assert((Sum / UINT32_MAX) < UINT32_MAX);
-  Scale = (Sum / UINT32_MAX) + 1;
-  Sum = 0;
+uint32_t
+MachineBranchProbabilityInfo::getSumForBlock(const MachineBasicBlock *MBB,
+                                             uint32_t &Scale) const {
+  SmallVector<uint32_t, 8> Weights;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
-       E = MBB->succ_end(); I != E; ++I) {
-    uint32_t Weight = getEdgeWeight(MBB, I);
-    Sum += Weight / Scale;
-  }
-  assert(Sum <= UINT32_MAX);
-  return Sum;
+                                              E = MBB->succ_end();
+       I != E; ++I)
+    Weights.push_back(getEdgeWeight(MBB, I));
+
+  if (MBB->areSuccWeightsNormalized())
+    Scale = 1;
+  else
+    Scale = MachineBranchProbabilityInfo::normalizeEdgeWeights(Weights);
+  return std::accumulate(Weights.begin(), Weights.end(), 0u);
 }
 
 uint32_t MachineBranchProbabilityInfo::
Index: test/CodeGen/X86/pr24377.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/pr24377.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-linux < %s
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @__kmpc_atomic_cmplx8_mul() #0 {
+entry:
+  br i1 undef, label %if.then.i.43, label %if.end.i.47
+
+if.then.i.43:                                     ; preds = %entry
+  unreachable
+
+if.end.i.47:                                      ; preds = %entry
+  br i1 undef, label %if.then.4.i.48, label %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49
+
+if.then.4.i.48:                                   ; preds = %if.end.i.47
+  unreachable
+
+_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49: ; preds = %if.end.i.47
+  br i1 undef, label %complex_mul_libcall.23, label %complex_mul_cont.25, !prof !1
+
+complex_mul_libcall.23:                           ; preds = %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49
+  tail call void @__muldc3() #1
+  br label %complex_mul_cont.25
+
+complex_mul_cont.25:                              ; preds = %complex_mul_libcall.23, %_Z25__kmp_acquire_atomic_lockP16kmp_queuing_locki.exit49
+  br i1 undef, label %if.then.i.54, label %return
+
+if.then.i.54:                                     ; preds = %complex_mul_cont.25
+  unreachable
+
+return:                                           ; preds = %complex_mul_cont.25
+  ret void
+}
+
+declare void @__muldc3()
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.8.0 "}
+!1 = !{!"branch_weights", i32 0, i32 -1}
Index: test/Transforms/SampleProfile/branch.ll
===================================================================
--- test/Transforms/SampleProfile/branch.ll
+++ test/Transforms/SampleProfile/branch.ll
@@ -36,8 +36,8 @@
   tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !14, metadata !DIExpression()), !dbg !27
   %cmp = icmp slt i32 %argc, 2, !dbg !28
   br i1 %cmp, label %return, label %if.end, !dbg !28
-; CHECK: edge entry -> return probability is 0 / 1 = 0%
-; CHECK: edge entry -> if.end probability is 1 / 1 = 100%
+; CHECK: edge entry -> return probability is 1 / 2 = 50%
+; CHECK: edge entry -> if.end probability is 1 / 2 = 50%
 
 if.end:                                           ; preds = %entry
   %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !30
@@ -46,8 +46,8 @@
   tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
   %cmp1 = icmp sgt i32 %call, 100, !dbg !35
   br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
-; CHECK: edge if.end -> for.body probability is 0 / 1 = 0%
-; CHECK: edge if.end -> if.end6 probability is 1 / 1 = 100%
+; CHECK: edge if.end -> for.body probability is 1 / 2 = 50%
+; CHECK: edge if.end -> if.end6 probability is 1 / 2 = 50%
 
 for.body:                                         ; preds = %if.end, %for.body
   %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
@@ -65,8 +65,8 @@
   tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !21, metadata !DIExpression()), !dbg !38
   %exitcond = icmp eq i32 %inc, %call, !dbg !38
   br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
-; CHECK: edge for.body -> if.end6 probability is 0 / 10226 = 0%
-; CHECK: edge for.body -> for.body probability is 10226 / 10226 = 100% [HOT edge]
+; CHECK: edge for.body -> if.end6 probability is 1 / 10227 = 0.00977804%
+; CHECK: edge for.body -> for.body probability is 10226 / 10227 = 99.9902% [HOT edge]
 
 if.end6:                                          ; preds = %for.body, %if.end
   %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
Index: test/Transforms/SampleProfile/calls.ll
===================================================================
--- test/Transforms/SampleProfile/calls.ll
+++ test/Transforms/SampleProfile/calls.ll
@@ -52,8 +52,8 @@
   store i32 %inc, i32* %i, align 4, !dbg !14
   %cmp = icmp slt i32 %0, 400000000, !dbg !14
   br i1 %cmp, label %while.body, label %while.end, !dbg !14
-; CHECK: edge while.cond -> while.body probability is 5391 / 5391 = 100% [HOT edge]
-; CHECK: edge while.cond -> while.end probability is 0 / 5391 = 0%
+; CHECK: edge while.cond -> while.body probability is 5391 / 5392 = 99.9815% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 1 / 5392 = 0.018546%
 
 while.body:                                       ; preds = %while.cond
   %1 = load i32, i32* %i, align 4, !dbg !16
@@ -63,8 +63,8 @@
 ; both branches out of while.body had the same weight. In reality,
 ; the edge while.body->if.then is taken most of the time.
 ;
-; CHECK: edge while.body -> if.then probability is 5752 / 5752 = 100% [HOT edge]
-; CHECK: edge while.body -> if.else probability is 0 / 5752 = 0%
+; CHECK: edge while.body -> if.then probability is 5752 / 5753 = 99.9826% [HOT edge]
+; CHECK: edge while.body -> if.else probability is 1 / 5753 = 0.0173822%
 
 
 if.then:                                          ; preds = %while.body
Index: test/Transforms/SampleProfile/propagate.ll
===================================================================
--- test/Transforms/SampleProfile/propagate.ll
+++ test/Transforms/SampleProfile/propagate.ll
@@ -73,8 +73,8 @@
   %5 = load i64, i64* %N.addr, align 8, !dbg !15
   %cmp1 = icmp slt i64 %4, %5, !dbg !15
   br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
-; CHECK: edge for.cond -> for.body probability is 10 / 10 = 100% [HOT edge]
-; CHECK: edge for.cond -> for.end18 probability is 0 / 10 = 0%
+; CHECK: edge for.cond -> for.body probability is 10 / 11 = 90.9091% [HOT edge]
+; CHECK: edge for.cond -> for.end18 probability is 1 / 11 = 9.09091%
 
 for.body:                                         ; preds = %for.cond
   %6 = load i64, i64* %i, align 8, !dbg !18
@@ -119,8 +119,8 @@
   %14 = load i64, i64* %i, align 8, !dbg !28
   %cmp10 = icmp slt i64 %conv9, %14, !dbg !28
   br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
-; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16191 = 100% [HOT edge]
-; CHECK: edge for.cond8 -> for.end probability is 0 / 16191 = 0%
+; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16192 = 99.9938% [HOT edge]
+; CHECK: edge for.cond8 -> for.end probability is 1 / 16192 = 0.00617589%
 
 for.body11:                                       ; preds = %for.cond8
   %15 = load i32, i32* %j, align 4, !dbg !31