Index: include/llvm/Analysis/BlockFrequencyInfoImpl.h
===================================================================
--- include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -196,12 +196,13 @@
   struct LoopData {
     typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
     typedef SmallVector<BlockNode, 4> NodeList;
-    LoopData *Parent;       ///< The parent loop.
-    bool IsPackaged;        ///< Whether this has been packaged.
-    uint32_t NumHeaders;    ///< Number of headers.
-    ExitMap Exits;          ///< Successor edges (and weights).
-    NodeList Nodes;         ///< Header and the members of the loop.
-    BlockMass BackedgeMass; ///< Mass returned to loop header.
+    typedef SmallVector<BlockMass, 1> HeaderMassList;
+    LoopData *Parent;            ///< The parent loop.
+    bool IsPackaged;             ///< Whether this has been packaged.
+    uint32_t NumHeaders;         ///< Number of headers.
+    ExitMap Exits;               ///< Successor edges (and weights).
+    NodeList Nodes;              ///< Header and the members of the loop.
+    HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
     BlockMass Mass;
     Scaled64 Scale;
 
@@ -431,6 +432,16 @@
   /// \brief Compute the loop scale for a loop.
   void computeLoopScale(LoopData &Loop);
 
+  /// Adjust the mass of all headers in an irreducible loop.
+  ///
+  /// Initially, irreducible loops are assumed to distribute their mass
+  /// equally among its headers. This can lead to wrong frequency estimates
+  /// since some headers may be executed more frequently than others.
+  ///
+  /// This adjusts header mass distribution so it matches the weights of
+  /// the backedges going into each of the loop headers.
+  void adjustLoopHeaderMass(LoopData &Loop);
+
   /// \brief Package up a loop.
   void packageLoop(LoopData &Loop);
 
@@ -1042,6 +1053,8 @@
     for (const BlockNode &M : Loop.Nodes)
       if (!propagateMassToSuccessors(&Loop, M))
         llvm_unreachable("unhandled irreducible control flow");
+
+    adjustLoopHeaderMass(Loop);
   } else {
     Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
     if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
Index: lib/Analysis/BlockFrequencyInfoImpl.cpp
===================================================================
--- lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -286,7 +286,7 @@
 
   if (isLoopHeader(Resolved)) {
     DEBUG(debugSuccessor("backedge"));
-    Dist.addBackedge(OuterLoop->getHeader(), Weight);
+    Dist.addBackedge(Resolved, Weight);
     return true;
   }
 
@@ -349,7 +349,10 @@
 
   // LoopScale == 1 / ExitMass
   // ExitMass == HeadMass - BackedgeMass
-  BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
+  BlockMass TotalBackedgeMass;
+  for (auto &Mass : Loop.BackedgeMass)
+    TotalBackedgeMass += Mass;
+  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
 
   // Block scale stores the inverse of the scale. If this is an infinite loop,
   // its exit mass will be zero. In this case, use an arbitrary scale for the
@@ -358,7 +361,7 @@
       ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
 
   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
-               << " - " << Loop.BackedgeMass << ")\n"
+               << " - " << TotalBackedgeMass << ")\n"
                << " - scale = " << Loop.Scale << "\n");
 }
 
@@ -411,8 +414,10 @@
 
     // Check for a backedge.
     if (W.Type == Weight::Backedge) {
-      OuterLoop->BackedgeMass += Taken;
-      DEBUG(debugAssign(BlockNode(), Taken, "back"));
+      if (W.TargetNode.Index >= OuterLoop->BackedgeMass.size())
+        OuterLoop->BackedgeMass.resize(W.TargetNode.Index + 1);
+      OuterLoop->BackedgeMass[W.TargetNode.Index] += Taken;
+      DEBUG(debugAssign(W.TargetNode, Taken, "back"));
       continue;
     }
 
@@ -713,10 +718,56 @@
 void
 BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
   OuterLoop.Exits.clear();
-  OuterLoop.BackedgeMass = BlockMass::getEmpty();
+  OuterLoop.BackedgeMass.clear();
   auto O = OuterLoop.Nodes.begin() + 1;
   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
     if (!Working[I->Index].isPackaged())
       *O++ = *I;
   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
 }
+
+void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
+  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
+
+  // Since the loop has more than one header block, the mass flowing back into
+  // each header will be different. Adjust the mass in each header loop to
+  // reflect the masses flowing through back edges.
+  //
+  // To do this, we distribute the initial mass using the backedge masses
+  // as weights for the distribution.
+  BlockMass LoopMass = BlockMass::getFull();
+  Distribution Dist;
+
+  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
+  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
+    auto &HeaderNode = Loop.Nodes[H];
+    auto &BackedgeMass = Loop.BackedgeMass[HeaderNode.Index];
+    DEBUG(dbgs() << " - Add back edge mass for node "
+                 << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
+    Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+  }
+
+  DitheringDistributer D(Dist, LoopMass);
+
+#ifndef NDEBUG
+  auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
+                         const char *Desc) {
+    dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
+    if (Desc)
+      dbgs() << " [" << Desc << "]";
+    if (T.isValid())
+      dbgs() << " to " << getBlockName(T);
+    dbgs() << "\n";
+  };
+  (void)debugAssign;
+#endif
+
+  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
+               << " to headers using above weights\n");
+  for (const Weight &W : Dist.Weights) {
+    BlockMass Taken = D.takeMass(W.Amount);
+    assert(W.Type == Weight::Local && "all weights should be local");
+    Working[W.TargetNode.Index].getMass() = Taken;
+    DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
+  }
+}
Index: test/Analysis/BlockFrequencyInfo/PR23525.ll
===================================================================
--- /dev/null
+++ test/Analysis/BlockFrequencyInfo/PR23525.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+@g = global i32 0, align 4
+
+; Function Attrs: inlinehint noinline nounwind uwtable
+define i32 @_Z8hot_loopi(i32 %n) !prof !1 {
+entry:
+  %div = sdiv i32 %n, 2
+  %rem12 = and i32 %n, 1
+  %cmp = icmp eq i32 %rem12, 0
+  br i1 %cmp, label %Next, label %for.cond, !prof !2
+
+; CHECK: - for.cond: float = 25.85{{[0-9]*}}, int = 206
+for.cond:                                         ; preds = %entry, %for.inc
+  %i.0 = phi i32 [ %inc, %for.inc ], [ %div, %entry ]
+  %cmp1 = icmp slt i32 %i.0, %n
+  br i1 %cmp1, label %for.body, label %for.end, !prof !3, !llvm.loop !4
+
+; CHECK: - for.body: float = 24.52, int = 196
+for.body:                                         ; preds = %for.cond
+  %rem213 = and i32 %i.0, 1
+  %cmp3 = icmp eq i32 %rem213, 0
+  br i1 %cmp3, label %if.then.4, label %Next, !prof !6
+
+; CHECK: - if.then.4: float = 12.26{{[0-9]*}}, int = 98
+if.then.4:                                        ; preds = %for.body
+  %0 = load i32, i32* @g, align 4, !tbaa !7
+  %mul = shl nsw i32 %0, 1
+  br label %for.inc
+
+; CHECK: - Next: float = 12.41{{[0-9]*}}, int = 99
+Next:                                             ; preds = %for.body, %entry
+  %i.1 = phi i32 [ %div, %entry ], [ %i.0, %for.body ]
+  %1 = load i32, i32* @g, align 4, !tbaa !7
+  %add = add nsw i32 %1, %n
+  br label %for.inc
+
+; CHECK: - for.inc: float = 38.28{{[0-9]*}}, int = 306
+for.inc:                                          ; preds = %if.then.4, %Next
+  %storemerge = phi i32 [ %add, %Next ], [ %mul, %if.then.4 ]
+  %i.2 = phi i32 [ %i.1, %Next ], [ %i.0, %if.then.4 ]
+  store i32 %storemerge, i32* @g, align 4, !tbaa !7
+  %inc = add nsw i32 %i.2, 1
+  br label %for.cond
+
+; CHECK: - for.end: float = 1.0, int = 8
+for.end:                                          ; preds = %for.cond
+  %2 = load i32, i32* @g, align 4, !tbaa !7
+  ret i32 %2
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() !prof !11 {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.04 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
+  %call = tail call i32 @_Z8hot_loopi(i32 %i.04)
+  %inc = add nuw nsw i32 %i.04, 1
+  %exitcond = icmp eq i32 %inc, 100
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !12
+}
+
+
+!1 = !{!"function_entry_count", i64 99}
+!2 = !{!"branch_weights", i32 50, i32 51}
+!3 = !{!"branch_weights", i32 2452, i32 100}
+!4 = distinct !{!4, !5}
+!5 = !{!"llvm.loop.unroll.disable"}
+!6 = !{!"branch_weights", i32 1227, i32 1226}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"int", !9, i64 0}
+!9 = !{!"omnipotent char", !10, i64 0}
+!10 = !{!"Simple C/C++ TBAA"}
+!11 = !{!"function_entry_count", i64 1}
+!12 = !{!"branch_weights", i32 2, i32 100}
Index: test/Analysis/BlockFrequencyInfo/irreducible.ll
===================================================================
--- test/Analysis/BlockFrequencyInfo/irreducible.ll
+++ test/Analysis/BlockFrequencyInfo/irreducible.ll
@@ -130,9 +130,6 @@
 ; At the first step, c1 and c2 each get 1/3 of the entry.  At each subsequent
 ; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined.  This
 ; infinite series sums to 1.
-;
-; Since the currently algorithm *always* assumes entry blocks are equal,
-; -block-freq gets the right answers here.
 define void @crossloops(i2 %x) {
 ; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops':
 ; CHECK-NEXT: block-frequency-info: crossloops
@@ -386,7 +383,7 @@
 ;
 ; This testcases uses non-trivial branch weights.  The CHECK statements here
 ; will start to fail if we change -block-freq to be more accurate.  Currently,
-; we expect left, right and top to be treated as equal headers.
+; loop headers are affected by the weight of their corresponding back edges.
 define void @nonentry_header(i1 %x, i2 %y) {
 ; CHECK-LABEL: Printing analysis {{.*}} for function 'nonentry_header':
 ; CHECK-NEXT: block-frequency-info: nonentry_header
@@ -395,15 +392,15 @@
   br i1 %x, label %left, label %right, !prof !21
 
 left:
-; CHECK-NEXT: left: float = 3.0,
+; CHECK-NEXT: left: float = 0.14{{[0-9]*}},
   br i1 %x, label %top, label %bottom, !prof !22
 
 right:
-; CHECK-NEXT: right: float = 3.0,
+; CHECK-NEXT: right: float = 0.42{{[0-9]*}},
   br i1 %x, label %top, label %bottom, !prof !22
 
 top:
-; CHECK-NEXT: top: float = 3.0,
+; CHECK-NEXT: top: float = 8.43{{[0-9]*}},
   switch i2 %y, label %exit [ i2 0, label %left
                               i2 1, label %right
                               i2 2, label %bottom ], !prof !23