Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -126,7 +126,116 @@
   BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
 }
 
-/// runOnFunction - Top level algorithm.
+// Update branch probability information according to conditional
+// branch probablity. This is usually made possible for cloned branches
+// in inline instances by the context specific profile in the caller.
+// For instance,
+//
+//  [Block PredBB]
+//  [Branch PredBr]
+//  if (t) {
+//     Block A;
+//  } else {
+//     Block B;
+//  }
+//
+//  [Block BB]
+//  cond = PN([true, %A], [..., %B]); // PHI node
+//  [Branch CondBr]
+//  if (cond) {
+//    ...  // P(cond == true) = 1%
+//  }
+//
+//  Here we know that when block A is taken, cond must be true, which means
+//      P(cond == true | A) = 1
+//
+//  Given that P(cond == true) = P(cond == true | A) * P(A) +
+//                               P(cond == true | B) * P(B)
+//  we get
+//     P(cond == true ) = P(A) + P(cond == true | B) * P(B)
+//
+//  which gives us:
+//     P(A) <= P(c == true), i.e.
+//     P(t == true) <= P(cond == true)
+//
+//  In other words, if we know P(cond == true), we know that P(t == true)
+//  can not be greater than 1%.
+static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
+  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!CondBr)
+    return;
+
+  BranchProbability BP;
+  uint64_t TrueWeight, FalseWeight;
+  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
+    return;
+
+  // Returns the outgoing edge of the dominating predecessor block
+  // that leads to the PhiNode's incoming block:
+  auto GetPredOutEdge =
+      [](BasicBlock *IncomingBB,
+         BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
+    auto *PredBB = IncomingBB;
+    auto *SuccBB = PhiBB;
+    for (;;) {
+      BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+      if (PredBr && PredBr->isConditional())
+        return {PredBB, SuccBB};
+      auto *SinglePredBB = PredBB->getSinglePredecessor();
+      if (!SinglePredBB)
+        return {nullptr, nullptr};
+      SuccBB = PredBB;
+      PredBB = SinglePredBB;
+    }
+  };
+
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *PhiOpnd = PN->getIncomingValue(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+
+    if (!CI || !CI->getType()->isIntegerTy(1))
+      continue;
+
+    BP = (CI->isOne() ? BranchProbability::getBranchProbability(
+                            TrueWeight, TrueWeight + FalseWeight)
+                      : BranchProbability::getBranchProbability(
+                            FalseWeight, TrueWeight + FalseWeight));
+
+    auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
+    if (!PredOutEdge.first)
+      return;
+
+    BasicBlock *PredBB = PredOutEdge.first;
+    BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
+
+    uint64_t PredTrueWeight, PredFalseWeight;
+    // FIXME: We currently only set the profile data when it is missing.
+    // With PGO, this can be used to refine even existing profile data with
+    // context information. This needs to be done after more performance
+    // testing.
+    if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
+      continue;
+
+    // We can not infer anything useful when BP >= 50%, because BP is the
+    // upper bound probability value.
+    if (BP >= BranchProbability(50, 100))
+      continue;
+
+    SmallVector<uint32_t, 2> Weights;
+    if (PredBr->getSuccessor(0) == PredOutEdge.second) {
+      Weights.push_back(BP.getNumerator());
+      Weights.push_back(BP.getCompl().getNumerator());
+    } else {
+      Weights.push_back(BP.getCompl().getNumerator());
+      Weights.push_back(BP.getNumerator());
+    }
+    PredBr->setMetadata(LLVMContext::MD_prof,
+                        MDBuilder(PredBr->getParent()->getContext())
+                            .createBranchWeights(Weights));
+  }
+}
+
+/// runOnFunction - Toplevel algorithm.
 ///
 bool JumpThreading::runOnFunction(Function &F) {
   if (skipFunction(F))
@@ -991,6 +1100,11 @@
     if (SimplifyPartiallyRedundantLoad(LI))
       return true;
 
+  // Before threading, try to propagate profile data backwards:
+  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+      updatePredecessorProfileMetadata(PN, BB);
+
   // Handle a variety of cases where we are branching on something derived from
   // a PHI node in the current block.  If we can prove that any predecessors
   // compute a predictable value based on a PHI node, thread those predecessors.
Index: test/Transforms/JumpThreading/threading_prof1.ll
===================================================================
--- test/Transforms/JumpThreading/threading_prof1.ll
+++ test/Transforms/JumpThreading/threading_prof1.ll
@@ -0,0 +1,99 @@
+; RUN: opt -jump-threading -S < %s | FileCheck %s
+; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test()
+bb:
+  %tmp = call i32 @a()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5, label %bb2
+; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
+
+bb2:                                              ; preds = %bb
+  %tmp3 = call i32 @b()
+  %tmp4 = icmp ne i32 %tmp3, 1
+  br label %bb5
+; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
+
+bb5:                                              ; preds = %bb2, %bb
+  %tmp6 = phi i1 [ false, %bb ], [ %tmp4, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7, !prof !0
+
+bb7:                                              ; preds = %bb5
+  call void @bar()
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb5
+  ret void
+}
+
+define void @test_single_pred1() {
+; CHECK-LABEL: @test_single_pred1()
+bb:
+  %tmp = call i32 @a()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5_1, label %bb2
+; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
+
+bb5_1:                                             
+  br label %bb5;
+
+bb2:                                              
+  %tmp3 = call i32 @b()
+  %tmp4 = icmp ne i32 %tmp3, 1
+  br label %bb5
+; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
+
+bb5:                                             
+  %tmp6 = phi i1 [ false, %bb5_1 ], [ %tmp4, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7, !prof !0
+
+bb7:                                            
+  call void @bar()
+  br label %bb8
+
+bb8:                                           
+  ret void
+}
+
+define void @test_single_pred2() {
+; CHECK-LABEL: @test_single_pred2()
+bb:
+  %tmp = call i32 @a()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5_1, label %bb2
+; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
+
+bb5_1:                                             
+  br label %bb5_2;
+
+bb5_2:                                             
+  br label %bb5;
+
+bb2:                          
+  %tmp3 = call i32 @b()
+  %tmp4 = icmp ne i32 %tmp3, 1
+  br label %bb5
+; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
+
+bb5:                         
+  %tmp6 = phi i1 [ false, %bb5_2 ], [ %tmp4, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7, !prof !0
+
+bb7:                        
+  call void @bar()
+  br label %bb8
+
+bb8:                       
+  ret void
+}
+
+declare void @bar()
+
+declare i32 @a()
+
+declare i32 @b()
+
+!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
+;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
+;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}
Index: test/Transforms/JumpThreading/threading_prof2.ll
===================================================================
--- test/Transforms/JumpThreading/threading_prof2.ll
+++ test/Transforms/JumpThreading/threading_prof2.ll
@@ -0,0 +1,42 @@
+; RUN: opt -jump-threading -S < %s | FileCheck %s
+; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
+define void @test() {
+bb:
+  %tmp = call i32 @a()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5, label %bb2
+; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
+
+bb2:                                              
+  %tmp3 = call i32 @b()
+  %tmp4 = icmp ne i32 %tmp3, 1
+  br label %bb5
+; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
+
+bb5:                                             
+  %tmp6 = phi i1 [ false, %bb ], [ %tmp4, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7, !prof !0
+
+bb7:                                            
+  call void @bar()
+  br label %bb9
+
+bb8: 
+  call void @foo()
+  br label %bb9
+
+bb9:                                           
+  ret void
+}
+
+declare void @bar()
+
+declare void @foo()
+
+declare i32 @a()
+
+declare i32 @b()
+
+!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
+;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
+;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}