Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -152,6 +152,7 @@
     bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
                                          PredValueInfo &Result,
                                          ConstantPreference Preference,
+                                         bool &Changed,
                                          Instruction *CxtI = nullptr);
     bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                 ConstantPreference Preference,
@@ -395,10 +396,9 @@
 ///
 /// This returns true if there were any known values.
 ///
-bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
-                                ConstantPreference Preference,
-                                Instruction *CxtI) {
+bool JumpThreading::ComputeValueKnownInPredecessors(
+    Value *V, BasicBlock *BB, PredValueInfo &Result,
+    ConstantPreference Preference, bool &Changed, Instruction *CxtI) {
   // This method walks up use-def chains recursively.  Because of this, we could
   // get into an infinite loop going around loops in the use-def chain.  To
   // prevent this, keep track of what (value, block) pairs we've already visited
@@ -410,6 +410,15 @@
   // stack pops back out again.
   RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
 
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I && !isa<PHINode>(I))
+    if (auto *NewV = SimplifyInstruction(I, BB->getModule()->getDataLayout())) {
+      I->replaceAllUsesWith(NewV);
+      I->eraseFromParent();
+      V = NewV;
+      Changed = true;
+    }
+
   // If V is a constant, then it is known in all predecessors.
   if (Constant *KC = getKnownConstant(V, Preference)) {
     for (BasicBlock *Pred : predecessors(BB))
@@ -420,7 +429,7 @@
 
   // If V is a non-instruction value, or an instruction in a different block,
   // then it can't be derived from a PHI.
-  Instruction *I = dyn_cast<Instruction>(V);
+  I = dyn_cast<Instruction>(V);
   if (!I || I->getParent() != BB) {
 
     // Okay, if this is a live-in value, see if it has a known value at the end
@@ -475,9 +484,9 @@
     if (I->getOpcode() == Instruction::Or ||
         I->getOpcode() == Instruction::And) {
       ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
-                                      WantInteger, CxtI);
+                                      WantInteger, Changed, CxtI);
       ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
-                                      WantInteger, CxtI);
+                                      WantInteger, Changed, CxtI);
 
       if (LHSVals.empty() && RHSVals.empty())
         return false;
@@ -512,8 +521,8 @@
     if (I->getOpcode() == Instruction::Xor &&
         isa<ConstantInt>(I->getOperand(1)) &&
         cast<ConstantInt>(I->getOperand(1))->isOne()) {
-      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
-                                      WantInteger, CxtI);
+      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, WantInteger,
+                                      Changed, CxtI);
       if (Result.empty())
         return false;
 
@@ -531,7 +540,7 @@
     if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
       PredValueInfoTy LHSVals;
       ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
-                                      WantInteger, CxtI);
+                                      WantInteger, Changed, CxtI);
 
       // Try to use constant folding to simplify the binary operator.
       for (const auto &LHSVal : LHSVals) {
@@ -608,7 +617,7 @@
       if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
         PredValueInfoTy LHSVals;
         ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
-                                        WantInteger, CxtI);
+                                        WantInteger, Changed, CxtI);
 
         for (const auto &LHSVal : LHSVals) {
           Constant *V = LHSVal.first;
@@ -631,7 +640,7 @@
     PredValueInfoTy Conds;
     if ((TrueVal || FalseVal) &&
         ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
-                                        WantInteger, CxtI)) {
+                                        WantInteger, Changed, CxtI)) {
       for (auto &C : Conds) {
         Constant *Cond = C.first;
 
@@ -1180,8 +1189,10 @@
     return false;
 
   PredValueInfoTy PredValues;
-  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
-    return false;
+  bool Changed = false;
+  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
+                                       Changed, CxtI))
+    return Changed;
 
   assert(!PredValues.empty() &&
          "ComputeValueKnownInPredecessors returned true with no values");
@@ -1239,7 +1250,7 @@
 
   // If all edges were unthreadable, we fail.
   if (PredToDestList.empty())
-    return false;
+    return Changed;
 
   // Determine which is the most common successor.  If we have many inputs and
   // this block is a switch, we want to start by threading the batch that goes
@@ -1272,7 +1283,8 @@
                             getSuccessor(GetBestDestForJumpOnUndef(BB));
 
   // Ok, try to thread it!
-  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+  Changed |= ThreadEdge(BB, PredsToFactor, MostPopularDest);
+  return Changed;
 }
 
 /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
@@ -1343,12 +1355,13 @@
 
   PredValueInfoTy XorOpValues;
   bool isLHS = true;
+  bool Changed = false;
   if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
-                                       WantInteger, BO)) {
+                                       WantInteger, Changed, BO)) {
     assert(XorOpValues.empty());
     if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
-                                         WantInteger, BO))
-      return false;
+                                         WantInteger, Changed, BO))
+      return Changed;
     isLHS = false;
   }
 
@@ -1406,7 +1419,8 @@
   }
 
   // Try to duplicate BB into PredBB.
-  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+  Changed |= DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+  return Changed;
 }
 
 
Index: test/Transforms/JumpThreading/basic.ll
===================================================================
--- test/Transforms/JumpThreading/basic.ll
+++ test/Transforms/JumpThreading/basic.ll
@@ -476,6 +476,55 @@
 ; CHECK: }
 }
 
+
+;;; Verify that we can handle constraint propagation through cast.
+define i32 @test16(i1 %cond) {
+Entry:
+; CHECK-LABEL: @test16(
+	br i1 %cond, label %Merge, label %F1
+
+; CHECK: Entry:
+; CHECK-NEXT:  br i1 %cond, label %F2, label %Merge
+
+F1:
+	%v1 = call i32 @f1()
+	br label %Merge
+
+Merge:
+	%B = phi i32 [1, %Entry], [%v1, %F1]
+	%M = icmp eq i32 %B, 0 
+	%M1 = zext i1 %M to i32
+	%N = icmp eq i32 %M1, 1 
+	br i1 %N, label %T2, label %F2
+
+; CHECK: Merge:
+; CHECK-NOT: phi
+; CHECK-NEXT:   %v1 = call i32 @f1()
+
+T2:
+	%Q = zext i1 %M to i32
+	ret i32 %Q
+
+F2:
+	ret i32 %B
+; CHECK: F2:
+; CHECK-NEXT: phi i32
+}
+
+;;; Just check that it doesn't crash / assert
+define i32 @test17() {
+entry:
+	%A = add i32 0, 1
+	%B = icmp eq i32 %A, 0
+	br i1 %B, label %T, label %F
+T:
+	%v1 = call i32 @f1()
+	ret i32 %v1
+F:
+	%v2 = call i32 @f2()
+	ret i32 %v2
+}
+
 ; In this test we check that block duplication is inhibited by the presence
 ; of a function with the 'noduplicate' attribute.