Index: llvm/trunk/include/llvm/Transforms/Utils/Local.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/Local.h
+++ llvm/trunk/include/llvm/Transforms/Utils/Local.h
@@ -356,6 +356,10 @@
 /// Unknown metadata is removed.
 void combineMetadataForCSE(Instruction *K, const Instruction *J);
 
+// Replace each use of 'From' with 'To', if that use does not belong to the
+// basic block where 'From' is defined. Returns the number of replacements made.
+unsigned replaceNonLocalUsesWith(Instruction *From, Value *To);
+
 /// Replace each use of 'From' with 'To' if that use is dominated by
 /// the given edge. Returns the number of replacements made.
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
+++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
@@ -253,6 +253,35 @@
   return EverChanged;
 }
 
+// Replace uses of Cond with ToVal when safe to do so. If all uses are
+// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
+// because we may incorrectly replace uses when guards/assumes are uses of
+// `Cond` and we use the guards/assumes to reason about the `Cond` value
+// at the end of the block. RAUW unconditionally replaces all uses,
+// including the guards/assumes themselves and the uses before the
+// guard/assume.
+static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
+  assert(Cond->getType() == ToVal->getType());
+  auto *BB = Cond->getParent();
+  // We can unconditionally replace all uses in non-local blocks (i.e. uses
+  // strictly dominated by BB), since LVI information is true from the
+  // terminator of BB.
+  replaceNonLocalUsesWith(Cond, ToVal);
+  for (Instruction &I : reverse(*BB)) {
+    // Reached the Cond whose uses we are trying to replace, so there are no
+    // more uses.
+    if (&I == Cond)
+      break;
+    // We only replace uses in instructions that are guaranteed to reach the
+    // end of BB, where we know Cond is ToVal.
+    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+      break;
+    I.replaceUsesOfWith(Cond, ToVal);
+  }
+  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+    Cond->eraseFromParent();
+}
+
 /// Return the cost of duplicating a piece of this block from first non-phi
 /// and before StopAt instruction to thread across it. Stop scanning the block
 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
@@ -833,13 +862,19 @@
       CondBr->eraseFromParent();
       if (CondCmp->use_empty())
         CondCmp->eraseFromParent();
-      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // We can safely replace *some* uses of the CondInst if it has
       // exactly one value as returned by LVI. RAUW is incorrect in the
       // presence of guards and assumes, that have the `Cond` as the use. This
      // is because we use the guards/assume to reason about the `Cond` value
       // at the end of block, but RAUW unconditionally replaces all uses
       // including the guards/assumes themselves and the uses before the
       // guard/assume.
+      else if (CondCmp->getParent() == BB) {
+        auto *CI = Ret == LazyValueInfo::True ?
+          ConstantInt::getTrue(CondCmp->getType()) :
+          ConstantInt::getFalse(CondCmp->getType());
+        ReplaceFoldableUses(CondCmp, CI);
+      }
 
       return true;
     }
@@ -1325,13 +1360,16 @@
     if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
       if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
         CondInst->eraseFromParent();
-      // TODO: We can safely replace *some* uses of the CondInst if it has
+      // We can safely replace *some* uses of the CondInst if it has
      // exactly one value as returned by LVI. RAUW is incorrect in the
       // presence of guards and assumes, that have the `Cond` as the use. This
       // is because we use the guards/assume to reason about the `Cond` value
       // at the end of block, but RAUW unconditionally replaces all uses
       // including the guards/assumes themselves and the uses before the
       // guard/assume.
+      else if (OnlyVal && OnlyVal != MultipleVal &&
+               CondInst->getParent() == BB)
+        ReplaceFoldableUses(CondInst, OnlyVal);
     }
     return true;
   }
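To make the safety argument in the comment above concrete, consider a
hand-written IR sketch (illustrative only; it is not part of the patch or its
tests):

declare void @f(i1)               ; may throw or never return
declare void @llvm.assume(i1)

define i32 @sketch(i32* %p) {
  %notnull = icmp ne i32* %p, null
  call void @f(i1 %notnull)       ; executes before the assume
  call void @llvm.assume(i1 %notnull)
  br i1 %notnull, label %ok, label %err

ok:
  ret i32 0

err:
  ret i32 1
}

LVI uses the assume to prove %notnull true at the terminator, so the branch
folds to `br label %ok`. A blind RAUW would also rewrite @f's operand (and the
assume's own operand) to true, which is wrong: if @f never returns when handed
false, the assume is never executed and %notnull may legitimately be false at
that call. ReplaceFoldableUses' backward walk stops at the call to @f exactly
because @f is not guaranteed to transfer execution to its successor.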
Index: llvm/trunk/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/Local.cpp
+++ llvm/trunk/lib/Transforms/Utils/Local.cpp
@@ -1796,6 +1796,23 @@
   return Count;
 }
 
+unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
+  assert(From->getType() == To->getType());
+  auto *BB = From->getParent();
+  unsigned Count = 0;
+
+  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *I = cast<Instruction>(U.getUser());
+    if (I->getParent() == BB)
+      continue;
+    U.set(To);
+    ++Count;
+  }
+  return Count;
+}
+
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
                                         DominatorTree &DT,
                                         const BasicBlockEdge &Root) {
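A minimal sketch of the new helper's contract on hypothetical IR (the names
and constants are illustrative, not from the tests): with From = %add and
To = i32 96, only the use outside %bb is rewritten.

define i32 @sketch(i32 %v) {
bb:
  %add = add i32 %v, 64
  %local = mul i32 %add, %add   ; same-block use: left untouched
  br label %succ

succ:
  %ret = add i32 %add, 1        ; non-local use: becomes add i32 96, 1
  ret i32 %ret
}

The legality argument stays with the caller; in ReplaceFoldableUses it is that
the LVI fact holds from BB's terminator, and since a definition dominates its
uses, the end of BB dominates every use of From outside BB.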
Index: llvm/trunk/test/Transforms/JumpThreading/assume.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/assume.ll
+++ llvm/trunk/test/Transforms/JumpThreading/assume.ll
@@ -59,12 +59,12 @@
 @g = external global i32
 
 ; Check that we do prove a fact using an assume within the block.
-; FIXME: We can fold the assume based on the semantics of assume.
-; CHECK-LABEL: @can_fold_assume
-; CHECK: %notnull = icmp ne i32* %array, null
-; CHECK-NEXT: call void @llvm.assume(i1 %notnull)
-; CHECK-NEXT: ret void
+; We can fold the assume based on the semantics of assume.
 define void @can_fold_assume(i32* %array) {
+; CHECK-LABEL: @can_fold_assume
+; CHECK-NOT: call void @llvm.assume
+; CHECK-NOT: br
+; CHECK: ret void
   %notnull = icmp ne i32* %array, null
   call void @llvm.assume(i1 %notnull)
   br i1 %notnull, label %normal, label %error
@@ -80,19 +80,128 @@
 declare void @f(i1)
 declare void @exit()
 ; We can fold the assume but not the uses before the assume.
-define void @dont_fold_incorrectly(i32* %array) {
-; CHECK-LABEL:@dont_fold_incorrectly
+define void @cannot_fold_use_before_assume(i32* %array) {
+; CHECK-LABEL:@cannot_fold_use_before_assume
 ; CHECK: @f(i1 %notnull)
 ; CHECK-NEXT: exit()
-; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NOT: assume
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @exit()
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @dummy(i1) nounwind argmemonly
+define void @can_fold_some_use_before_assume(i32* %array) {
+
+; CHECK-LABEL:@can_fold_some_use_before_assume
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: @dummy(i1 true)
+; CHECK-NOT: assume
 ; CHECK-NEXT: ret void
   %notnull = icmp ne i32* %array, null
   call void @f(i1 %notnull)
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+
+}
+
+; FIXME: We can fold the assume and all uses before/after it,
+; because the trapping exit call comes after the assume.
+define void @can_fold_assume_and_all_uses(i32* %array) {
+; CHECK-LABEL:@can_fold_assume_and_all_uses
+; CHECK: @dummy(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: exit()
+; CHECK-NEXT: %notnull2 = or i1 true, false
+; CHECK-NEXT: @f(i1 %notnull2)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
   call void @exit()
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  %notnull2 = or i1 %notnull, false
+  call void @f(i1 %notnull2)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @fz(i8)
+; FIXME: We can fold the assume to true, and the use after the assume, but we
+; do not do so currently, because of the function call after the assume.
+define void @can_fold_assume2(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume2
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 %notnull to i8
+; CHECK-NEXT: @f(i1 %notnull)
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  %znotnull = zext i1 %notnull to i8
+  call void @f(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @llvm.experimental.guard(i1, ...)
+; FIXME: We can fold the assume to true, but we do not do so
+; because of the guard following the assume.
+define void @can_fold_assume3(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume3
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: guard(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 true to i8
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
   call void @llvm.assume(i1 %notnull)
+  call void(i1, ...) @llvm.experimental.guard(i1 %notnull) [ "deopt"() ]
+  %znotnull = zext i1 %notnull to i8
   br i1 %notnull, label %normal, label %error
 
 normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
   ret void
 
 error:
@@ -100,6 +209,26 @@
   ret void
 }
+
+; We can fold all uses of the condition and remove it.
+define void @can_fold_assume4(i32* %array) {
+; CHECK-LABEL: can_fold_assume4
+; CHECK-NOT: notnull
+; CHECK: dummy(i1 true)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @exit()
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
 
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1) #1
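The dividing line in the tests above is isGuaranteedToTransferExecutionToSuccessor:
the backward walk may step over an instruction only if execution is guaranteed
to continue past it. A condensed, hand-written sketch (not one of the tests;
@opaque and @harmless are made-up declarations):

declare void @llvm.assume(i1)
declare void @opaque(i1)                        ; may throw or never return
declare void @harmless(i1) nounwind argmemonly  ; guaranteed to return

define void @sketch(i32* %p) {
  %c = icmp ne i32* %p, null
  call void @opaque(i1 %c)        ; walk stops here: this use is kept
  call void @harmless(i1 %c)      ; becomes @harmless(i1 true)
  call void @llvm.assume(i1 %c)   ; transfers execution; its use folds too
  br i1 %c, label %a, label %b

a:
  ret void

b:
  ret void
}

The nounwind and argmemonly attributes are what let ValueTracking vouch that
@harmless returns; a call that might unwind or never return ends the walk,
which is why the FIXME cases above stay unfolded for now.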
Index: llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
+++ llvm/trunk/test/Transforms/JumpThreading/fold-not-thread.ll
@@ -133,10 +133,10 @@
   ret void
 }
 
-; FIXME: Make sure we can do the RAUW for %add...
+; Make sure we can do the RAUW for %add...
 ;
 ; CHECK-LABEL: @rauw_if_possible(
-; CHECK: call void @f4(i32 %add)
+; CHECK: call void @f4(i32 96)
 define void @rauw_if_possible(i32 %value) nounwind {
 entry:
   %cmp = icmp eq i32 %value, 32
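The guards.ll updates below turn on one further distinction: a guard may
deoptimize instead of falling through, so it is not guaranteed to transfer
execution, and the backward walk must stop at it. Folding the guard's own
condition would silently turn `must deoptimize` into `never deoptimizes`. A
hand-written sketch (not one of the tests):

declare void @llvm.experimental.guard(i1, ...)

define void @sketch(i32 %i) {
  %c = icmp eq i32 %i, 0
  call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
  br i1 %c, label %a, label %b    ; LVI: %c is true once the guard passes

a:
  ret void

b:
  ret void
}

JumpThreading may still fold the branch to `br label %a`, and any uses of %c
strictly between the guard and the terminator, but `guard(i1 %c)` itself must
survive unchanged.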
Index: llvm/trunk/test/Transforms/JumpThreading/guards.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/guards.ll
+++ llvm/trunk/test/Transforms/JumpThreading/guards.ll
@@ -182,86 +182,89 @@
   ret void
 }
 
-declare void @never_called()
+declare void @never_called(i1)
 
-; Assume the guard is always taken and we deoptimize, so we never reach the
-; branch below that guard. We should *never* change the behaviour of a guard from
-; `must deoptimize` to `may deoptimize`, since this affects the program
-; semantics.
+; LVI uses the guard to identify the value of %c2 in the branch as true; we
+; cannot replace the guard with guard(true & c1).
 define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) {
 ; CHECK-LABEL: dont_fold_guard
-; CHECK: experimental.guard(i1 %wide.chk)
-
-entry:
-  br label %BBPred
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+  %c1 = icmp ult i32 %i, %length
+  %c2 = icmp eq i32 %i, 0
+  %wide.chk = and i1 %c1, %c2
+  call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+  br i1 %c2, label %BB1, label %BB2
 
-BBPred:
-  %cond = icmp eq i8* %addr, null
-  br i1 %cond, label %zero, label %not_zero
+BB1:
+  call void @never_called(i1 %c2)
   ret void
 
-zero:
-  unreachable
+BB2:
+  ret void
+}
 
-not_zero:
+declare void @dummy(i1) nounwind argmemonly
+; Same as @dont_fold_guard, but there is a use immediately after the guard and
+; before the branch. We can fold that use.
+define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard2
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
   %c1 = icmp ult i32 %i, %length
   %c2 = icmp eq i32 %i, 0
   %wide.chk = and i1 %c1, %c2
   call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
-  br i1 %c2, label %unreachedBB2, label %unreachedBB1
+  call void @dummy(i1 %c2)
+  br i1 %c2, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %c2)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
-
 ; same as dont_fold_guard1 but condition %cmp is not an instruction.
 ; We cannot fold the guard under any circumstance.
 ; FIXME: We can merge unreachableBB2 into not_zero.
-define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard2
+define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard3
 ; CHECK: guard(i1 %cmp)
-
-entry:
-  br label %BBPred
-
-BBPred:
-  %cond = icmp eq i8* %addr, null
-  br i1 %cond, label %zero, label %not_zero
-
-zero:
-  unreachable
-
-not_zero:
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
-  br i1 %cmp, label %unreachedBB2, label %unreachedBB1
+  br i1 %cmp, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %cmp)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
+declare void @f(i1)
 ; Same as dont_fold_guard1 but use switch instead of branch.
 ; triggers source code `ProcessThreadableEdges`.
-declare void @f(i1)
-define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind {
-; CHECK-LABEL: dont_fold_guard3
+define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind {
+; CHECK-LABEL: dont_fold_guard4
 ; CHECK-LABEL: L2:
 ; CHECK-NEXT: %cmp = icmp eq i32 %i, 0
 ; CHECK-NEXT: guard(i1 %cmp)
-; CHECK-NEXT: @f(i1 %cmp)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: @f(i1 true)
 ; CHECK-NEXT: ret void
 entry:
   br i1 %cmp1, label %L0, label %L3
 
 L0:
   %cmp = icmp eq i32 %i, 0
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void @dummy(i1 %cmp)
   switch i1 %cmp, label %L3 [
     i1 false, label %L1
     i1 true, label %L2