Index: lib/Transforms/Scalar/BDCE.cpp
===================================================================
--- lib/Transforms/Scalar/BDCE.cpp
+++ lib/Transforms/Scalar/BDCE.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DemandedBits.h"
@@ -35,6 +36,59 @@
 STATISTIC(NumRemoved, "Number of instructions removed (unused)");
 STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
 
+/// If an instruction is trivialized (dead), then the chain of users of that
+/// instruction may need to be cleared of assumptions that can no longer be
+/// guaranteed correct.
+static void clearAssumptionsOfUsers(Instruction *I) {
+  // If a value may be visible outside of this def/use chain, then we don't need
+  // to continue processing its users. Ie, all bits of this value should be
+  // demanded.
+  // TODO: "Any value in the def/use chain that has all bits set as non-dead
+  //        (by the demanded bit analysis) cannot have its value changed: if all
+  //        bits are needed, all bits must keep the same value before/after
+  //        optimization. That means that our 'taint' propagation (of removing
+  //        attributes/assumptions) can end there." - Nuno Lopes (PR33695)
+  auto isExternallyVisible = [](Instruction *I) {
+    switch (I->getOpcode()) {
+      case Instruction::Store:
+      case Instruction::Call:
+      case Instruction::Invoke:
+      case Instruction::AtomicRMW:
+      case Instruction::AtomicCmpXchg:
+      case Instruction::Ret:
+        return true;
+      default:
+        return false;
+    }
+  };
+
+  // Initialize the worklist with eligible direct users.
+  SmallVector<Instruction *, 16> WorkList;
+  for (User *JU : I->users()) {
+    auto *J = dyn_cast<Instruction>(JU);
+    if (J && !isExternallyVisible(J))
+      WorkList.push_back(J);
+  }
+
+  // DFS through subsequent users while tracking visits to avoid cycles.
+  SmallPtrSet<Instruction *, 16> Visited;
+  while (!WorkList.empty()) {
+    Instruction *J = WorkList.pop_back_val();
+
+    // NSW, NUW, and exact are based on operands that might have changed.
+    J->dropPoisonGeneratingFlags();
+    // FIXME: llvm.assume (and range metadata?) may also be invalid now.
+
+    Visited.insert(J);
+
+    for (User *KU : J->users()) {
+      auto *K = dyn_cast<Instruction>(KU);
+      if (K && !Visited.count(K) && !isExternallyVisible(K))
+        WorkList.push_back(K);
+    }
+  }
+}
+
 static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
   SmallVector<Instruction*, 128> Worklist;
   bool Changed = false;
@@ -51,6 +105,9 @@
       // replacing all uses with something else. Then, if they don't need to
       // remain live (because they have side effects, etc.) we can remove them.
       DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+
+      clearAssumptionsOfUsers(&I);
+
       // FIXME: In theory we could substitute undef here instead of zero.
       // This should be reconsidered once we settle on the semantics of
       // undef, poison, etc.
Index: test/Transforms/BDCE/invalidate-assumptions.ll
===================================================================
--- test/Transforms/BDCE/invalidate-assumptions.ll
+++ test/Transforms/BDCE/invalidate-assumptions.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -bdce %s -S | FileCheck %s
 
-; FIXME: The 'nuw' on the subtract allows us to deduce that %setbit is not demanded.
+; The 'nuw' on the subtract allows us to deduce that %setbit is not demanded.
 ; But if we change that value to '0', then the 'nuw' is no longer valid. If we don't
 ; remove the 'nuw', another pass (-instcombine) may make a transform based on an
 ; that incorrect assumption and we can miscompile:
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:    [[SETBIT:%.*]] = or i8 %x, 64
 ; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8
 ; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl i8 0, 1
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]]
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1
 ; CHECK-NEXT:    ret i1 [[TRUNC]]
 ;
@@ -23,7 +23,7 @@
   ret i1 %trunc
 }
 
-; FIXME: Similar to above, but now with more no-wrap.
+; Similar to above, but now with more no-wrap.
 ; https://bugs.llvm.org/show_bug.cgi?id=34037
 
 define i64 @PR34037(i64 %m, i32 %r, i64 %j, i1 %b, i32 %k, i64 %p) {
@@ -34,9 +34,9 @@
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 %j, 0
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i64 0, 29
 ; CHECK-NEXT:    [[CONV1:%.*]] = select i1 %b, i64 7, i64 0
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i64 [[SHL]], [[CONV1]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[SHL]], [[CONV1]]
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i32 %k to i64
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[SUB]], [[CONV2]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[SUB]], [[CONV2]]
 ; CHECK-NEXT:    [[CONV4:%.*]] = and i64 %p, 65535
 ; CHECK-NEXT:    [[AND5:%.*]] = and i64 [[MUL]], [[CONV4]]
 ; CHECK-NEXT:    ret i64 [[AND5]]
@@ -55,3 +55,32 @@
   ret i64 %and5
 }
 
+
+; This is a manufactured example based on the 1st test to prove that the
+; assumption-killing algorithm stops at the call. Ie, it does not remove
+; nsw/nuw from the 'add' because a call demands all bits of its argument.
+
+declare i1 @foo(i1)
+define i1 @poison_on_call_user_is_ok(i1 %b, i8 %x) {
+; CHECK-LABEL: @poison_on_call_user_is_ok(
+; CHECK-NEXT:    [[SETBIT:%.*]] = or i8 %x, 64
+; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8
+; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl i8 0, 1
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1
+; CHECK-NEXT:    [[CALL_RESULT:%.*]] = call i1 @foo(i1 [[TRUNC]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i1 [[CALL_RESULT]], true
+; CHECK-NEXT:    [[MUL:%.*]] = mul i1 [[TRUNC]], [[ADD]]
+; CHECK-NEXT:    ret i1 [[MUL]]
+;
+  %setbit = or i8 %x, 64
+  %little_number = zext i1 %b to i8
+  %big_number = shl i8 %setbit, 1
+  %sub = sub nuw i8 %big_number, %little_number
+  %trunc = trunc i8 %sub to i1
+  %call_result = call i1 @foo(i1 %trunc)
+  %add = add nsw nuw i1 %call_result, 1
+  %mul = mul i1 %trunc, %add
+  ret i1 %mul
+}
+