Index: lib/Transforms/Scalar/BDCE.cpp =================================================================== --- lib/Transforms/Scalar/BDCE.cpp +++ lib/Transforms/Scalar/BDCE.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DemandedBits.h" @@ -35,6 +36,59 @@ STATISTIC(NumRemoved, "Number of instructions removed (unused)"); STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)"); +/// If an instruction is trivialized (dead), then the chain of users of that +/// instruction may need to be cleared of assumptions that can no longer be +/// guaranteed correct. +static void clearAssumptionsOfUsers(Instruction *I) { + // If a value may be visible outside of this def/use chain, then we don't need + // to continue processing its users. Ie, all bits of this value should be + // demanded. + // TODO: "Any value in the def/use chain that has all bits set as non-dead + // (by the demanded bit analysis) cannot have its value changed: if all + // bits are needed, all bits must keep the same value before/after + // optimization. That means that our 'taint' propagation (of removing + // attributes/assumptions) can end there." - Nuno Lopes (PR33695) + auto isExternallyVisible = [](Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Store: + case Instruction::Call: + case Instruction::Invoke: + case Instruction::AtomicRMW: + case Instruction::AtomicCmpXchg: + case Instruction::Ret: + return true; + default: + return false; + } + }; + + // Initialize the worklist with eligible direct users. + SmallVector<Instruction *, 16> WorkList; + for (User *JU : I->users()) { + auto *J = dyn_cast<Instruction>(JU); + if (J && !isExternallyVisible(J)) + WorkList.push_back(J); + } + + // DFS through subsequent users while tracking visits to avoid cycles. 
+ SmallPtrSet<Instruction *, 16> Visited; + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + // NSW, NUW, and exact are based on operands that might have changed. + J->dropPoisonGeneratingFlags(); + // FIXME: llvm.assume (and range metadata?) may also be invalid now. + + Visited.insert(J); + + for (User *KU : J->users()) { + auto *K = dyn_cast<Instruction>(KU); + if (K && !Visited.count(K) && !isExternallyVisible(K)) + WorkList.push_back(K); + } + } +} + static bool bitTrackingDCE(Function &F, DemandedBits &DB) { SmallVector<Instruction*, 128> Worklist; bool Changed = false; @@ -51,6 +105,9 @@ // replacing all uses with something else. Then, if they don't need to // remain live (because they have side effects, etc.) we can remove them. DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + + clearAssumptionsOfUsers(&I); + // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. Index: test/Transforms/BDCE/invalidate-assumptions.ll =================================================================== --- test/Transforms/BDCE/invalidate-assumptions.ll +++ test/Transforms/BDCE/invalidate-assumptions.ll @@ -1,6 +1,6 @@ ; RUN: opt -bdce %s -S | FileCheck %s -; FIXME: The 'nuw' on the subtract allows us to deduce that %setbit is not demanded. +; The 'nuw' on the subtract allows us to deduce that %setbit is not demanded. ; But if we change that value to '0', then the 'nuw' is no longer valid. 
If we don't ; remove the 'nuw', another pass (-instcombine) may make a transform based on an ; that incorrect assumption and we can miscompile: @@ -11,7 +11,7 @@ ; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64 ; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8 ; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1 -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]] ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1 ; CHECK-NEXT: ret i1 [[TRUNC]] ; @@ -23,7 +23,7 @@ ret i1 %trunc } -; FIXME: Similar to above, but now with more no-wrap. +; Similar to above, but now with more no-wrap. ; https://bugs.llvm.org/show_bug.cgi?id=34037 define i64 @PR34037(i64 %m, i32 %r, i64 %j, i1 %b, i32 %k, i64 %p) { @@ -34,9 +34,9 @@ ; CHECK-NEXT: [[OR:%.*]] = or i64 %j, 0 ; CHECK-NEXT: [[SHL:%.*]] = shl i64 0, 29 ; CHECK-NEXT: [[CONV1:%.*]] = select i1 %b, i64 7, i64 0 -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 [[SHL]], [[CONV1]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[SHL]], [[CONV1]] ; CHECK-NEXT: [[CONV2:%.*]] = zext i32 %k to i64 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[SUB]], [[CONV2]] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], [[CONV2]] ; CHECK-NEXT: [[CONV4:%.*]] = and i64 %p, 65535 ; CHECK-NEXT: [[AND5:%.*]] = and i64 [[MUL]], [[CONV4]] ; CHECK-NEXT: ret i64 [[AND5]] @@ -55,3 +55,32 @@ ret i64 %and5 } + +; This is a manufactured example based on the 1st test to prove that the +; assumption-killing algorithm stops at the call. Ie, it does not remove +; nsw/nuw from the 'add' because a call demands all bits of its argument. 
+ +declare i1 @foo(i1) +define i1 @poison_on_call_user_is_ok(i1 %b, i8 %x) { +; CHECK-LABEL: @poison_on_call_user_is_ok( +; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64 +; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8 +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1 +; CHECK-NEXT: [[CALL_RESULT:%.*]] = call i1 @foo(i1 [[TRUNC]]) +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i1 [[CALL_RESULT]], true +; CHECK-NEXT: [[MUL:%.*]] = mul i1 [[TRUNC]], [[ADD]] +; CHECK-NEXT: ret i1 [[MUL]] +; + %setbit = or i8 %x, 64 + %little_number = zext i1 %b to i8 + %big_number = shl i8 %setbit, 1 + %sub = sub nuw i8 %big_number, %little_number + %trunc = trunc i8 %sub to i1 + %call_result = call i1 @foo(i1 %trunc) + %add = add nsw nuw i1 %call_result, 1 + %mul = mul i1 %trunc, %add + ret i1 %mul +} +