Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -253,6 +253,30 @@ return EverChanged; } +/// When the branch that uses the Cond is removed because Cond folds to `Val`, +/// try to replace all uses of Cond with `Val` and erase `Cond`. +/// We can safely erase Cond only if there are no guards as uses of `Cond` +/// in the basic block where `Cond` is defined. This is to avoid circular logic +/// where LVI generates `Val` for `Cond` based on the guard, and then we replace the +/// guard with that `Val`. +static void EraseCondIfNoGuard(Instruction *Cond, Constant *Val) { + using namespace PatternMatch; + auto *GuardDecl = Cond->getModule()->getFunction( + Intrinsic::getName(Intrinsic::experimental_guard)); + if (GuardDecl && !GuardDecl->use_empty()) { + // Traversing the uses requires checking for `Cond` being used directly in + // the guard OR used within a widened check for a guard. To avoid traversing + // two levels of uses, just check for presence of guards within the basic block. + auto *BB = Cond->getParent(); + for (auto &I: *BB) + if (match(&I, m_Intrinsic())) + return; + } + Cond->replaceAllUsesWith(Val); + if (!Cond->mayHaveSideEffects()) + Cond->eraseFromParent(); +} + /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. @@ -839,8 +863,7 @@ auto *CI = Ret == LazyValueInfo::True ? ConstantInt::getTrue(CondCmp->getType()) : ConstantInt::getFalse(CondCmp->getType()); - CondCmp->replaceAllUsesWith(CI); - CondCmp->eraseFromParent(); + EraseCondIfNoGuard(CondCmp, CI); } return true; } @@ -1331,9 +1354,7 @@ CondInst->getParent() == BB) { // If we just learned Cond is the same value for all uses of the // condition, replace it with a constant value - CondInst->replaceAllUsesWith(OnlyVal); - if (!CondInst->mayHaveSideEffects()) - CondInst->eraseFromParent(); + EraseCondIfNoGuard(CondInst, OnlyVal); } } return true; Index: test/Transforms/JumpThreading/guards.ll =================================================================== --- test/Transforms/JumpThreading/guards.ll +++ test/Transforms/JumpThreading/guards.ll @@ -181,3 +181,94 @@ ; CHECK-NEXT: ret void ret void } + +declare void @never_called() + +; Assume the guard is always taken and we deoptimize, so we never reach the +; branch below that guard. We should *never* change the behaviour of a guard from +; `must deoptimize` to `may deoptimize`, since this affects the program +; semantics. +define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) { +entry: + br label %BBPred + +BBPred: + %cond = icmp eq i8* %addr, null + br i1 %cond, label %zero, label %not_zero + +zero: + unreachable + +not_zero: + %c1 = icmp ult i32 %i, %length + %c2 = icmp eq i32 %i, 0 + %wide.chk = and i1 %c1, %c2 + call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] + br i1 %c2, label %unreachedBB2, label %unreachedBB1 + +unreachedBB2: + call void @never_called() + ret void + +unreachedBB1: + ret void +} + + +; same as dont_fold_guard1 but condition %cmp is not an instruction. +; We cannot fold the guard under any circumstance. +; FIXME: We can merge unreachableBB2 into not_zero. +define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) { +; CHECK-LABEL: dont_fold_guard2 +; CHECK: guard(i1 %cmp) + +entry: + br label %BBPred + +BBPred: + %cond = icmp eq i8* %addr, null + br i1 %cond, label %zero, label %not_zero + +zero: + unreachable + +not_zero: + call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ] + br i1 %cmp, label %unreachedBB2, label %unreachedBB1 + +unreachedBB2: + call void @never_called() + ret void + +unreachedBB1: + ret void +} + +; Same as dont_fold_guard1 but use switch instead of branch. +; triggers source code `ProcessThreadableEdges`. +declare void @f(i1) +define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind { +; CHECK-LABEL: dont_fold_guard3 +; CHECK-LABEL: L2: +; CHECK-NEXT: %cmp = icmp eq i32 %i, 0 +; CHECK-NEXT: guard(i1 %cmp) +; CHECK-NEXT: @f(i1 %cmp) +; CHECK-NEXT: ret void +entry: + br i1 %cmp1, label %L0, label %L3 +L0: + %cmp = icmp eq i32 %i, 0 + call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ] + switch i1 %cmp, label %L3 [ + i1 false, label %L1 + i1 true, label %L2 + ] + +L1: + ret void +L2: + call void @f(i1 %cmp) + ret void +L3: + ret void +}