Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -1620,16 +1620,19 @@ } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) { - OnlyDest = DestBB; - OnlyVal = Val; - } else { - if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; - // It possible we have same destination, but different value, e.g. default - // case in switchinst. - if (Val != OnlyVal) - OnlyVal = MultipleVal; + // and we can ignore null destination. + if (DestBB) { + if (PredToDestList.empty()) { + OnlyDest = DestBB; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It possible we have same destination, but different value, e.g. + // default case in switchinst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } } // We know where this predecessor is going. @@ -1650,46 +1653,53 @@ // If all the predecessors go to a single known successor, we want to fold, // not thread. By doing so, we do not need to duplicate the current block and // also miss potential opportunities in case we dont/cant duplicate. - if (OnlyDest && OnlyDest != MultipleDestSentinel) { if (PredWithKnownDest == (size_t)pred_size(BB)) { - bool SeenFirstBranchToOnlyDest = false; - std::vector Updates; - Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); - for (BasicBlock *SuccBB : successors(BB)) { - if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { - SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. - } else { - SuccBB->removePredecessor(BB, true); // This is unreachable successor. - Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + OnlyDest = OnlyDest ? OnlyDest + : BB->getTerminator()->getSuccessor( + GetBestDestForJumpOnUndef(BB)); + if (OnlyDest != MultipleDestSentinel) { + bool SeenFirstBranchToOnlyDest = false; + std::vector Updates; + Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { + SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. + } else { + SuccBB->removePredecessor(BB, + true); // This is unreachable successor. + Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + } } - } - // Finally update the terminator. - TerminatorInst *Term = BB->getTerminator(); - BranchInst::Create(OnlyDest, Term); - Term->eraseFromParent(); - DDT->applyUpdates(Updates); - - // If the condition is now dead due to the removal of the old terminator, - // erase it. - if (auto *CondInst = dyn_cast(Cond)) { - if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) - CondInst->eraseFromParent(); - // We can safely replace *some* uses of the CondInst if it has - // exactly one value as returned by LVI. RAUW is incorrect in the - // presence of guards and assumes, that have the `Cond` as the use. This - // is because we use the guards/assume to reason about the `Cond` value - // at the end of block, but RAUW unconditionally replaces all uses - // including the guards/assumes themselves and the uses before the - // guard/assume. - else if (OnlyVal && OnlyVal != MultipleVal && - CondInst->getParent() == BB) - ReplaceFoldableUses(CondInst, OnlyVal); - } - return true; + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + DDT->applyUpdates(Updates); + + // If the condition is now dead due to the removal of the old + // terminator, erase it. + if (auto *CondInst = dyn_cast(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + // We can safely replace *some* uses of the CondInst if it has + // exactly one value as returned by LVI. RAUW is incorrect in the + // presence of guards and assumes, that have the `Cond` as the use. + // This is because we use the guards/assume to reason about the `Cond` + // value at the end of block, but RAUW unconditionally replaces all + // uses including the guards/assumes themselves and the uses before + // the guard/assume. + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) + ReplaceFoldableUses(CondInst, OnlyVal); + } + return true; } } + // At this point, we did not manage to fold the block. Try to thread some + // predecessors out. + // // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one Index: test/Transforms/JumpThreading/fold-not-thread.ll =================================================================== --- test/Transforms/JumpThreading/fold-not-thread.ll +++ test/Transforms/JumpThreading/fold-not-thread.ll @@ -244,3 +244,93 @@ call void @f3() ret void } + +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. LX is the unreachable block here, and it feeds +; UNDEF to L2. We want to make sure the UNDEF does not make jump threading think +; L2 has multiple destinations. +; CHECK-LABEL: @test_br_folding_not_threading_multiple_preds_with_some_undef( +; CHECK: L3: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +define void @test_br_folding_not_threading_multiple_preds_with_some_undef(i1 %condx, i1 %cond) nounwind { +L0: + br i1 %condx, label %L2, label %L1 + +L1: + br i1 %cond, label %L2, label %L5 + +LX: + br label %L2 + +L2: + %p = phi i1 [ %condx, %L0 ], [ %cond, %L1 ], [ undef, %LX] + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %p, label %L3, label %L4 + +L3: + call void @f3() + ret void +L4: + call void @f3() + ret void +L5: + call void @f3() + ret void +} + +; All values are UNDEF, make sure we fold. We can not thread L2 as its too big. +; CHECK-LABEL: @test_br_folding_not_threading_multiple_preds_with_all_undef( +; CHECK: L3: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +define void @test_br_folding_not_threading_multiple_preds_with_all_undef(i1 %condx, i1 %cond) nounwind { +L0: + br label %L2 + +L1: + br label %L2 + +L2: + %x = phi i1 [ undef, %L0 ], [ undef, %L1 ] + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + %p = icmp eq i1 %x, 0 + br i1 %p, label %L3, label %L4 + +L3: + call void @f3() + ret void +L4: + call void @f3() + ret void +L5: + call void @f3() + ret void +} + Index: test/Transforms/JumpThreading/pr22086.ll =================================================================== --- test/Transforms/JumpThreading/pr22086.ll +++ test/Transforms/JumpThreading/pr22086.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: entry: ; CHECK-NEXT: br label %[[loop:.*]] ; CHECK: [[loop]]: -; CHECK-NEXT: br label %[[loop]] +; CHECK: br label %[[loop]] define void @f() { entry: