Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -801,6 +801,10 @@ return false; } + // FIXME: we should be able to remove most of this code as we have a more + // general form of folding instead of threading in case all predecessors + // go to a single successor. With the exception of TryToUnfoldSelect which + // we should move into ProcessThreadableEdges. if (CmpInst *CondCmp = dyn_cast(CondInst)) { // If we're branching on a conditional, LVI might be able to determine // it's value at the branch instruction. We only handle comparisons @@ -1227,16 +1231,12 @@ BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + unsigned PredWithKnownSuccessor = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; @@ -1258,6 +1258,15 @@ else if (OnlyDest != DestBB) OnlyDest = MultipleDestSentinel; + // We know where this predecessor will go. + PredWithKnownSuccessor++; + + // If the predecessor ends with an indirect goto, we can't change its + // destination, so do not push it into the PredToDestList. However, it + // is still a predecessor with known successor target. + if (isa(Pred->getTerminator())) + continue; + PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1265,6 +1274,35 @@ if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredWithKnownSuccessor == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest) + continue; + // This is unreachable successor. + SuccBB->removePredecessor(BB, true); + } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + // FIXME: in case this instruction is defined in the current BB and it + // resolves to a single value from all predecessors, we can do RAUW. + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one Index: test/Transforms/JumpThreading/basic.ll =================================================================== --- test/Transforms/JumpThreading/basic.ll +++ test/Transforms/JumpThreading/basic.ll @@ -4,6 +4,46 @@ declare i32 @f2() declare void @f3() +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. +; +; CHECK-LABEL: @test_br_folding_not_threading( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading(i1 %cond, i1 %value) nounwind { +entry: + br i1 %cond, label %L0, label %L3 +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} + define i32 @test1(i1 %cond) { ; CHECK-LABEL: @test1(