Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -1250,17 +1250,15 @@ BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownSuccessor = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; @@ -1277,10 +1275,26 @@ } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It possible we have same destination, but different value, e.g. default + // case in switchinst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor will go. + PredWithKnownSuccessor++; + + // If the predecessor ends with an indirect goto, we can't change its + // destination, so do not push it into the PredToDestList. However, it + // is still a predecessor with known successor target. + if (isa(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1289,6 +1303,40 @@ if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredWithKnownSuccessor == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest) + continue; + // This is unreachable successor. + SuccBB->removePredecessor(BB, true); + } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + else if (CondInst && OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) { + // If the fact we just learned is the same value for all uses of the + // condition, replace it with a constant value + CondInst->replaceAllUsesWith(OnlyVal); + CondInst->eraseFromParent(); + } + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one Index: test/Transforms/JumpThreading/basic.ll =================================================================== --- test/Transforms/JumpThreading/basic.ll +++ test/Transforms/JumpThreading/basic.ll @@ -3,7 +3,75 @@ declare i32 @f1() declare i32 @f2() declare void @f3() +declare void @f4(i32) +; Make sure we can do the RAUW for %add... +; +; CHECK-LABEL: @rauw_if_possible( +; CHECK: call void @f4(i32 96) +define void @rauw_if_possible(i32 %value) nounwind { +entry: + %cmp = icmp eq i32 %value, 32 + br i1 %cmp, label %L0, label %L3 +L0: + call i32 @f2() + call i32 @f2() + %add = add i32 %value, 64 + switch i32 %add, label %L3 [ + i32 32, label %L1 + i32 96, label %L2 + ] + +L1: + call void @f3() + ret void +L2: + call void @f4(i32 %add) + ret void +L3: + call void @f3() + ret void +} + +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. +; +; CHECK-LABEL: @test_br_folding_not_threading( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading(i1 %cond, i1 %value) nounwind { +entry: + br i1 %cond, label %L0, label %L3 +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} define i32 @test1(i1 %cond) { ; CHECK-LABEL: @test1(