Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp @@ -1289,6 +1289,33 @@ if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we don't/can't duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredToDestList.size() == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB != OnlyDest) + SuccBB->removePredecessor(BB, true); // This is unreachable successor. + } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + // FIXME: in case this instruction is defined in the current BB and it + // resolves to a single value from all predecessors, we can do RAUW. + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one Index: llvm/trunk/test/Transforms/JumpThreading/basic.ll =================================================================== --- llvm/trunk/test/Transforms/JumpThreading/basic.ll +++ llvm/trunk/test/Transforms/JumpThreading/basic.ll @@ -4,6 +4,95 @@ declare i32 @f2() declare void @f3() +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. L2 is the unreachable block here. +; +; CHECK-LABEL: @test_br_folding_not_threading( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading(i1 %cond) nounwind { +entry: + br i1 %cond, label %L0, label %L3 +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} + + +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. L2 is the unreachable block here. +; With more than 1 predecessors. +; +; CHECK-LABEL: @test_br_folding_not_threading_multiple_preds( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading_multiple_preds(i1 %condx, i1 %cond) nounwind { +entry: + br i1 %condx, label %X0, label %X1 + +X0: + br i1 %cond, label %L0, label %L3 + +X1: + br i1 %cond, label %L0, label %L3 + +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} + define i32 @test1(i1 %cond) { ; CHECK-LABEL: @test1(