Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -1289,6 +1289,36 @@
   if (PredToDestList.empty())
     return false;
 
+  // If all the predecessors go to a single known successor, we want to fold,
+  // not thread. By doing so, we do not need to duplicate the current block and
+  // we also catch opportunities we would miss when we don't/can't duplicate.
+  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
+    if (PredToDestList.size() ==
+        (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
+      bool SeenFirstBranchToOnlyDest = false;
+      for (BasicBlock *SuccBB : successors(BB)) {
+        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
+          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
+        else
+          SuccBB->removePredecessor(BB, true); // This is unreachable successor.
+      }
+
+      // Finally update the terminator.
+      TerminatorInst *Term = BB->getTerminator();
+      BranchInst::Create(OnlyDest, Term);
+      Term->eraseFromParent();
+
+      // If the condition is now dead due to the removal of the old terminator,
+      // erase it.
+      auto *CondInst = dyn_cast<Instruction>(Cond);
+      if (CondInst && CondInst->use_empty())
+        CondInst->eraseFromParent();
+      // FIXME: in case this instruction is defined in the current BB and it
+      // resolves to a single value from all predecessors, we can do RAUW.
+      return true;
+    }
+  }
+
   // Determine which is the most common successor. If we have many inputs and
   // this block is a switch, we want to start by threading the batch that goes
   // to the most popular destination first. If we only know about one
Index: test/Transforms/JumpThreading/fold-not-thread.ll
===================================================================
--- /dev/null
+++ test/Transforms/JumpThreading/fold-not-thread.ll
@@ -0,0 +1,135 @@
+; RUN: opt -jump-threading -S -verify < %s | FileCheck %s
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+declare void @f4(i32)
+
+
+; Make sure we update the phi node properly.
+;
+; CHECK-LABEL: define void @test_br_folding_not_threading_update_phi(
+; CHECK: br label %L1
+; Make sure we update the phi node properly here, i.e. we only have 2 predecessors, entry and L0
+; CHECK: %res.0 = phi i32 [ 0, %L0 ], [ 1, %entry ]
+define void @test_br_folding_not_threading_update_phi(i32 %val) nounwind {
+entry:
+  %cmp = icmp eq i32 %val, 32
+  br i1 %cmp, label %L0, label %L1
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  switch i32 %val, label %L2 [
+    i32 0, label %L1
+    i32 32, label %L1
+  ]
+
+L1:
+  %res.0 = phi i32 [ 0, %L0 ], [ 0, %L0 ], [1, %entry]
+  call void @f4(i32 %res.0)
+  ret void
+L2:
+  call void @f3()
+  ret void
+}
+
+; Make sure we can fold this branch ... We will not be able to thread it as
+; L0 is too big to duplicate. L2 is the unreachable block here.
+;
+; CHECK-LABEL: @test_br_folding_not_threading(
+; CHECK: L1:
+; CHECK: call i32 @f2()
+; CHECK: call void @f3()
+; CHECK-NEXT: ret void
+; CHECK-NOT: br
+; CHECK: L3:
+define void @test_br_folding_not_threading(i1 %cond) nounwind {
+entry:
+  br i1 %cond, label %L0, label %L3
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  br i1 %cond, label %L1, label %L2
+
+L1:
+  call void @f3()
+  ret void
+L2:
+  call void @f3()
+  ret void
+L3:
+  call void @f3()
+  ret void
+}
+
+
+; Make sure we can fold this branch ... We will not be able to thread it as
+; L0 is too big to duplicate. L2 is the unreachable block here.
+; With more than one predecessor.
+;
+; CHECK-LABEL: @test_br_folding_not_threading_multiple_preds(
+; CHECK: L1:
+; CHECK: call i32 @f2()
+; CHECK: call void @f3()
+; CHECK-NEXT: ret void
+; CHECK-NOT: br
+; CHECK: L3:
+define void @test_br_folding_not_threading_multiple_preds(i1 %condx, i1 %cond) nounwind {
+entry:
+  br i1 %condx, label %X0, label %X1
+
+X0:
+  br i1 %cond, label %L0, label %L3
+
+X1:
+  br i1 %cond, label %L0, label %L3
+
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  br i1 %cond, label %L1, label %L2
+
+L1:
+  call void @f3()
+  ret void
+L2:
+  call void @f3()
+  ret void
+L3:
+  call void @f3()
+  ret void
+}
+