diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1309,6 +1309,28 @@
   replaceExitCond(BI, NewCond, DeadInsts);
 }
 
+/// Replace every PHI node in the header of \p L with the value it receives
+/// from the loop preheader, then erase the PHI.
+///
+/// Only valid once the backedge of \p L is known to be never taken, so that
+/// each header PHI can only ever observe its preheader incoming value.
+static void replaceLoopPHINodesWithPreheaderValues(Loop *L) {
+  auto *LoopPreheader = L->getLoopPreheader();
+  assert(LoopPreheader && "Expected the loop to have a preheader!");
+  auto *LoopHeader = L->getHeader();
+  // Erasure is deferred to a second pass so that the phis() range is not
+  // mutated while it is being walked.
+  SmallVector<PHINode *, 8> LoopPHINodes;
+  for (auto &PN : LoopHeader->phis()) {
+    PN.replaceAllUsesWith(PN.getIncomingValueForBlock(LoopPreheader));
+    LoopPHINodes.push_back(&PN);
+  }
+  // NOTE(review): the PHIs are erased eagerly instead of being queued for
+  // later cleanup; confirm no live handle to them remains at this point.
+  for (auto *PN : LoopPHINodes)
+    PN->eraseFromParent();
+}
+
 static void replaceWithInvariantCond(
     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
@@ -1454,8 +1476,16 @@
 
   bool Changed = false;
   bool SkipLastIter = false;
+  bool ExitsOnFirstIter = false;
   SmallSet<const SCEV*, 8> DominatingExitCounts;
   for (BasicBlock *ExitingBB : ExitingBlocks) {
+    if (ExitsOnFirstIter) {
+      // An earlier exit in this walk was already proven to be taken on the
+      // first iteration, so fold this exit as taken as well.
+      foldExit(L, ExitingBB, true, DeadInsts);
+      continue;
+    }
+
     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
     if (isa<SCEVCouldNotCompute>(ExitCount)) {
       // Okay, we do not know the exit count here. Can we at least prove that it
@@ -1499,11 +1529,11 @@
     // If we know we'd exit on the first iteration, rewrite the exit to
     // reflect this. This does not imply the loop must exit through this
     // exit; there may be an earlier one taken on the first iteration.
-    // TODO: Given we know the backedge can't be taken, we should go ahead
-    // and break it. Or at least, kill all the header phis and simplify.
     if (ExitCount->isZero()) {
       foldExit(L, ExitingBB, true, DeadInsts);
+      replaceLoopPHINodesWithPreheaderValues(L);
       Changed = true;
+      ExitsOnFirstIter = true;
       continue;
     }
 
diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare i1 @foo(i8*, i8*)
+declare i1 @bar()
+declare i1 @baz()
+
+define i1 @kill_backedge_and_phis(i8* align 1 %lhs, i8* align 1 %rhs, i32 %len) {
+; CHECK-LABEL: @kill_backedge_and_phis(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %length_not_zero = icmp ne i32 %len, 0
+; CHECK-NEXT: br i1 %length_not_zero, label %loop_preheader, label %exit
+; CHECK: loop_preheader:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %iv.wide.next = add nuw nsw i64 0, 1
+; CHECK-NEXT: %left_ptr = getelementptr inbounds i8, i8* %lhs, i32 0
+; CHECK-NEXT: %right_ptr = getelementptr inbounds i8, i8* %rhs, i32 0
+; CHECK-NEXT: %result = call i1 @foo(i8* %left_ptr, i8* %right_ptr)
+; CHECK-NEXT: br i1 %result, label %exiting_1, label %exit.loopexit
+; CHECK: exiting_1:
+; CHECK-NEXT: %iv.wide.is_not_zero = icmp ne i64 0, 0
+; CHECK-NEXT: br i1 false, label %exiting_2, label %exit.loopexit
+; CHECK: exiting_2:
+; CHECK-NEXT: %bar_ret = call i1 @bar()
+; CHECK-NEXT: br i1 false, label %exiting_3, label %exit.loopexit
+; CHECK: exiting_3:
+; CHECK-NEXT: %baz_ret = call i1 @baz()
+; CHECK-NEXT: br i1 false, label %loop, label %exit.loopexit
+; CHECK: exit.loopexit:
+; CHECK-NEXT: %val.ph = phi i1 [ %iv.wide.is_not_zero, %exiting_3 ], [ %iv.wide.is_not_zero, %exiting_2 ], [ %iv.wide.is_not_zero, %exiting_1 ], [ %result, %loop ]
+; CHECK-NEXT: br label %exit
+; CHECK: exit:
+; CHECK-NEXT: %val = phi i1 [ false, %entry ], [ %val.ph, %exit.loopexit ]
+; CHECK-NEXT: ret i1 %val
+;
+entry:
+  %length_not_zero = icmp ne i32 %len, 0
+  br i1 %length_not_zero, label %loop_preheader, label %exit
+
+loop_preheader:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %loop_preheader ], [ %iv.next, %latch ]
+  %iv.wide = phi i64 [ 0, %loop_preheader ], [ %iv.wide.next, %latch ]
+  %iv.next = add i32 %iv, 1
+  %iv.wide.next = add i64 %iv.wide, 1
+  %left_ptr = getelementptr inbounds i8, i8* %lhs, i32 %iv
+  %right_ptr = getelementptr inbounds i8, i8* %rhs, i32 %iv
+  %result = call i1 @foo(i8* %left_ptr, i8* %right_ptr)
+  br i1 %result, label %exiting_1, label %exit
+
+exiting_1:
+  %iv.wide.is_not_zero = icmp ne i64 %iv.wide, 0
+  br i1 %iv.wide.is_not_zero, label %exiting_2, label %exit
+
+exiting_2:
+  %bar_ret = call i1 @bar()
+  br i1 %bar_ret, label %exiting_3, label %exit
+
+exiting_3:
+  %baz_ret = call i1 @baz()
+  br i1 %baz_ret, label %latch, label %exit
+
+latch:
+  %continue = icmp ne i32 %iv.next, %len
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  %val = phi i1 [ %result, %loop ], [ %iv.wide.is_not_zero, %exiting_1 ],
+  [ %bar_ret, %exiting_2 ], [ %baz_ret, %exiting_3 ],
+  [ %baz_ret, %latch ], [ 0, %entry ]
+  ret i1 %val
+}