diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" @@ -156,7 +157,8 @@ bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); /// Try to eliminate loop exits based on analyzeable exit counts - bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); + bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter, + const SimplifyQuery& SQ); /// Try to form loop invariant tests for loop exits by changing how many /// iterations of the loop run when that is unobservable. bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); @@ -1309,6 +1311,30 @@ replaceExitCond(BI, NewCond, DeadInsts); } +static void replaceLoopPHINodesWithPreheaderValues( + Loop *L, + SmallVectorImpl &DeadInsts, + const SimplifyQuery& SQ) { + auto *LoopPreheader = L->getLoopPreheader(); + auto *LoopHeader = L->getHeader(); + for (auto &PN : LoopHeader->phis()) { + auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); + SmallVector UsersToUpdate(PN.users()); + for (User *User : UsersToUpdate) { + // replace PHI use with the preheader incoming value and + // try to simplify the result + User->replaceUsesOfWith(&PN, PreheaderIncoming); + if (Instruction *UserInst = dyn_cast(User)) { + if (Value *V = SimplifyInstruction(UserInst, SQ)) { + UI->replaceAllUsesWith(V); + DeadInsts.emplace_back(UserInst); + } + } + } + DeadInsts.emplace_back(&PN); + } +} + static void replaceWithInvariantCond( const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, @@ -1394,7 +1420,8 @@ return true; } -bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { +bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter, + const SimplifyQuery &SQ) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1454,8 +1481,16 @@ bool Changed = false; bool SkipLastIter = false; - SmallSet DominatingExitCounts; + bool ExitsOnFirstIter = false; + SmallSet DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { + if (ExitsOnFirstIter) { + // If proved that some earlier exit is taken + // on 1st iteration, then fold this one. + foldExit(L, ExitingBB, true, DeadInsts); + continue; + } + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa(ExitCount)) { // Okay, we do not know the exit count here. Can we at least prove that it @@ -1499,11 +1534,14 @@ // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. - // TODO: Given we know the backedge can't be taken, we should go ahead - // and break it. Or at least, kill all the header phis and simplify. + // We know the backedge can't be taken, so we go ahead and break all + // branches in all further exiting blocks. Also we kill all + // header phis and simplify their uses. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(L, DeadInsts, SQ); Changed = true; + ExitsOnFirstIter = true; continue; } @@ -1774,8 +1812,9 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + const SimplifyQuery SQ(DL, TLI); // Try to eliminate loop exits based on analyzeable exit counts - if (optimizeLoopExits(L, Rewriter)) { + if (optimizeLoopExits(L, Rewriter, SQ)) { Changed = true; // Given we've changed exit counts, notify SCEV // Some nested loops may share same folded exit basic block, diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p +; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -passes=indvars -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare i1 @foo(i8*, i8*) +declare i1 @bar() +declare i1 @baz() + +define i1 @kill_backedge_and_phis(i8* align 1 %lhs, i8* align 1 %rhs, i32 %len) { +; CHECK-LABEL: @kill_backedge_and_phis( +; CHECK-NEXT: entry: +; CHECK-NEXT: %length_not_zero = icmp ne i32 %len, 0 +; CHECK-NEXT: br i1 %length_not_zero, label %loop_preheader, label %exit +; CHECK: loop_preheader: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %result = call i1 @foo(i8* %lhs, i8* %rhs) +; CHECK-NEXT: br i1 %result, label %exiting_1, label %exit.loopexit +; CHECK: exiting_1: +; CHECK-NEXT: br i1 false, label %exiting_2, label %exit.loopexit +; CHECK: exiting_2: +; CHECK-NEXT: %bar_ret = call i1 @bar() +; CHECK-NEXT: br i1 true, label %exit.loopexit, label %exiting_3 +; CHECK: exiting_3: +; CHECK-NEXT: %baz_ret = call i1 @baz() +; CHECK-NEXT: br i1 false, label %loop, label %exit.loopexit +; CHECK: exit.loopexit: +; CHECK-NEXT: %val.ph = phi i1 [ %baz_ret, %exiting_3 ], [ %bar_ret, %exiting_2 ], [ false, %exiting_1 ], [ %result, %loop ] +; CHECK-NEXT: br label %exit +; CHECK: exit: +; CHECK-NEXT: %val = phi i1 [ false, %entry ], [ %val.ph, %exit.loopexit ] +; CHECK-NEXT: ret i1 %val +; +entry: + %length_not_zero = icmp ne i32 %len, 0 + br i1 %length_not_zero, label %loop_preheader, label %exit + +loop_preheader: + br label %loop + +loop: + %iv = phi i32 [ 0, %loop_preheader ], [ %iv.next, %latch ] + %iv.wide = phi i64 [ 0, %loop_preheader ], [ %iv.wide.next, %latch ] + %iv.next = add i32 %iv, 1 + %iv.wide.next = add i64 %iv.wide, 1 + %left_ptr = getelementptr inbounds i8, i8* %lhs, i32 %iv + %right_ptr = getelementptr inbounds i8, i8* %rhs, i32 %iv + %result = call i1 @foo(i8* %left_ptr, i8* %right_ptr) + br i1 %result, label %exiting_1, label %exit + +exiting_1: + %iv.wide.is_not_zero = icmp ne i64 %iv.wide, 0 + br i1 %iv.wide.is_not_zero, label %exiting_2, label %exit + +exiting_2: + %bar_ret = call i1 @bar() + br i1 %bar_ret, label %exit, label %exiting_3 + +exiting_3: + %baz_ret = call i1 @baz() + br i1 %baz_ret, label %latch, label %exit + +latch: + %continue = icmp ne i32 %iv.next, %len + br i1 %continue, label %loop, label %exit + +exit: + %val = phi i1 [ %result, %loop ], [ %iv.wide.is_not_zero, %exiting_1 ], + [ %bar_ret, %exiting_2 ], [ %baz_ret, %exiting_3 ], + [ %baz_ret, %latch ], [ 0, %entry ] + ret i1 %val +} diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -14,10 +14,8 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP:%.*]] = phi i8* [ [[TMP4:%.*]], [[BB7:%.*]] ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), [[BB:%.*]] ] -; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i8, i8* [[TMP]], i64 -1 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 -; CHECK-NEXT: br i1 false, label [[BB7]], label [[BB11:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 1), align 1 +; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB11:%.*]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, label [[BB11]], label [[BB3]] diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll --- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll +++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll @@ -8,7 +8,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: [[DOTINT]] = add nuw nsw i32 [[IV_INT]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[DOTINT]], 10000 ; CHECK-NEXT: br i1 [[TMP1]], label [[BB]], label [[RETURN:%.*]] @@ -38,7 +38,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0]] ; CHECK-NEXT: [[DOTINT]] = add nsw i32 [[IV_INT]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[DOTINT]], -1 ; CHECK-NEXT: br i1 [[TMP1]], label [[BB]], label [[RETURN:%.*]] @@ -65,9 +65,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[IV:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[BB]] ] -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV]]) [[ATTR0]] -; CHECK-NEXT: [[TMP1]] = fadd double [[IV]], 1.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double 0.000000e+00) #[[ATTR0]] ; CHECK-NEXT: br i1 false, label [[BB]], label [[RETURN:%.*]] ; CHECK: return: ; CHECK-NEXT: ret void @@ -93,7 +91,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ 40, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0]] ; CHECK-NEXT: [[DOTINT]] = add nsw i32 [[IV_INT]], -1 ; CHECK-NEXT: br i1 false, label [[BB]], label [[RETURN:%.*]] ; CHECK: return: @@ -272,7 +270,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] @@ -308,7 +306,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] @@ -344,7 +342,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]]