diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1309,6 +1309,17 @@ replaceExitCond(BI, NewCond, DeadInsts); } +static void replaceLoopPHINodesWithPreheaderValues( + Loop *L, SmallVectorImpl &DeadInsts) { + auto *LoopPreheader = L->getLoopPreheader(); + auto *LoopHeader = L->getHeader(); + for (auto &PN : LoopHeader->phis()) { + auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); + PN.replaceAllUsesWith(PreheaderIncoming); + DeadInsts.emplace_back(&PN); + } +} + static void replaceWithInvariantCond( const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, @@ -1454,8 +1465,16 @@ bool Changed = false; bool SkipLastIter = false; - SmallSet DominatingExitCounts; + bool ExitsOnFirstIter = false; + SmallSet DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { + if (ExitsOnFirstIter) { + // If proved that some earlier exit is taken + // on 1st iteration, then fold this one. + foldExit(L, ExitingBB, true, DeadInsts); + continue; + } + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa(ExitCount)) { // Okay, we do not know the exit count here. Can we at least prove that it @@ -1499,11 +1518,14 @@ // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. - // TODO: Given we know the backedge can't be taken, we should go ahead - // and break it. Or at least, kill all the header phis and simplify. + // We know the backedge can't be taken, so we go ahead and break all + // branches in all further exiting blocks. Also we replace all header + // phis with their values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(L, DeadInsts); Changed = true; + ExitsOnFirstIter = true; continue; } @@ -1775,7 +1797,7 @@ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); // Try to eliminate loop exits based on analyzeable exit counts - if (optimizeLoopExits(L, Rewriter)) { + if (optimizeLoopExits(L, Rewriter)) { Changed = true; // Given we've changed exit counts, notify SCEV // Some nested loops may share same folded exit basic block, diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-backedge.ll @@ -15,25 +15,19 @@ ; CHECK: loop_preheader: ; CHECK-NEXT: br label %loop ; CHECK: loop: -; CHECK-NEXT: %iv = phi i32 [ 0, %loop_preheader ], [ %iv.next, %exiting_3 ] -; CHECK-NEXT: %iv.wide = phi i64 [ 0, %loop_preheader ], [ %iv.wide.next, %exiting_3 ] -; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1 -; CHECK-NEXT: %iv.wide.next = add nuw nsw i64 %iv.wide, 1 -; CHECK-NEXT: %left_ptr = getelementptr inbounds i8, i8* %lhs, i32 %iv -; CHECK-NEXT: %right_ptr = getelementptr inbounds i8, i8* %rhs, i32 %iv +; CHECK-NEXT: %left_ptr = getelementptr inbounds i8, i8* %lhs, i32 0 +; CHECK-NEXT: %right_ptr = getelementptr inbounds i8, i8* %rhs, i32 0 ; CHECK-NEXT: %result = call i1 @foo(i8* %left_ptr, i8* %right_ptr) ; CHECK-NEXT: br i1 %result, label %exiting_1, label %exit.loopexit ; CHECK: exiting_1: -; CHECK-NEXT: %iv.wide.is_not_zero = icmp ne i64 %iv.wide, 0 +; CHECK-NEXT: %iv.wide.is_not_zero = icmp ne i64 0, 0 ; CHECK-NEXT: br i1 false, label %exiting_2, label %exit.loopexit ; CHECK: exiting_2: ; CHECK-NEXT: %bar_ret = call i1 @bar() -; CHECK-NEXT: br i1 %bar_ret, label %exit.loopexit, label %exiting_3 +; CHECK-NEXT: br i1 true, label %exit.loopexit, label %exiting_3 ; CHECK: exiting_3: ; CHECK-NEXT: %baz_ret = call i1 @baz() -; CHECK-NEXT: %continue = icmp ne i32 %iv.next, %len -; CHECK-NEXT: %or.cond = select i1 %baz_ret, i1 %continue, i1 false -; CHECK-NEXT: br i1 %or.cond, label %loop, label %exit.loopexit +; CHECK-NEXT: br i1 false, label %loop, label %exit.loopexit ; CHECK: exit.loopexit: ; CHECK-NEXT: %val.ph = phi i1 [ %baz_ret, %exiting_3 ], [ %bar_ret, %exiting_2 ], [ %iv.wide.is_not_zero, %exiting_1 ], [ %result, %loop ] ; CHECK-NEXT: br label %exit diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -14,10 +14,9 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP:%.*]] = phi i8* [ [[TMP4:%.*]], [[BB7:%.*]] ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), [[BB:%.*]] ] -; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i8, i8* [[TMP]], i64 -1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), i64 -1 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 -; CHECK-NEXT: br i1 false, label [[BB7]], label [[BB11:%.*]] +; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB11:%.*]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, label [[BB11]], label [[BB3]] diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll --- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll +++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll @@ -8,7 +8,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: [[DOTINT]] = add nuw nsw i32 [[IV_INT]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[DOTINT]], 10000 ; CHECK-NEXT: br i1 [[TMP1]], label [[BB]], label [[RETURN:%.*]] @@ -38,7 +38,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0]] ; CHECK-NEXT: [[DOTINT]] = add nsw i32 [[IV_INT]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[DOTINT]], -1 ; CHECK-NEXT: br i1 [[TMP1]], label [[BB]], label [[RETURN:%.*]] @@ -65,9 +65,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[IV:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[BB]] ] -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV]]) [[ATTR0]] -; CHECK-NEXT: [[TMP1]] = fadd double [[IV]], 1.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double 0.000000e+00) #[[ATTR0]] ; CHECK-NEXT: br i1 false, label [[BB]], label [[RETURN:%.*]] ; CHECK: return: ; CHECK-NEXT: ret void @@ -93,7 +91,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[IV_INT:%.*]] = phi i32 [ 40, [[ENTRY:%.*]] ], [ [[DOTINT:%.*]], [[BB]] ] ; CHECK-NEXT: [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[INDVAR_CONV]]) #[[ATTR0]] ; CHECK-NEXT: [[DOTINT]] = add nsw i32 [[IV_INT]], -1 ; CHECK-NEXT: br i1 false, label [[BB]], label [[RETURN:%.*]] ; CHECK: return: @@ -272,7 +270,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] @@ -308,7 +306,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] @@ -344,7 +342,7 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) [[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #[[ATTR0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]]