diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -333,6 +333,31 @@
   return DesiredPeelCount;
 }
 
+/// This "heuristic" exactly matches implicit behavior which used to exist
+/// inside getLoopEstimatedTripCount. It was added here to keep an
+/// improvement inside that API from causing peeling to become more aggressive.
+/// This should probably be removed.
+static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return true;
+
+  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
+    return true;
+
+  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+          LatchBR->getSuccessor(1) == L->getHeader()) &&
+         "At least one edge out of the latch must go to the header");
+
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getUniqueNonLatchExitBlocks(ExitBlocks);
+  return any_of(ExitBlocks, [](const BasicBlock *EB) {
+    return !EB->getTerminatingDeoptimizeCall();
+  });
+}
+
+
 // Return the number of iterations we want to peel off.
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                             TargetTransformInfo::PeelingPreferences &PP,
@@ -436,6 +461,8 @@
   // We only do this in the presence of profile information, since otherwise
   // our estimates of the trip count are not reliable enough.
   if (L->getHeader()->getParent()->hasProfileData()) {
+    if (violatesLegacyMultiExitLoopCheck(L))
+      return;
     Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
     if (!PeelCount)
       return;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -773,8 +773,8 @@
 }
 
-/// Checks if \p L has single exit through latch block except possibly
-/// "deoptimizing" exits. Returns branch instruction terminating the loop
+/// Checks if \p L has an exiting latch branch. There may also be other
+/// exiting blocks. Returns branch instruction terminating the loop
 /// latch if above check is successful, nullptr otherwise.
 static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
   BasicBlock *Latch = L->getLoopLatch();
@@ -789,21 +789,16 @@
           LatchBR->getSuccessor(1) == L->getHeader()) &&
          "At least one edge out of the latch must go to the header");
 
-  SmallVector<BasicBlock *, 4> ExitBlocks;
-  L->getUniqueNonLatchExitBlocks(ExitBlocks);
-  if (any_of(ExitBlocks, [](const BasicBlock *EB) {
-        return !EB->getTerminatingDeoptimizeCall();
-      }))
-    return nullptr;
-
   return LatchBR;
 }
 
 Optional<unsigned>
 llvm::getLoopEstimatedTripCount(Loop *L,
                                 unsigned *EstimatedLoopInvocationWeight) {
-  // Support loops with an exiting latch and other existing exists only
-  // deoptimize.
+  // Currently we take the estimated exit count only from the loop latch,
+  // ignoring other exiting blocks. This can overestimate the trip count
+  // if the loop exits through another exit, but can never underestimate it.
+  // TODO: incorporate information from other exits.
   BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
   if (!LatchBranch)
     return None;
@@ -834,8 +829,9 @@
 
 bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                      unsigned EstimatedloopInvocationWeight) {
-  // Support loops with an exiting latch and other existing exists only
-  // deoptimize.
+  // At the moment, we support changing the estimated trip count of the
+  // latch branch only. We could extend this API to manipulate estimated
+  // trip counts for any exit.
   BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
   if (!LatchBranch)
     return false;
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -608,139 +608,24 @@
 ;
 ; ENABLED-LABEL: @test3(
 ; ENABLED-NEXT: entry:
-; ENABLED-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
-; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7
-; ENABLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; ENABLED-NEXT: br i1 [[TMP1]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; ENABLED: entry.new:
-; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; ENABLED-NEXT: br label [[HEADER:%.*]]
 ; ENABLED: header:
-; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
-; ENABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
-; ENABLED-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; ENABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
 ; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
 ; ENABLED: for.exiting_block:
-; ENABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; ENABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; ENABLED-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
 ; ENABLED: latch:
 ; ENABLED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; ENABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; ENABLED-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; ENABLED-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
-; ENABLED: for.exiting_block.1:
-; ENABLED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
-; ENABLED: latch.1:
-; ENABLED-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; ENABLED-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; ENABLED-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; ENABLED-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
-; ENABLED: for.exiting_block.2:
-; ENABLED-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
-; ENABLED: latch.2:
-; ENABLED-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; ENABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
-; ENABLED-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; ENABLED-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
-; ENABLED: for.exiting_block.3:
-; ENABLED-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
-; ENABLED: latch.3:
-; ENABLED-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; ENABLED-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
-; ENABLED-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; ENABLED-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
-; ENABLED: for.exiting_block.4:
-; ENABLED-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
-; ENABLED: latch.4:
-; ENABLED-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; ENABLED-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
-; ENABLED-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; ENABLED-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
-; ENABLED: for.exiting_block.5:
-; ENABLED-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
-; ENABLED: latch.5:
-; ENABLED-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; ENABLED-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
-; ENABLED-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; ENABLED-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
-; ENABLED: for.exiting_block.6:
-; ENABLED-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
-; ENABLED: latch.6:
-; ENABLED-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; ENABLED-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
-; ENABLED-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; ENABLED-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
-; ENABLED: for.exiting_block.7:
-; ENABLED-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
-; ENABLED: latch.7:
-; ENABLED-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; ENABLED-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
-; ENABLED-NEXT: [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
-; ENABLED-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1
-; ENABLED-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; ENABLED-NEXT: br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]], !prof [[PROF4:![0-9]+]]
-; ENABLED: latchexit.unr-lcssa.loopexit:
-; ENABLED-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; ENABLED-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
-; ENABLED-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; ENABLED-NEXT: br label [[LATCHEXIT_UNR_LCSSA]]
-; ENABLED: latchexit.unr-lcssa:
-; ENABLED-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; ENABLED-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
-; ENABLED: header.epil.preheader:
-; ENABLED-NEXT: br label [[HEADER_EPIL:%.*]]
-; ENABLED: header.epil:
-; ENABLED-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
-; ENABLED-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
-; ENABLED: for.exiting_block.epil:
-; ENABLED-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT2:%.*]], label [[LATCH_EPIL]]
-; ENABLED: latch.epil:
-; ENABLED-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
-; ENABLED-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
-; ENABLED-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]]
-; ENABLED-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
-; ENABLED-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
-; ENABLED-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
-; ENABLED-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; ENABLED-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]]
-; ENABLED: latchexit.epilog-lcssa:
-; ENABLED-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
-; ENABLED-NEXT: br label [[LATCHEXIT]]
+; ENABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; ENABLED-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; ENABLED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; ENABLED-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; ENABLED-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]], !prof [[PROF4:![0-9]+]]
 ; ENABLED: latchexit:
-; ENABLED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
 ; ENABLED-NEXT: ret i32 [[SUM_0_LCSSA]]
-; ENABLED: otherexit.loopexit:
-; ENABLED-NEXT: br label [[OTHEREXIT:%.*]]
-; ENABLED: otherexit.loopexit2:
-; ENABLED-NEXT: br label [[OTHEREXIT]]
 ; ENABLED: otherexit:
 ; ENABLED-NEXT: ret i32 57
 ;