diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -73,10 +73,6 @@ "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); -static cl::opt UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, - cl::desc("Allow peeling of loops with multiple deopt exits.")); - static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; // Designates that a Phi is estimated to become invariant after an "infinite" @@ -91,39 +87,31 @@ if (!L->isLoopSimplifyForm()) return false; - if (UnrollPeelMultiDeoptExit) { - SmallVector Exits; - L->getUniqueNonLatchExitBlocks(Exits); - - if (!Exits.empty()) { - // Latch's terminator is a conditional branch, Latch is exiting and - // all non Latch exits ends up with deoptimize. - const BasicBlock *Latch = L->getLoopLatch(); - const BranchInst *T = dyn_cast(Latch->getTerminator()); - return T && T->isConditional() && L->isLoopExiting(Latch) && - all_of(Exits, [](const BasicBlock *BB) { - return BB->getTerminatingDeoptimizeCall(); - }); - } - } - - // Only peel loops that contain a single exit - if (!L->getExitingBlock() || !L->getUniqueExitBlock()) - return false; - // Don't try to peel loops where the latch is not the exiting block. // This can be an indication of two different things: // 1) The loop is not rotated. // 2) The loop contains irreducible control flow that involves the latch. const BasicBlock *Latch = L->getLoopLatch(); - if (Latch != L->getExitingBlock()) + if (!L->isLoopExiting(Latch)) return false; // Peeling is only supported if the latch is a branch. if (!isa(Latch->getTerminator())) return false; - return true; + SmallVector Exits; + L->getUniqueNonLatchExitBlocks(Exits); + // The latch must either be the only exiting block or all non-latch exit + // blocks have either a deopt or unreachable terminator. Both deopt and + // unreachable terminators are a strong indication they are not taken. Note + // that this is a profitability check, not a legality check. Also note that + // LoopPeeling currently can only update the branch weights of latch blocks + // and branch weights to blocks with deopt or unreachable do not need + // updating. + return all_of(Exits, [](const BasicBlock *BB) { + return BB->getTerminatingDeoptimizeCall() || + isa(BB->getTerminator()); + }); } // This function calculates the number of iterations after which the given Phi diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s -; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s ; Regression test for setting the correct idom for exit blocks. diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s -; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s ; Regression test for setting the correct idom for exit blocks. diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -1,7 +1,7 @@ ; REQUIRES: asserts -; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s -; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s -; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-PEEL +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -passes='require,function(require,loop-unroll)' 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-PEEL ; Make sure we use the profile information correctly to peel-off 3 iterations ; from the loop, and update the branch weights for the peeled loop properly. diff --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll --- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll @@ -6,25 +6,51 @@ define void @peel_unreachable_exit_and_latch_exit(i32* %ptr, i32 %N, i32 %x) { ; CHECK-LABEL: @peel_unreachable_exit_and_latch_exit( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]] +; CHECK: loop.header.peel.begin: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]] +; CHECK: loop.header.peel: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]] +; CHECK: else.peel: +; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]] +; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[UNREACHABLE_EXIT:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: then.peel: +; CHECK-NEXT: br label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ] +; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4 +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1 +; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000 +; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: loop.header.peel.next: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]] +; CHECK: loop.header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: else: -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_2]], label [[UNREACHABLE_EXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[C_2]], label [[UNREACHABLE_EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]] ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void +; CHECK: unreachable.exit.loopexit: +; CHECK-NEXT: br label [[UNREACHABLE_EXIT]] ; CHECK: unreachable.exit: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: unreachable