diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -129,6 +129,13 @@ /// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc. void ReplaceInstWithInst(Instruction *From, Instruction *To); +/// Check if we can prove that all paths starting from this block converge +/// to a block that either has a @llvm.experimental.deoptimize call +/// prior to its terminating return instruction or is terminated by unreachable. +/// All blocks in the traversed sequence must have an unique successor, maybe +/// except for the last one. +bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB); + /// Option class for critical edge splitting. /// /// This provides a builder interface for overriding the default options used diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -52,6 +53,12 @@ #define DEBUG_TYPE "basicblock-utils" +static cl::opt MaxDeoptOrUnreachableSuccessorCheckDepth( + "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden, + cl::desc("Set the maximum path length when checking whether a basic block " + "is followed by a block that either has a terminating " + "deoptimizing call or is terminated with an unreachable")); + void llvm::DetatchDeadBlocks( ArrayRef BBs, SmallVectorImpl *Updates, @@ -485,6 +492,20 @@ BI = New; } +bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) { + // Remember visited blocks to avoid infinite loop + SmallPtrSet VisitedBlocks; + unsigned Depth = 0; + while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth && + VisitedBlocks.insert(BB).second) { + if (BB->getTerminatingDeoptimizeCall() || + isa(BB->getTerminator())) + return true; + BB = BB->getUniqueSuccessor(); + } + return false; +} + void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { BasicBlock::iterator BI(From); ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -103,15 +103,15 @@ SmallVector Exits; L->getUniqueNonLatchExitBlocks(Exits); // The latch must either be the only exiting block or all non-latch exit - // blocks have either a deopt or unreachable terminator. Both deopt and - // unreachable terminators are a strong indication they are not taken. Note - // that this is a profitability check, not a legality check. Also note that - // LoopPeeling currently can only update the branch weights of latch blocks - // and branch weights to blocks with deopt or unreachable do not need + // blocks have either a deopt or unreachable terminator or compose a chain of + // blocks where the last one is either deopt or unreachable terminated. Both + // deopt and unreachable terminators are a strong indication they are not + // taken. Note that this is a profitability check, not a legality check. Also + // note that LoopPeeling currently can only update the branch weights of latch + // blocks and branch weights to blocks with deopt or unreachable do not need // updating. return all_of(Exits, [](const BasicBlock *BB) { - return BB->getTerminatingDeoptimizeCall() || - isa(BB->getTerminator()); + return IsBlockFollowedByDeoptOrUnreachable(BB); }); } diff --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll --- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll @@ -193,28 +193,56 @@ define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) { ; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]] +; CHECK: loop.header.peel.begin: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]] +; CHECK: loop.header.peel: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]] +; CHECK: else.peel: +; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]] +; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: then.peel: +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ] +; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4 +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1 +; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000 +; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: loop.header.peel.next: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]] +; CHECK: loop.header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: else: -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]] ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void +; CHECK: exit.1.loopexit: +; CHECK-NEXT: br label [[EXIT_1]] ; CHECK: exit.1: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[UNREACHABLE_TERM:%.*]] +; CHECK: exit.2.loopexit: +; CHECK-NEXT: br label [[EXIT_2]] ; CHECK: exit.2: ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[UNREACHABLE_TERM]]