diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -109,6 +110,10 @@ cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden); +static cl::opt<bool> FreezeLoopUnswitchCond( + "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, + cl::desc("If enabled, the freeze instruction will be added to condition " + "of loop unswitch to prevent miscompilation.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -196,15 +201,15 @@ /// Copy a set of loop invariant values \p ToDuplicate and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. -static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, - ArrayRef<Value *> Invariants, - bool Direction, - BasicBlock &UnswitchedSucc, - BasicBlock &NormalSucc) { +static void buildPartialUnswitchConditionalBranch( + BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction, + BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) { IRBuilder<> IRB(&BB); Value *Cond = Direction ? IRB.CreateOr(Invariants) : IRB.CreateAnd(Invariants); + if (InsertFreeze) + Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr"); IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? 
&NormalSucc : &UnswitchedSucc); } @@ -565,7 +570,7 @@ "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!"); buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection, - *UnswitchedBB, *NewPH); + *UnswitchedBB, *NewPH, false); } // Update the dominator tree with the added edge. @@ -2124,6 +2129,13 @@ SE->forgetTopmostLoop(&L); } + bool InsertFreeze = false; + if (FreezeLoopUnswitchCond) { + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L); + } + // If the edge from this terminator to a successor dominates that successor, // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by @@ -2198,6 +2210,28 @@ BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + if (InsertFreeze) { + Value *Cond = BI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) { + auto *FrozenCond = new FreezeInst(Cond, Cond->getName() + ".fr"); + if (isa<Instruction>(Cond)) { + if (PHINode *PN = dyn_cast<PHINode>(Cond)) + FrozenCond->insertBefore(&*PN->getParent()->getFirstInsertionPt()); + else if (InvokeInst *II = dyn_cast<InvokeInst>(Cond)) { + BasicBlock *DestBB = II->getNormalDest(); + FrozenCond->insertBefore(&*DestBB->getFirstInsertionPt()); + } else + FrozenCond->insertAfter(cast<Instruction>(Cond)); + + Cond->replaceUsesWithIf(FrozenCond, [](Use &U) { + return !isa<FreezeInst>(U.getUser()); + }); + } else { + FrozenCond->insertBefore(BI); + BI->setCondition(FrozenCond); + } + } + } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2212,6 +2246,28 @@ else Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + if (InsertFreeze) { + Value *Cond = SI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT)) { + auto *FrozenCond = 
new FreezeInst(Cond, Cond->getName() + ".fr"); + if (isa<Instruction>(Cond)) { + if (PHINode *PN = dyn_cast<PHINode>(Cond)) + FrozenCond->insertBefore(&*PN->getParent()->getFirstInsertionPt()); + else if (InvokeInst *II = dyn_cast<InvokeInst>(Cond)) { + BasicBlock *DestBB = II->getNormalDest(); + FrozenCond->insertBefore(&*DestBB->getFirstInsertionPt()); + } else + FrozenCond->insertAfter(cast<Instruction>(Cond)); + + Cond->replaceUsesWithIf(FrozenCond, [](Use &U) { + return !isa<FreezeInst>(U.getUser()); + }); + } else { + FrozenCond->insertBefore(SI); + SI->setCondition(FrozenCond); + } + } + } // We need to use the set to populate domtree updates as even when there // are multiple cases pointing at the same successor we only want to // remove and insert one edge in the domtree. @@ -2292,7 +2348,7 @@ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); else buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, - *ClonedPH, *LoopPH); + *ClonedPH, *LoopPH, InsertFreeze); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); if (MSSAU) { @@ -2376,17 +2432,28 @@ "Should not be replacing constant values!"); // Use make_early_inc_range here as set invalidates the iterator. for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { - Instruction *UserI = dyn_cast<Instruction>(U.getUser()); - if (!UserI) - continue; - - // Replace it with the 'continue' side if in the main loop body, and the - // unswitched if in the cloned blocks. - if (DT.dominates(LoopPH, UserI->getParent())) - U.set(ContinueReplacement); - else if (ReplaceUnswitched && - DT.dominates(ClonedPH, UserI->getParent())) - U.set(UnswitchedReplacement); + auto ReplaceIfDominated = [&](Use &U) { + Instruction *UserI = dyn_cast<Instruction>(U.getUser()); + if (!UserI) + return; + + // Replace it with the 'continue' side if in the main loop body, and + // the unswitched if in the cloned blocks. 
+ if (DT.dominates(LoopPH, UserI->getParent())) + U.set(ContinueReplacement); + else if (ReplaceUnswitched && + DT.dominates(ClonedPH, UserI->getParent())) + U.set(UnswitchedReplacement); + }; + + ReplaceIfDominated(U); + + // If V is invariant, Freeze(V) is also invariant. Since we replace + // dominated uses of V with a constant, also try to replace dominated + // uses of Freeze(V) with the same constant. + if (auto *FI = dyn_cast<FreezeInst>(U.getUser())) + for (Use &UU : llvm::make_early_inc_range(FI->uses())) + ReplaceIfDominated(UU); } } } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll copy from llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll copy to llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -1,6 +1,6 @@ -; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s -; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s -; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s declare i32 @a() declare i32 @b() @@ -9,131 +9,24 @@ declare void @sink1(i32) declare void @sink2(i32) +declare void @sink3(i1) +declare void @sink4(i1) declare i1 @cond() declare i32 @cond.i32() -; Negative test: we cannot unswitch convergent calls. 
-define void @test_no_unswitch_convergent(i1* %ptr, i1 %cond) { -; CHECK-LABEL: @test_no_unswitch_convergent( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin -; -; We shouldn't have unswitched into any other block either. -; CHECK-NOT: br i1 %cond - -loop_begin: - br i1 %cond, label %loop_a, label %loop_b -; CHECK: loop_begin: -; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b - -loop_a: - call i32 @a() convergent - br label %loop_latch - -loop_b: - call i32 @b() - br label %loop_latch - -loop_latch: - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit - -loop_exit: - ret void -} - -; Negative test: we cannot unswitch noduplicate calls. -define void @test_no_unswitch_noduplicate(i1* %ptr, i1 %cond) { -; CHECK-LABEL: @test_no_unswitch_noduplicate( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin -; -; We shouldn't have unswitched into any other block either. -; CHECK-NOT: br i1 %cond - -loop_begin: - br i1 %cond, label %loop_a, label %loop_b -; CHECK: loop_begin: -; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b - -loop_a: - call i32 @a() noduplicate - br label %loop_latch - -loop_b: - call i32 @b() - br label %loop_latch - -loop_latch: - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit - -loop_exit: - ret void -} - declare i32 @__CxxFrameHandler3(...) -; Negative test: we cannot unswitch when tokens are used across blocks as we -; might introduce PHIs. -define void @test_no_unswitch_cross_block_token(i1* %ptr, i1 %cond) nounwind personality i32 (...)* @__CxxFrameHandler3 { -; CHECK-LABEL: @test_no_unswitch_cross_block_token( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin -; -; We shouldn't have unswitched into any other block either. 
-; CHECK-NOT: br i1 %cond - -loop_begin: - br i1 %cond, label %loop_a, label %loop_b -; CHECK: loop_begin: -; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b - -loop_a: - call i32 @a() - br label %loop_cont - -loop_b: - call i32 @b() - br label %loop_cont - -loop_cont: - invoke i32 @a() - to label %loop_latch unwind label %loop_catch - -loop_latch: - br label %loop_begin - -loop_catch: - %catch = catchswitch within none [label %loop_catch_latch, label %loop_exit] unwind to caller - -loop_catch_latch: - %catchpad_latch = catchpad within %catch [] - catchret from %catchpad_latch to label %loop_begin - -loop_exit: - %catchpad_exit = catchpad within %catch [] - catchret from %catchpad_exit to label %exit - -exit: - ret void -} - - -; Non-trivial loop unswitching where there are two distinct trivial conditions -; to unswitch within the loop. -define i32 @test1(i1* %ptr, i1 %cond1, i1 %cond2) { -; CHECK-LABEL: @test1( +define i32 @test1_freeze(i1* %ptr0, i1* %ptr1, i1* %ptr2) { +; CHECK-LABEL: @test1_freeze( entry: + %cond1 = load i1, i1* %ptr1 + %cond2 = load i1, i1* %ptr2 br label %loop_begin ; CHECK-NEXT: entry: +; CHECK-NEXT: %cond1 = load i1, i1* %ptr1, align 1 +; CHECK-NEXT: %cond2 = load i1, i1* %ptr2, align 1 +; CHECK-NEXT: %cond2.fr = freeze i1 %cond2 ; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split loop_begin: @@ -151,7 +44,7 @@ ; CHECK-NEXT: br label %loop_a.us ; ; CHECK: loop_a.us: -; CHECK-NEXT: call i32 @a() +; CHECK-NEXT: %0 = call i32 @a() ; CHECK-NEXT: br label %latch.us ; ; CHECK: latch.us: @@ -162,16 +55,18 @@ ; CHECK-NEXT: br label %loop_exit loop_b: + call i32 @b() br i1 %cond2, label %loop_b_a, label %loop_b_b ; The second unswitched condition. 
; ; CHECK: entry.split: -; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split +; CHECK-NEXT: br i1 %cond2.fr, label %entry.split.split.us, label %entry.split.split loop_b_a: - call i32 @b() + call void @sink3(i1 %cond2) br label %latch ; The 'loop_b_a' unswitched loop. +; %cond2 is replaced with true ; ; CHECK: entry.split.split.us: ; CHECK-NEXT: br label %loop_begin.us1 @@ -180,10 +75,11 @@ ; CHECK-NEXT: br label %loop_b.us ; ; CHECK: loop_b.us: +; CHECK-NEXT: %1 = call i32 @b() ; CHECK-NEXT: br label %loop_b_a.us ; ; CHECK: loop_b_a.us: -; CHECK-NEXT: call i32 @b() +; CHECK-NEXT: call void @sink3(i1 true) ; CHECK-NEXT: br label %latch.us2 ; ; CHECK: latch.us2: @@ -194,9 +90,10 @@ ; CHECK-NEXT: br label %loop_exit.split loop_b_b: - call i32 @c() + call void @sink4(i1 %cond2) br label %latch ; The 'loop_b_b' unswitched loop. +; %cond2 is replaced with false ; ; CHECK: entry.split.split: ; CHECK-NEXT: br label %loop_begin @@ -205,10 +102,11 @@ ; CHECK-NEXT: br label %loop_b ; ; CHECK: loop_b: +; CHECK-NEXT: %2 = call i32 @b() ; CHECK-NEXT: br label %loop_b_b ; ; CHECK: loop_b_b: -; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: call void @sink4(i1 false) ; CHECK-NEXT: br label %latch ; ; CHECK: latch: @@ -219,7 +117,7 @@ ; CHECK-NEXT: br label %loop_exit.split latch: - %v = load i1, i1* %ptr + %v = load i1, i1* %ptr0 br i1 %v, label %loop_begin, label %loop_exit loop_exit: @@ -231,567 +129,441 @@ ; CHECK-NEXT: ret } -define i32 @test2(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr) { -; CHECK-LABEL: @test2( +; Test that when unswitching a deeply nested loop condition in a way that +; produces a non-loop clone that can reach multiple exit blocks which are part +; of different outer loops we correctly divide the cloned loop blocks between +; the outer loops based on reachability. 
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7a( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split +; CHECK-NEXT: br label %loop_begin loop_begin: - %v = load i1, i1* %ptr - br i1 %cond1, label %loop_a, label %loop_b - -loop_a: %a = load i32, i32* %a.ptr - %ac = load i32, i32* %c.ptr - br i1 %v, label %loop_begin, label %loop_exit -; The 'loop_a' unswitched loop. + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr, align 1 +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin +; The cloned copy that always exits with the adjustments required to fix up +; loop exits. 
; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: loop_begin.us: +; CHECK: inner_inner_loop_begin.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br label %loop_a.us +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us ; -; CHECK: loop_a.us: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[AC:.*]] = load i32, i32* %c.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit ; ; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a.us ] -; CHECK-NEXT: %[[AC_LCSSA:.*]] = phi i32 [ %[[AC]], %loop_a.us ] +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ] ; CHECK-NEXT: br label %loop_exit - -loop_b: - %b = load i32, i32* %b.ptr - %bc = 
load i32, i32* %c.ptr - br i1 %v, label %loop_begin, label %loop_exit -; The 'loop_b' unswitched loop. ; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; CHECK: loop_begin: +; The original copy that continues to loop. +; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br label %loop_b +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b ; -; CHECK: loop_b: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: %[[BC:.*]] = load i32, i32* %c.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c ; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: %[[BC_LCSSA:.*]] = phi i32 [ %[[BC]], %loop_b ] -; CHECK-NEXT: br label %loop_exit +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label 
%inner_loop_begin + +inner_loop_exit: + br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label %loop_begin loop_exit: - %ab.phi = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] - %c.phi = phi i32 [ %ac, %loop_a ], [ %bc, %loop_b ] - %result = add i32 %ab.phi, %c.phi + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %loop_exit +; ; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] -; CHECK-NEXT: %[[C_PHI:.*]] = phi i32 [ %[[BC_LCSSA]], %loop_exit.split ], [ %[[AC_LCSSA]], %loop_exit.split.us ] -; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[AB_PHI]], %[[C_PHI]] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] ; CHECK-NEXT: ret i32 %[[RESULT]] } -; Test a non-trivial unswitch of an exiting edge to an exit block with other -; in-loop predecessors. -define i32 @test3a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test3a( +; Same pattern as @test7a but here the original loop becomes a non-loop that +; can reach multiple exit blocks which are part of different outer loops. 
+define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7b( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split +; CHECK-NEXT: br label %loop_begin loop_begin: - %v = load i1, i1* %ptr %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_exit, label %loop_b -; The 'loop_exit' clone. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr + br label %inner_loop_begin +; CHECK: loop_begin: ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] -; CHECK-NEXT: br label %loop_exit +; CHECK-NEXT: br label %inner_loop_begin -loop_b: +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr %b = load i32, i32* %b.ptr - br i1 %v, label %loop_begin, label %loop_exit -; The 'loop_b' unswitched loop. 
-; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_b -; -; CHECK: loop_b: + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] ; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split -; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: br label %loop_exit +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split -loop_exit: - %ab.phi = phi i32 [ %a, %loop_begin ], [ %b, %loop_b ] - ret i32 %ab.phi -; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] -} +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b -; Test a non-trivial unswitch of an exiting edge to an exit block with other -; in-loop predecessors. This is the same as @test3a but with the reversed order -; of successors so that the exiting edge is *not* the cloned edge. -define i32 @test3b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test3b( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c -loop_begin: - %v = load i1, i1* %ptr - %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_b, label %loop_exit -; The 'loop_b' unswitched loop. 
+inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_inner_loop_begin, label %inner_loop_exit +; The cloned copy that continues looping. ; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: loop_begin.us: +; CHECK: inner_inner_loop_begin.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_b.us +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us ; -; CHECK: loop_b.us: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit ; ; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ] +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ] ; CHECK-NEXT: br label %loop_exit - -loop_b: - %b = load i32, i32* %b.ptr - br i1 %v, 
label %loop_begin, label %loop_exit -; The original loop, now non-looping due to unswitching.. ; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit.split +; The original copy that now always exits and needs adjustments for exit +; blocks. ; -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit - -loop_exit: - %ab.phi = phi i32 [ %b, %loop_b ], [ %a, %loop_begin ] - ret i32 %ab.phi -; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] -} - -; Test a non-trivial unswitch of an exiting edge to an exit block with no other -; in-loop predecessors. -define void @test4a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test4a( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split - -loop_begin: - %v = load i1, i1* %ptr - %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_exit1, label %loop_b -; The 'loop_exit' clone. 
+; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin ; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b ; -; CHECK: loop_begin.us: +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit1.split.us +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c ; -; CHECK: loop_exit1.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] -; CHECK-NEXT: br label %loop_exit1 - -loop_b: - %b = load i32, i32* %b.ptr - br i1 %v, label %loop_begin, label %loop_exit2 -; The 'loop_b' unswitched loop. 
+; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit ; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin +; CHECK: inner_inner_loop_c.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c ; -; CHECK: loop_begin: +; CHECK: inner_inner_loop_c: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_b +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d ; -; CHECK: loop_b: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2 - -loop_exit1: - %a.phi = phi i32 [ %a, %loop_begin ] - call void @sink1(i32 %a.phi) - ret void -; CHECK: loop_exit1: -; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]]) -; CHECK-NEXT: ret void +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit -loop_exit2: - %b.phi = phi i32 [ %b, %loop_b ] - call void @sink2(i32 %b.phi) - ret void -; CHECK: loop_exit2: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) -; CHECK-NEXT: ret void -} +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin -; Test a non-trivial unswitch of an exiting edge to an exit block with no other -; in-loop predecessors. This is the same as @test4a but with the edges reversed -; so that the exiting edge is *not* the cloned edge. 
-define void @test4b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test4b( -entry: +inner_loop_exit: br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split - -loop_begin: - %v = load i1, i1* %ptr - %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_b, label %loop_exit1 -; The 'loop_b' clone. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_b.us +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; CHECK: loop_b.us: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit2.split.us +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: loop_exit2.split.us: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ] -; CHECK-NEXT: br label %loop_exit2 - -loop_b: - %b = load i32, i32* %b.ptr - br i1 %v, label %loop_begin, label %loop_exit2 -; The 'loop_exit' unswitched path. 
+; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: entry.split: +; CHECK: inner_loop_exit: ; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit1 -loop_exit1: - %a.phi = phi i32 [ %a, %loop_begin ] - call void @sink1(i32 %a.phi) - ret void -; CHECK: loop_exit1: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ] -; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) -; CHECK-NEXT: ret void - -loop_exit2: - %b.phi = phi i32 [ %b, %loop_b ] - call void @sink2(i32 %b.phi) - ret void -; CHECK: loop_exit2: -; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) -; CHECK-NEXT: ret void +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] } -; Test a non-trivial unswitch of an exiting edge to an exit block with no other -; in-loop predecessors. This is the same as @test4a but with a common merge -; block after the independent loop exits. This requires a different structural -; update to the dominator tree. 
-define void @test4c(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test4c( +; Test that when the exit block set of an inner loop changes to start at a less +; high level of the loop nest we correctly hoist the loop up the nest. +define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8a( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split +; CHECK-NEXT: br label %loop_begin loop_begin: - %v = load i1, i1* %ptr %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_exit1, label %loop_b -; The 'loop_exit' clone. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr + br label %inner_loop_begin +; CHECK: loop_begin: ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit1.split.us -; -; CHECK: loop_exit1.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ] -; CHECK-NEXT: br label %loop_exit1 +; CHECK-NEXT: br label %inner_loop_begin -loop_b: +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr %b = load i32, i32* %b.ptr - br i1 %v, label %loop_begin, label %loop_exit2 -; The 'loop_b' unswitched loop. 
-; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_b -; -; CHECK: loop_b: + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] ; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit2 +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split -loop_exit1: - %a.phi = phi i32 [ %a, %loop_begin ] - call void @sink1(i32 %a.phi) - br label %exit -; CHECK: loop_exit1: -; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]]) -; CHECK-NEXT: br label %exit +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b -loop_exit2: - %b.phi = phi i32 [ %b, %loop_b ] - call void @sink2(i32 %b.phi) - br label %exit -; CHECK: loop_exit2: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) -; CHECK-NEXT: br label %exit +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit -exit: - ret void -; CHECK: exit: -; CHECK-NEXT: ret void -} +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit -; Test that we can unswitch a condition out of multiple layers of a loop nest. 
-define i32 @test5(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test5( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %loop_begin.split.us, label %entry.split -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: br label %loop_begin.split - -loop_begin: - br label %inner_loop_begin - -inner_loop_begin: - %v = load i1, i1* %ptr - %a = load i32, i32* %a.ptr - br i1 %cond1, label %loop_exit, label %inner_loop_b -; The 'loop_exit' clone. +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is now an exit from the inner loop. ; -; CHECK: loop_begin.split.us: -; CHECK-NEXT: br label %inner_loop_begin.us +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: inner_loop_begin.us: +; CHECK: inner_inner_loop_begin.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit.loopexit.split.us -; -; CHECK: loop_exit.loopexit.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] -; CHECK-NEXT: br label %loop_exit - -inner_loop_b: - %b = load i32, i32* %b.ptr - br i1 %v, label %inner_loop_begin, label %loop_latch -; The 'inner_loop_b' unswitched loop. 
+; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us ; -; CHECK: loop_begin.split: -; CHECK-NEXT: br label %inner_loop_begin +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label %inner_inner_loop_latch.us ; -; CHECK: inner_loop_begin: +; CHECK: inner_inner_loop_a.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_b +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us ; -; CHECK: inner_loop_b: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_begin, label %loop_latch - -loop_latch: - %b.phi = phi i32 [ %b, %inner_loop_b ] - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_begin, label %loop_exit -; CHECK: loop_latch: -; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ] -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.loopexit1 - -loop_exit: - %ab.phi = phi i32 [ %a, %inner_loop_begin ], [ %b.phi, %loop_latch ] - ret i32 %ab.phi -; CHECK: loop_exit.loopexit: -; CHECK-NEXT: br label %loop_exit +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: loop_exit.loopexit1: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_latch ] -; CHECK-NEXT: br label %loop_exit +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[B_LCSSA]], %loop_exit.loopexit1 ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] -} - -; Test that we can unswitch a condition where we end up only cloning some of -; the nested loops and needing to delete some of the nested loops. 
-define i32 @test6(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test6( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split - -loop_begin: - %v = load i1, i1* %ptr - br i1 %cond1, label %loop_a, label %loop_b - -loop_a: - br label %loop_a_inner - -loop_a_inner: - %va = load i1, i1* %ptr - %a = load i32, i32* %a.ptr - br i1 %va, label %loop_a_inner, label %loop_a_inner_exit - -loop_a_inner_exit: - %a.lcssa = phi i32 [ %a, %loop_a_inner ] - br label %latch -; The 'loop_a' cloned loop. +; The original region exits the loop earlier. ; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin ; -; CHECK: loop_begin.us: +; CHECK: inner_inner_loop_begin: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br label %loop_a.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: br label %loop_a_inner.us -; -; CHECK: loop_a_inner.us -; CHECK-NEXT: %[[VA:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br i1 %[[VA]], label %loop_a_inner.us, label %loop_a_inner_exit.us +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b ; -; CHECK: loop_a_inner_exit.us: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a_inner.us ] -; CHECK-NEXT: br label %latch.us +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split ; -; CHECK: latch.us: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %loop_a_inner_exit.us ] -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label %inner_inner_loop_exit ; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_PHI]], %latch.us ] -; CHECK-NEXT: br label %loop_exit - -loop_b: - br 
label %loop_b_inner - -loop_b_inner: - %vb = load i1, i1* %ptr - %b = load i32, i32* %b.ptr - br i1 %vb, label %loop_b_inner, label %loop_b_inner_exit - -loop_b_inner_exit: - %b.lcssa = phi i32 [ %b, %loop_b_inner ] - br label %latch +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label %inner_inner_loop_begin -latch: - %ab.phi = phi i32 [ %a.lcssa, %loop_a_inner_exit ], [ %b.lcssa, %loop_b_inner_exit ] - br i1 %v, label %loop_begin, label %loop_exit -; The 'loop_b' unswitched loop. -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br label %loop_b -; -; CHECK: loop_b: -; CHECK-NEXT: br label %loop_b_inner -; -; CHECK: loop_b_inner -; CHECK-NEXT: %[[VB:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[VB]], label %loop_b_inner, label %loop_b_inner_exit +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; CHECK: loop_b_inner_exit: -; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b_inner ] -; CHECK-NEXT: br label %latch +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: latch: -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split +; CHECK: 
inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %latch ] -; CHECK-NEXT: br label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin loop_exit: - %ab.lcssa = phi i32 [ %ab.phi, %latch ] - ret i32 %ab.lcssa + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] } -; Test that when unswitching a deeply nested loop condition in a way that -; produces a non-loop clone that can reach multiple exit blocks which are part -; of different outer loops we correctly divide the cloned loop blocks between -; the outer loops based on reachability. -define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test7a( +; Same pattern as @test8a but where the original loop loses an exit block and +; needs to be hoisted up the nest. 
+define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8b( entry: br label %loop_begin ; CHECK-NEXT: entry: @@ -812,8 +584,9 @@ ; CHECK: inner_loop_begin: ; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] ; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] ; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split inner_inner_loop_begin: %v1 = load i1, i1* %ptr @@ -821,20 +594,14 @@ inner_inner_loop_a: %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_exit, label %inner_inner_loop_c + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit inner_inner_loop_b: - %v3 = load i1, i1* %ptr - br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c - -inner_inner_loop_c: - %v4 = load i1, i1* %ptr - br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch -inner_inner_loop_d: - br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin -; The cloned copy that always exits with the adjustments required to fix up -; loop exits. +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is similar to before but with one earlier exit. 
; ; CHECK: inner_loop_begin.split.us: ; CHECK-NEXT: br label %inner_inner_loop_begin.us @@ -844,39 +611,26 @@ ; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us ; ; CHECK: inner_inner_loop_b.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit +; CHECK-NEXT: br label %inner_inner_loop_exit.split.us ; ; CHECK: inner_inner_loop_a.us: -; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ] -; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us -; -; CHECK: inner_inner_loop_c.us.loopexit: -; CHECK-NEXT: br label %inner_inner_loop_c.us -; -; CHECK: inner_inner_loop_c.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us ; -; CHECK: inner_inner_loop_d.us: -; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; ; CHECK: inner_inner_loop_exit.split.us: ; CHECK-NEXT: br label %inner_inner_loop_exit ; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ] -; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ] -; CHECK-NEXT: br label %loop_exit -; ; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] ; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; The original copy that continues to loop. +; The original region is now an exit in the preheader. 
; ; CHECK: inner_loop_begin.split: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] ; CHECK-NEXT: br label %inner_inner_loop_begin ; ; CHECK: inner_inner_loop_begin: @@ -885,464 +639,314 @@ ; ; CHECK: inner_inner_loop_a: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split ; ; CHECK: inner_inner_loop_b: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c -; -; CHECK: inner_inner_loop_c: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; CHECK-NEXT: br label %inner_inner_loop_latch ; -; CHECK: inner_inner_loop_d: +; CHECK: inner_inner_loop_latch: ; CHECK-NEXT: br label %inner_inner_loop_begin -; -; CHECK: inner_inner_loop_exit.split: -; CHECK-NEXT: br label %inner_inner_loop_exit inner_inner_loop_exit: %a2 = load i32, i32* %a.ptr - %v5 = load i1, i1* %ptr - br i1 %v5, label %inner_loop_exit, label %inner_loop_begin + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin ; CHECK: inner_inner_loop_exit: ; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin inner_loop_exit: - br label %loop_begin + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin ; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ] ; CHECK-NEXT: br label %inner_loop_exit.loopexit ; ; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], 
%inner_loop_exit.loopexit.split.us ] ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit: -; CHECK-NEXT: br label %loop_begin +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin loop_exit: - %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] - %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] - %result = add i32 %a.lcssa, %b.lcssa - ret i32 %result -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ] -; CHECK-NEXT: br label %loop_exit -; + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] -; CHECK-NEXT: ret i32 %[[RESULT]] +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] } -; Same pattern as @test7a but here the original loop becomes a non-loop that -; can reach multiple exit blocks which are part of different outer loops. -define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test7b( +; Test that requires re-forming dedicated exits for the cloned loop. 
+define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10a( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split loop_begin: %a = load i32, i32* %a.ptr - br label %inner_loop_begin -; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_begin: - %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] - %cond = load i1, i1* %cond.ptr - %b = load i32, i32* %b.ptr - br label %inner_inner_loop_begin -; CHECK: inner_loop_begin: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split - -inner_inner_loop_begin: %v1 = load i1, i1* %ptr - br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + br i1 %v1, label %loop_a, label %loop_b -inner_inner_loop_a: +loop_a: %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_exit, label %inner_inner_loop_c - -inner_inner_loop_b: - %v3 = load i1, i1* %ptr - br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c - -inner_inner_loop_c: - %v4 = load i1, i1* %ptr - br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + br i1 %v2, label %loop_exit, label %loop_begin -inner_inner_loop_d: - br i1 %cond, label %inner_inner_loop_begin, label %inner_loop_exit -; The cloned copy that continues looping. -; -; CHECK: inner_loop_begin.split.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us +loop_b: + br i1 %cond, label %loop_exit, label %loop_begin +; The cloned loop with one edge as a direct exit. 
; -; CHECK: inner_inner_loop_begin.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us ; -; CHECK: inner_inner_loop_b.us: +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us ; -; CHECK: inner_inner_loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; CHECK: loop_b.us: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.split.us ; -; CHECK: inner_inner_loop_c.us: +; CHECK: loop_a.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us ; -; CHECK: inner_inner_loop_d.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us ; -; CHECK: inner_inner_loop_exit.split.us: -; CHECK-NEXT: br label %inner_inner_loop_exit +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] -; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ] ; CHECK-NEXT: br label %loop_exit + +; The original loop without one 'loop_exit' edge. 
; -; CHECK: inner_loop_exit.loopexit.split.us: -; CHECK-NEXT: br label %inner_loop_exit.loopexit -; -; The original copy that now always exits and needs adjustments for exit -; blocks. -; -; CHECK: inner_loop_begin.split: -; CHECK-NEXT: br label %inner_inner_loop_begin -; -; CHECK: inner_inner_loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b -; -; CHECK: inner_inner_loop_a: -; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ] -; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c -; -; CHECK: inner_inner_loop_b: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit -; -; CHECK: inner_inner_loop_c.loopexit: -; CHECK-NEXT: br label %inner_inner_loop_c +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin ; -; CHECK: inner_inner_loop_c: +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d -; -; CHECK: inner_inner_loop_d: -; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b ; -; CHECK: inner_inner_loop_exit.split: -; CHECK-NEXT: br label %inner_inner_loop_exit - -inner_inner_loop_exit: - %a2 = load i32, i32* %a.ptr - %v5 = load i1, i1* %ptr - br i1 %v5, label %inner_loop_exit, label %inner_loop_begin -; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK: loop_a: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin - -inner_loop_exit: - br label %loop_begin -; CHECK: 
inner_loop_exit.loopexit.split: -; CHECK-NEXT: br label %inner_loop_exit.loopexit +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge ; -; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: br label %inner_loop_exit +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin ; -; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: br label %inner_loop_exit +; CHECK: loop_b: +; CHECK-NEXT: br label %loop_begin.backedge ; -; CHECK: inner_loop_exit: -; CHECK-NEXT: br label %loop_begin - -loop_exit: - %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] - %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] - %result = add i32 %a.lcssa, %b.lcssa - ret i32 %result ; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ] -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] ; CHECK-NEXT: br label %loop_exit -; + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] -; CHECK-NEXT: ret i32 %[[RESULT]] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] } -; Test that when the exit block set of an inner loop changes to start at a less -; high level of the loop nest we correctly hoist the loop up the nest. -define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test8a( +; Test that requires re-forming dedicated exits for the original loop. 
+define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10b( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split loop_begin: %a = load i32, i32* %a.ptr - br label %inner_loop_begin -; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_begin: - %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] - %cond = load i1, i1* %cond.ptr - %b = load i32, i32* %b.ptr - br label %inner_inner_loop_begin -; CHECK: inner_loop_begin: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split - -inner_inner_loop_begin: %v1 = load i1, i1* %ptr - br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + br i1 %v1, label %loop_a, label %loop_b -inner_inner_loop_a: +loop_a: %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit - -inner_inner_loop_b: - br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit + br i1 %v2, label %loop_begin, label %loop_exit -inner_inner_loop_latch: - br label %inner_inner_loop_begin -; The cloned region is now an exit from the inner loop. +loop_b: + br i1 %cond, label %loop_begin, label %loop_exit +; The cloned loop without one of the exits. 
; -; CHECK: inner_loop_begin.split.us: -; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] -; CHECK-NEXT: br label %inner_inner_loop_begin.us +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us ; -; CHECK: inner_inner_loop_begin.us: +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us ; -; CHECK: inner_inner_loop_b.us: -; CHECK-NEXT: br label %inner_inner_loop_latch.us +; CHECK: loop_b.us: +; CHECK-NEXT: br label %loop_begin.backedge.us ; -; CHECK: inner_inner_loop_a.us: +; CHECK: loop_a.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us -; -; CHECK: inner_inner_loop_latch.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us ; -; CHECK: inner_loop_exit.loopexit.split.us: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ] -; CHECK-NEXT: br label %inner_loop_exit.loopexit +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us ; -; The original region exits the loop earlier. +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit + +; The original loop without one 'loop_exit' edge. 
; -; CHECK: inner_loop_begin.split: -; CHECK-NEXT: br label %inner_inner_loop_begin +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin ; -; CHECK: inner_inner_loop_begin: +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b ; -; CHECK: inner_inner_loop_a: +; CHECK: loop_a: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit ; -; CHECK: inner_inner_loop_b: -; CHECK-NEXT: br label %inner_inner_loop_exit +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin ; -; CHECK: inner_inner_loop_latch: -; CHECK-NEXT: br label %inner_inner_loop_begin - -inner_inner_loop_exit: - %a2 = load i32, i32* %a.ptr - %v4 = load i1, i1* %ptr - br i1 %v4, label %inner_loop_exit, label %inner_loop_begin -; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin - -inner_loop_exit: - %v5 = load i1, i1* %ptr - br i1 %v5, label %loop_exit, label %loop_begin -; CHECK: inner_loop_exit.loopexit.split: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] -; CHECK-NEXT: br label %inner_loop_exit.loopexit -; -; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] -; CHECK-NEXT: br label %inner_loop_exit -; 
-; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin +; CHECK: loop_b: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ] +; CHECK-NEXT: br label %loop_exit loop_exit: - %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] -; CHECK-NEXT: ret i32 %[[A_LCSSA]] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] } -; Same pattern as @test8a but where the original loop looses an exit block and -; needs to be hoisted up the nest. -define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test8b( +; Check that if a cloned inner loop after unswitching doesn't loop and directly +; exits even an outer loop, we don't add the cloned preheader to the outer +; loop and do add the needed LCSSA phi nodes for the new exit block from the +; outer loop. 
+define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11a( entry: br label %loop_begin ; CHECK-NEXT: entry: ; CHECK-NEXT: br label %loop_begin loop_begin: - %a = load i32, i32* %a.ptr - br label %inner_loop_begin + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph ; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_begin +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph -inner_loop_begin: - %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] +inner_loop_ph: %cond = load i1, i1* %cond.ptr - %b = load i32, i32* %b.ptr - br label %inner_inner_loop_begin -; CHECK: inner_loop_begin: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] + br label %inner_loop_begin +; CHECK: inner_loop_ph: ; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_ph.split.us, label %inner_loop_ph.split -inner_inner_loop_begin: - %v1 = load i1, i1* %ptr - br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %loop_exit, label %inner_loop_a -inner_inner_loop_a: +inner_loop_a: %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit - -inner_inner_loop_b: - br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch - -inner_inner_loop_latch: - br label %inner_inner_loop_begin -; The cloned region is similar to before but with one earlier exit. 
-; -; CHECK: inner_loop_begin.split.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us -; -; CHECK: inner_inner_loop_begin.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us -; -; CHECK: inner_inner_loop_b.us: -; CHECK-NEXT: br label %inner_inner_loop_exit.split.us -; -; CHECK: inner_inner_loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us -; -; CHECK: inner_inner_loop_latch.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us -; -; CHECK: inner_inner_loop_exit.split.us: -; CHECK-NEXT: br label %inner_inner_loop_exit + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. ; -; CHECK: inner_loop_exit.loopexit.split.us: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] -; CHECK-NEXT: br label %inner_loop_exit.loopexit +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] +; CHECK-NEXT: br label %inner_loop_begin.us ; -; The original region is now an exit in the preheader. 
+; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit.split.us ; -; CHECK: inner_loop_begin.split: -; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] -; CHECK-NEXT: br label %inner_inner_loop_begin +; CHECK: loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.loopexit ; -; CHECK: inner_inner_loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; The original remains a loop losing the exit edge. ; -; CHECK: inner_inner_loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: br label %inner_loop_begin ; -; CHECK: inner_inner_loop_b: -; CHECK-NEXT: br label %inner_inner_loop_latch +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_a ; -; CHECK: inner_inner_loop_latch: -; CHECK-NEXT: br label %inner_inner_loop_begin - -inner_inner_loop_exit: - %a2 = load i32, i32* %a.ptr - %v4 = load i1, i1* %ptr - br i1 %v4, label %inner_loop_exit, label %inner_loop_begin -; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK: inner_loop_a: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin inner_loop_exit: - %v5 = load i1, i1* %ptr - br i1 %v5, label %loop_exit, label %loop_begin -; CHECK: inner_loop_exit.loopexit.split: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ] 
-; CHECK-NEXT: br label %inner_loop_exit.loopexit -; -; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] -; CHECK-NEXT: br label %inner_loop_exit -; + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit ; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin loop_exit: - %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] -; CHECK-NEXT: ret i32 %[[A_LCSSA]] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] } -; Test for when unswitching produces a clone of an inner loop but -; the clone no longer has an exiting edge *at all* and loops infinitely. 
-; Because it doesn't ever exit to the outer loop it is no longer an inner loop -; but needs to be hoisted up the nest to be a top-level loop. -define i32 @test9a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test9a( +; Check that if the original inner loop after unswitching doesn't loop and +; directly exits even an outer loop, we remove the original preheader from the +; outer loop and add needed LCSSA phi nodes for the new exit block from the +; outer loop. +define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11b( entry: br label %loop_begin ; CHECK-NEXT: entry: @@ -1350,120 +954,185 @@ loop_begin: %b = load i32, i32* %b.ptr - %cond = load i1, i1* %cond.ptr - br label %inner_loop_begin + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph ; CHECK: loop_begin: ; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: inner_loop_ph: ; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_ph.split.us, label %inner_loop_ph.split inner_loop_begin: + call void @sink1(i32 %b) %a = load i32, i32* %a.ptr - br i1 %cond, label %inner_loop_latch, label %inner_loop_exit + br i1 %cond, label %inner_loop_a, label %loop_exit -inner_loop_latch: - call void @sink1(i32 %b) - br label %inner_loop_begin -; The cloned inner loop ends up as an infinite loop and thus being a top-level -; loop with the preheader as an exit block of the outer loop. +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. 
; -; CHECK: loop_begin.split.us -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ] +; CHECK: inner_loop_ph.split.us: ; CHECK-NEXT: br label %inner_loop_begin.us ; ; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_latch.us +; CHECK-NEXT: br label %inner_loop_a.us ; -; CHECK: inner_loop_latch.us: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) -; CHECK-NEXT: br label %inner_loop_begin.us +; CHECK: inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us +; +; CHECK: inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit ; -; The original loop becomes boring non-loop code. +; The original remains a loop losing the exit edge. ; -; CHECK: loop_begin.split +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] ; CHECK-NEXT: br label %inner_loop_begin ; ; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_exit +; CHECK-NEXT: br label %loop_exit.loopexit inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ] - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit ; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin loop_exit: - %a.lcssa = phi 
i32 [ %a.inner_lcssa, %inner_loop_exit ] + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] -; CHECK-NEXT: ret i32 %[[A_LCSSA]] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] } -; The same core pattern as @test9a, but instead of the cloned loop becoming an -; infinite loop, the original loop has its only exit unswitched and the -; original loop becomes infinite and must be hoisted out of the loop nest. -define i32 @test9b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test9b( +; Like test11a, but checking that when the whole thing is wrapped in yet +; another loop, we correctly attribute the cloned preheader to that outermost +; loop rather than only handling the case where the preheader is not in any loop +; at all. 
+define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12a( entry: br label %loop_begin ; CHECK-NEXT: entry: ; CHECK-NEXT: br label %loop_begin loop_begin: - %b = load i32, i32* %b.ptr - %cond = load i1, i1* %cond.ptr br label %inner_loop_begin ; CHECK: loop_begin: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: ; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: ; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split -inner_loop_begin: +inner_inner_loop_begin: + call void @sink1(i32 %b) %a = load i32, i32* %a.ptr - br i1 %cond, label %inner_loop_exit, label %inner_loop_latch + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a -inner_loop_latch: - call void @sink1(i32 %b) - br label %inner_loop_begin -; The cloned inner loop becomes a boring non-loop. +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. 
; -; CHECK: loop_begin.split.us -; CHECK-NEXT: br label %inner_loop_begin.us +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: inner_loop_begin.us: +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_exit.split.us +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split.us ; -; CHECK: inner_loop_exit.split.us -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] -; CHECK-NEXT: br label %inner_loop_exit +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit ; -; The original loop becomes an infinite loop and thus a top-level loop with the -; preheader as an exit block for the outer loop. +; The original remains a loop losing the exit edge. 
; -; CHECK: loop_begin.split -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ] -; CHECK-NEXT: br label %inner_loop_begin +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: br label %inner_inner_loop_begin ; -; CHECK: inner_loop_begin: +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_latch +; CHECK-NEXT: br label %inner_inner_loop_a ; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin ; CHECK: inner_loop_latch: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) ; CHECK-NEXT: br label %inner_loop_begin inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ] - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; ; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] ; 
CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit @@ -1471,1573 +1140,429 @@ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] ; CHECK-NEXT: ret i32 %[[A_LCSSA]] } -; Test that requires re-forming dedicated exits for the cloned loop. -define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) { -; CHECK-LABEL: @test10a( +; Like test11b, but checking that when the whole thing is wrapped in yet +; another loop, we correctly sink the preheader to the outermost loop rather +; than only handling the case where the preheader is completely removed from +; a loop. +define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12b( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split +; CHECK-NEXT: br label %loop_begin loop_begin: - %a = load i32, i32* %a.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_a, label %loop_b - -loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_exit, label %loop_begin - -loop_b: - br i1 %cond, label %loop_exit, label %loop_begin -; The cloned loop with one edge as a direct exit. 
-; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ] -; CHECK-NEXT: br label %loop_exit.split.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us -; -; CHECK: loop_begin.backedge.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_exit.split.us.loopexit: -; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ] -; CHECK-NEXT: br label %loop_exit - -; The original loop without one 'loop_exit' edge. 
-; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; + br label %inner_loop_begin ; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b -; -; CHECK: loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge -; -; CHECK: loop_begin.backedge: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_b: -; CHECK-NEXT: br label %loop_begin.backedge -; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] -; CHECK-NEXT: br label %loop_exit +; CHECK-NEXT: br label %inner_loop_begin -loop_exit: - %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] - ret i32 %a.lcssa -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[A_PHI]] -} +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph -; Test that requires re-forming dedicated exits for the original loop. 
-define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) { -; CHECK-LABEL: @test10b( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split -loop_begin: +inner_inner_loop_begin: + call void @sink1(i32 %b) %a = load i32, i32* %a.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_a, label %loop_b + br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit -loop_a: +inner_inner_loop_a: %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_begin, label %loop_exit - -loop_b: - br i1 %cond, label %loop_begin, label %loop_exit -; The cloned loop without one of the exits. + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. 
; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us ; -; CHECK: loop_begin.us: +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: br label %loop_begin.backedge.us +; CHECK-NEXT: br label %inner_inner_loop_a.us ; -; CHECK: loop_a.us: +; CHECK: inner_inner_loop_a.us: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us ; -; CHECK: loop_begin.backedge.us: -; CHECK-NEXT: br label %loop_begin.us +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_inner_loop_exit ; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] -; CHECK-NEXT: br label %loop_exit - -; The original loop without one 'loop_exit' edge. +; The original remains a loop losing the exit edge. 
; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin ; -; CHECK: loop_begin: +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) ; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.loopexit + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ] +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit -; -; CHECK: loop_begin.backedge: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_b: -; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ] -; CHECK-NEXT: br label %loop_exit.split -; -; CHECK: loop_exit.split.loopexit: -; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ] -; CHECK-NEXT: br label %loop_exit.split -; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ] -; CHECK-NEXT: br label %loop_exit - 
-loop_exit: - %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] - ret i32 %a.lcssa -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[A_PHI]] -} - -; Check that if a cloned inner loop after unswitching doesn't loop and directly -; exits even an outer loop, we don't add the cloned preheader to the outer -; loop and do add the needed LCSSA phi nodes for the new exit block from the -; outer loop. -define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test11a( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin - -loop_begin: - %b = load i32, i32* %b.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_latch, label %inner_loop_ph -; CHECK: loop_begin: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph - -inner_loop_ph: - %cond = load i1, i1* %cond.ptr - br label %inner_loop_begin -; CHECK: inner_loop_ph: -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split - -inner_loop_begin: - call void @sink1(i32 %b) - %a = load i32, i32* %a.ptr - br i1 %cond, label %loop_exit, label %inner_loop_a - -inner_loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_loop_exit, label %inner_loop_begin -; The cloned path doesn't actually loop and is an exit from the outer loop as -; well. 
-; -; CHECK: inner_loop_ph.split.us: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] -; CHECK-NEXT: br label %inner_loop_begin.us -; -; CHECK: inner_loop_begin.us: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit.loopexit.split.us -; -; CHECK: loop_exit.loopexit.split.us: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] -; CHECK-NEXT: br label %loop_exit.loopexit -; -; The original remains a loop losing the exit edge. -; -; CHECK: inner_loop_ph.split: -; CHECK-NEXT: br label %inner_loop_begin -; -; CHECK: inner_loop_begin: -; CHECK-NEXT: call void @sink1(i32 %[[B]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_a -; -; CHECK: inner_loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin - -inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] - %v3 = load i1, i1* %ptr - br i1 %v3, label %loop_latch, label %loop_exit -; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 - -loop_latch: - br label %loop_begin -; CHECK: loop_latch: -; CHECK-NEXT: br label %loop_begin - -loop_exit: - %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] - ret i32 %a.lcssa -; CHECK: loop_exit.loopexit: -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit.loopexit1: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] -; CHECK-NEXT: ret i32 %[[A_PHI]] -} - -; Check that if the original inner loop after unswitching doesn't 
loop and -; directly exits even an outer loop, we remove the original preheader from the -; outer loop and add needed LCSSA phi nodes for the new exit block from the -; outer loop. -define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test11b( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin - -loop_begin: - %b = load i32, i32* %b.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_latch, label %inner_loop_ph -; CHECK: loop_begin: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph - -inner_loop_ph: - %cond = load i1, i1* %cond.ptr - br label %inner_loop_begin -; CHECK: inner_loop_ph: -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split - -inner_loop_begin: - call void @sink1(i32 %b) - %a = load i32, i32* %a.ptr - br i1 %cond, label %inner_loop_a, label %loop_exit - -inner_loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_loop_exit, label %inner_loop_begin -; The cloned path continues to loop without the exit out of the entire nest. -; -; CHECK: inner_loop_ph.split.us: -; CHECK-NEXT: br label %inner_loop_begin.us -; -; CHECK: inner_loop_begin.us: -; CHECK-NEXT: call void @sink1(i32 %[[B]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_a.us -; -; CHECK: inner_loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us -; -; CHECK: inner_loop_exit.split.us: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ] -; CHECK-NEXT: br label %inner_loop_exit -; -; The original remains a loop losing the exit edge. 
-; -; CHECK: inner_loop_ph.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] -; CHECK-NEXT: br label %inner_loop_begin -; -; CHECK: inner_loop_begin: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %loop_exit.loopexit - -inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] - %v3 = load i1, i1* %ptr - br i1 %v3, label %loop_latch, label %loop_exit -; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 - -loop_latch: - br label %loop_begin -; CHECK: loop_latch: -; CHECK-NEXT: br label %loop_begin - -loop_exit: - %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] - ret i32 %a.lcssa -; CHECK: loop_exit.loopexit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit.loopexit1: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ] -; CHECK-NEXT: ret i32 %[[A_PHI]] -} - -; Like test11a, but checking that when the whole thing is wrapped in yet -; another loop, we correctly attribute the cloned preheader to that outermost -; loop rather than only handling the case where the preheader is not in any loop -; at all. 
-define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test12a( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin - -loop_begin: - br label %inner_loop_begin -; CHECK: loop_begin: -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_begin: - %b = load i32, i32* %b.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph -; CHECK: inner_loop_begin: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph - -inner_inner_loop_ph: - %cond = load i1, i1* %cond.ptr - br label %inner_inner_loop_begin -; CHECK: inner_inner_loop_ph: -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split - -inner_inner_loop_begin: - call void @sink1(i32 %b) - %a = load i32, i32* %a.ptr - br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a - -inner_inner_loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin -; The cloned path doesn't actually loop and is an exit from the outer loop as -; well. -; -; CHECK: inner_inner_loop_ph.split.us: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] -; CHECK-NEXT: br label %inner_inner_loop_begin.us -; -; CHECK: inner_inner_loop_begin.us: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_exit.loopexit.split.us -; -; CHECK: inner_loop_exit.loopexit.split.us: -; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ] -; CHECK-NEXT: br label %inner_loop_exit.loopexit -; -; The original remains a loop losing the exit edge. 
-; -; CHECK: inner_inner_loop_ph.split: -; CHECK-NEXT: br label %inner_inner_loop_begin -; -; CHECK: inner_inner_loop_begin: -; CHECK-NEXT: call void @sink1(i32 %[[B]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_inner_loop_a -; -; CHECK: inner_inner_loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin - -inner_inner_loop_exit: - %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] - %v3 = load i1, i1* %ptr - br i1 %v3, label %inner_loop_latch, label %inner_loop_exit -; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 - -inner_loop_latch: - br label %inner_loop_begin -; CHECK: inner_loop_latch: -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] - %v4 = load i1, i1* %ptr - br i1 %v4, label %loop_begin, label %loop_exit -; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ] -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit - -loop_exit: - %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] - ret i32 %a.lcssa -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] -; CHECK-NEXT: ret i32 %[[A_LCSSA]] -} - -; Like test11b, but checking that 
when the whole thing is wrapped in yet -; another loop, we correctly sink the preheader to the outermost loop rather -; than only handling the case where the preheader is completely removed from -; a loop. -define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test12b( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin - -loop_begin: - br label %inner_loop_begin -; CHECK: loop_begin: -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_begin: - %b = load i32, i32* %b.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph -; CHECK: inner_loop_begin: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph - -inner_inner_loop_ph: - %cond = load i1, i1* %cond.ptr - br label %inner_inner_loop_begin -; CHECK: inner_inner_loop_ph: -; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr -; CHECK-NEXT: br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split - -inner_inner_loop_begin: - call void @sink1(i32 %b) - %a = load i32, i32* %a.ptr - br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit - -inner_inner_loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin -; The cloned path continues to loop without the exit out of the entire nest. 
-; -; CHECK: inner_inner_loop_ph.split.us: -; CHECK-NEXT: br label %inner_inner_loop_begin.us -; -; CHECK: inner_inner_loop_begin.us: -; CHECK-NEXT: call void @sink1(i32 %[[B]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_inner_loop_a.us -; -; CHECK: inner_inner_loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us -; -; CHECK: inner_inner_loop_exit.split.us: -; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ] -; CHECK-NEXT: br label %inner_inner_loop_exit -; -; The original remains a loop losing the exit edge. -; -; CHECK: inner_inner_loop_ph.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] -; CHECK-NEXT: br label %inner_inner_loop_begin -; -; CHECK: inner_inner_loop_begin: -; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: br label %inner_loop_exit.loopexit - -inner_inner_loop_exit: - %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] - %v3 = load i1, i1* %ptr - br i1 %v3, label %inner_loop_latch, label %inner_loop_exit -; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 - -inner_loop_latch: - br label %inner_loop_begin -; CHECK: inner_loop_latch: -; CHECK-NEXT: br label %inner_loop_begin - -inner_loop_exit: - %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] - %v4 = load i1, i1* %ptr - br i1 %v4, label %loop_begin, label %loop_exit -; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ] -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], 
%inner_inner_loop_exit ] -; CHECK-NEXT: br label %inner_loop_exit -; -; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ] -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit - -loop_exit: - %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] - ret i32 %a.lcssa -; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] -; CHECK-NEXT: ret i32 %[[A_LCSSA]] -} - -; Test where the cloned loop has an inner loop that has to be traversed to form -; the cloned loop, and where this inner loop has multiple blocks, and where the -; exiting block that connects the inner loop to the cloned loop is not the header -; block. This ensures that we correctly handle interesting corner cases of -; traversing back to the header when establishing the cloned loop. -define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test13a( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split - -loop_begin: - %a = load i32, i32* %a.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_a, label %loop_b - -loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_exit, label %loop_latch - -loop_b: - %b = load i32, i32* %b.ptr - br i1 %cond, label %loop_b_inner_ph, label %loop_exit - -loop_b_inner_ph: - br label %loop_b_inner_header - -loop_b_inner_header: - %v3 = load i1, i1* %ptr - br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body - -loop_b_inner_body: - %v4 = load i1, i1* %ptr - br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit - -loop_b_inner_latch: - br label %loop_b_inner_header - -loop_b_inner_exit: - br label %loop_latch - -loop_latch: - br label %loop_begin -; The cloned loop contains an inner loop within it. 
-; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br label %loop_b_inner_ph.us -; -; CHECK: loop_b_inner_ph.us: -; CHECK-NEXT: br label %loop_b_inner_header.us -; -; CHECK: loop_b_inner_header.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us -; -; CHECK: loop_b_inner_body.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_exit.us -; -; CHECK: loop_b_inner_exit.us: -; CHECK-NEXT: br label %loop_latch.us -; -; CHECK: loop_b_inner_latch.us: -; CHECK-NEXT: br label %loop_b_inner_header.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us -; -; CHECK: loop_latch.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] -; CHECK-NEXT: br label %loop_exit -; -; And the original loop no longer contains an inner loop. 
-; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b -; -; CHECK: loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch -; -; CHECK: loop_b: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br label %loop_exit.split -; -; CHECK: loop_latch: -; CHECK-NEXT: br label %loop_begin - -loop_exit: - %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] - ret i32 %lcssa -; CHECK: loop_exit.split.loopexit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] -; CHECK-NEXT: br label %loop_exit.split -; -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI_US]] -} - -; Test where the original loop has an inner loop that has to be traversed to -; rebuild the loop, and where this inner loop has multiple blocks, and where -; the exiting block that connects the inner loop to the original loop is not -; the header block. This ensures that we correctly handle interesting corner -; cases of traversing back to the header when re-establishing the original loop -; still exists after unswitching. 
-define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { -; CHECK-LABEL: @test13b( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split - -loop_begin: - %a = load i32, i32* %a.ptr - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_a, label %loop_b - -loop_a: - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_exit, label %loop_latch - -loop_b: - %b = load i32, i32* %b.ptr - br i1 %cond, label %loop_exit, label %loop_b_inner_ph - -loop_b_inner_ph: - br label %loop_b_inner_header - -loop_b_inner_header: - %v3 = load i1, i1* %ptr - br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body - -loop_b_inner_body: - %v4 = load i1, i1* %ptr - br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit - -loop_b_inner_latch: - br label %loop_b_inner_header - -loop_b_inner_exit: - br label %loop_latch - -loop_latch: - br label %loop_begin -; The cloned loop doesn't contain an inner loop. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br label %loop_exit.split.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us -; -; CHECK: loop_latch.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_exit.split.us.loopexit: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] -; CHECK-NEXT: br label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ] -; CHECK-NEXT: br label %loop_exit -; -; But the original loop contains an inner loop that must be traversed.; -; -; CHECK: 
entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b -; -; CHECK: loop_a: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_latch -; -; CHECK: loop_b: -; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr -; CHECK-NEXT: br label %loop_b_inner_ph -; -; CHECK: loop_b_inner_ph: -; CHECK-NEXT: br label %loop_b_inner_header -; -; CHECK: loop_b_inner_header: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body -; -; CHECK: loop_b_inner_body: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit -; -; CHECK: loop_b_inner_latch: -; CHECK-NEXT: br label %loop_b_inner_header -; -; CHECK: loop_b_inner_exit: -; CHECK-NEXT: br label %loop_latch -; -; CHECK: loop_latch: -; CHECK-NEXT: br label %loop_begin - -loop_exit: - %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] - ret i32 %lcssa -; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] -} - -define i32 @test20(i32* %var, i32 %cond1, i32 %cond2) { -; CHECK-LABEL: @test20( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 %cond2, label %[[ENTRY_SPLIT_EXIT:.*]] [ -; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] -; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]] -; CHECK-NEXT: i32 13, label %[[ENTRY_SPLIT_B:.*]] -; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_A]] -; CHECK-NEXT: i32 42, label %[[ENTRY_SPLIT_C:.*]] -; CHECK-NEXT: ] - -loop_begin: - %var_val = load i32, i32* %var - switch i32 %cond2, label 
%loop_exit [ - i32 0, label %loop_a - i32 1, label %loop_a - i32 13, label %loop_b - i32 2, label %loop_a - i32 42, label %loop_c - ] - -loop_a: - call i32 @a() - br label %loop_latch -; Unswitched 'a' loop. -; -; CHECK: [[ENTRY_SPLIT_A]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]] -; -; CHECK: [[LOOP_BEGIN_A]]: -; CHECK-NEXT: %{{.*}} = load i32, i32* %var -; CHECK-NEXT: br label %[[LOOP_A:.*]] -; -; CHECK: [[LOOP_A]]: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]] -; -; CHECK: [[LOOP_LATCH_A]]: -; CHECK: br label %[[LOOP_BEGIN_A]] - -loop_b: - call i32 @b() - br label %loop_latch -; Unswitched 'b' loop. -; -; CHECK: [[ENTRY_SPLIT_B]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]] -; -; CHECK: [[LOOP_BEGIN_B]]: -; CHECK-NEXT: %{{.*}} = load i32, i32* %var -; CHECK-NEXT: br label %[[LOOP_B:.*]] -; -; CHECK: [[LOOP_B]]: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]] -; -; CHECK: [[LOOP_LATCH_B]]: -; CHECK: br label %[[LOOP_BEGIN_B]] - -loop_c: - call i32 @c() noreturn nounwind - br label %loop_latch -; Unswitched 'c' loop. -; -; CHECK: [[ENTRY_SPLIT_C]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]] -; -; CHECK: [[LOOP_BEGIN_C]]: -; CHECK-NEXT: %{{.*}} = load i32, i32* %var -; CHECK-NEXT: br label %[[LOOP_C:.*]] -; -; CHECK: [[LOOP_C]]: -; CHECK-NEXT: call i32 @c() -; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]] -; -; CHECK: [[LOOP_LATCH_C]]: -; CHECK: br label %[[LOOP_BEGIN_C]] - -loop_latch: - br label %loop_begin - -loop_exit: - %lcssa = phi i32 [ %var_val, %loop_begin ] - ret i32 %lcssa -; Unswitched exit edge (no longer a loop). 
-; -; CHECK: [[ENTRY_SPLIT_EXIT]]: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i32, i32* %var -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[V]], %loop_begin ] -; CHECK-NEXT: ret i32 %[[LCSSA]] -} - -; Negative test: we do not switch when the loop contains unstructured control -; flows as it would significantly complicate the process as novel loops might -; be formed, etc. -define void @test_no_unswitch_unstructured_cfg(i1* %ptr, i1 %cond) { -; CHECK-LABEL: @test_no_unswitch_unstructured_cfg( -entry: - br label %loop_begin - -loop_begin: - br i1 %cond, label %loop_left, label %loop_right - -loop_left: - %v1 = load i1, i1* %ptr - br i1 %v1, label %loop_right, label %loop_merge - -loop_right: - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_left, label %loop_merge - -loop_merge: - %v3 = load i1, i1* %ptr - br i1 %v3, label %loop_latch, label %loop_exit - -loop_latch: - br label %loop_begin - -loop_exit: - ret void -} - -; A test reduced out of 403.gcc with interesting nested loops that trigger -; multiple unswitches. A key component of this test is that there are multiple -; paths to reach an inner loop after unswitching, and one of them is via the -; predecessors of the unswitched loop header. That can allow us to find the loop -; through multiple different paths. 
-define void @test21(i1 %a, i1 %b) { -; CHECK-LABEL: @test21( -bb: - br label %bb3 -; CHECK-NOT: br i1 %a -; -; CHECK: br i1 %a, label %[[BB_SPLIT_US:.*]], label %[[BB_SPLIT:.*]] -; -; CHECK-NOT: br i1 %a -; CHECK-NOT: br i1 %b -; -; CHECK: [[BB_SPLIT]]: -; CHECK: br i1 %b -; -; CHECK-NOT: br i1 %a -; CHECK-NOT: br i1 %b - -bb3: - %tmp1.0 = phi i32 [ 0, %bb ], [ %tmp1.3, %bb23 ] - br label %bb7 - -bb7: - %tmp.0 = phi i1 [ true, %bb3 ], [ false, %bb19 ] - %tmp1.1 = phi i32 [ %tmp1.0, %bb3 ], [ %tmp1.2.lcssa, %bb19 ] - br i1 %tmp.0, label %bb11.preheader, label %bb23 - -bb11.preheader: - br i1 %a, label %bb19, label %bb14.lr.ph - -bb14.lr.ph: - br label %bb14 - -bb14: - %tmp2.02 = phi i32 [ 0, %bb14.lr.ph ], [ 1, %bb14 ] - br i1 %b, label %bb11.bb19_crit_edge, label %bb14 - -bb11.bb19_crit_edge: - %split = phi i32 [ %tmp2.02, %bb14 ] - br label %bb19 - -bb19: - %tmp1.2.lcssa = phi i32 [ %split, %bb11.bb19_crit_edge ], [ %tmp1.1, %bb11.preheader ] - %tmp21 = icmp eq i32 %tmp1.2.lcssa, 0 - br i1 %tmp21, label %bb23, label %bb7 - -bb23: - %tmp1.3 = phi i32 [ %tmp1.2.lcssa, %bb19 ], [ %tmp1.1, %bb7 ] - br label %bb3 -} - -; A test reduced out of 400.perlbench that when unswitching the `%stop` -; condition clones a loop nest outside of a containing loop. This excercises a -; different cloning path from our other test cases and in turn verifying the -; resulting structure can catch any failures to correctly clone these nested -; loops. 
-declare void @f() -declare void @g() -declare i32 @h(i32 %arg) -define void @test22(i32 %arg) { -; CHECK-LABEL: define void @test22( -entry: - br label %loop1.header - -loop1.header: - %stop = phi i1 [ true, %loop1.latch ], [ false, %entry ] - %i = phi i32 [ %i.lcssa, %loop1.latch ], [ %arg, %entry ] -; CHECK: %[[I:.*]] = phi i32 [ %{{.*}}, %loop1.latch ], [ %arg, %entry ] - br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph -; CHECK: br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph - -loop1.body.loop2.ph: - br label %loop2.header -; Just check that the we unswitched the key condition and that leads to the -; inner loop header. -; -; CHECK: loop1.body.loop2.ph: -; CHECK-NEXT: br i1 %stop, label %[[SPLIT_US:.*]], label %[[SPLIT:.*]] -; -; CHECK: [[SPLIT_US]]: -; CHECK-NEXT: br label %[[LOOP2_HEADER_US:.*]] -; -; CHECK: [[LOOP2_HEADER_US]]: -; CHECK-NEXT: %{{.*}} = phi i32 [ %[[I]], %[[SPLIT_US]] ] -; -; CHECK: [[SPLIT]]: -; CHECK-NEXT: br label %[[LOOP2_HEADER:.*]] -; -; CHECK: [[LOOP2_HEADER]]: -; CHECK-NEXT: %{{.*}} = phi i32 [ %[[I]], %[[SPLIT]] ] - -loop2.header: - %i.inner = phi i32 [ %i, %loop1.body.loop2.ph ], [ %i.next, %loop2.latch ] - br label %loop3.header - -loop3.header: - %sw = call i32 @h(i32 %i.inner) - switch i32 %sw, label %loop3.exit [ - i32 32, label %loop3.header - i32 59, label %loop2.latch - i32 36, label %loop1.latch - ] - -loop2.latch: - %i.next = add i32 %i.inner, 1 - br i1 %stop, label %loop2.exit, label %loop2.header - -loop1.latch: - %i.lcssa = phi i32 [ %i.inner, %loop3.header ] - br label %loop1.header - -loop3.exit: - call void @f() - ret void - -loop2.exit: - call void @g() - ret void - -loop1.exit: - call void @g() - ret void -} - -; Test that when we are unswitching and need to rebuild the loop block set we -; correctly skip past inner loops. 
We want to use the inner loop to efficiently -; skip whole subregions of the outer loop blocks but just because the header of -; the outer loop is also the preheader of an inner loop shouldn't confuse this -; walk. -define void @test23(i1 %arg, i1* %ptr) { -; CHECK-LABEL: define void @test23( -entry: - br label %outer.header -; CHECK: entry: -; CHECK-NEXT: br i1 %arg, -; -; Just verify that we unswitched the correct bits. We should call `@f` twice in -; one unswitch and `@f` and then `@g` in the other. -; CHECK: call void -; CHECK-SAME: @f -; CHECK: call void -; CHECK-SAME: @f -; -; CHECK: call void -; CHECK-SAME: @f -; CHECK: call void -; CHECK-SAME: @g - -outer.header: - br label %inner.header - -inner.header: - call void @f() - br label %inner.latch - -inner.latch: - %inner.cond = load i1, i1* %ptr - br i1 %inner.cond, label %inner.header, label %outer.body - -outer.body: - br i1 %arg, label %outer.body.left, label %outer.body.right - -outer.body.left: - call void @f() - br label %outer.latch - -outer.body.right: - call void @g() - br label %outer.latch - -outer.latch: - %outer.cond = load i1, i1* %ptr - br i1 %outer.cond, label %outer.header, label %exit - -exit: - ret void -} - -; Non-trivial loop unswitching where there are two invariant conditions, but the -; second one is only in the cloned copy of the loop after unswitching. -define i32 @test24(i1* %ptr, i1 %cond1, i1 %cond2) { -; CHECK-LABEL: @test24( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split - -loop_begin: - br i1 %cond1, label %loop_a, label %loop_b - -loop_a: - br i1 %cond2, label %loop_a_a, label %loop_a_c -; The second unswitched condition. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split - -loop_a_a: - call i32 @a() - br label %latch -; The 'loop_a_a' unswitched loop. 
-; -; CHECK: entry.split.us.split.us: -; CHECK-NEXT: br label %loop_begin.us.us -; -; CHECK: loop_begin.us.us: -; CHECK-NEXT: br label %loop_a.us.us -; -; CHECK: loop_a.us.us: -; CHECK-NEXT: br label %loop_a_a.us.us -; -; CHECK: loop_a_a.us.us: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch.us.us -; -; CHECK: latch.us.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us.us, label %loop_exit.split.us.split.us -; -; CHECK: loop_exit.split.us.split.us: -; CHECK-NEXT: br label %loop_exit.split - -loop_a_c: - call i32 @c() - br label %latch -; The 'loop_a_c' unswitched loop. -; -; CHECK: entry.split.us.split: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: br label %loop_a.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: br label %loop_a_c.us -; -; CHECK: loop_a_c.us: -; CHECK-NEXT: call i32 @c() -; CHECK-NEXT: br label %latch -; -; CHECK: latch.us: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us.split -; -; CHECK: loop_exit.split.us.split: -; CHECK-NEXT: br label %loop_exit.split - -loop_b: - call i32 @b() - br label %latch -; The 'loop_b' unswitched loop. -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: br label %loop_b -; -; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch -; -; CHECK: latch: -; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split -; -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit - -latch: - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit - -loop_exit: - ret i32 0 -; CHECK: loop_exit: -; CHECK-NEXT: ret -} - -; Non-trivial partial loop unswitching of an invariant input to an 'or'. 
-define i32 @test25(i1* %ptr, i1 %cond) { -; CHECK-LABEL: @test25( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split - -loop_begin: - %v1 = load i1, i1* %ptr - %cond_or = or i1 %v1, %cond - br i1 %cond_or, label %loop_a, label %loop_b - -loop_a: - call i32 @a() - br label %latch -; The 'loop_a' unswitched loop. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: br label %loop_a.us -; -; CHECK: loop_a.us: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch.us -; -; CHECK: latch.us: -; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: br label %loop_exit - -loop_b: - call i32 @b() - br label %latch -; The original loop. -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr -; CHECK-NEXT: %[[OR:.*]] = or i1 %[[V1]], false -; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b -; -; CHECK: loop_a: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit ; -; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch - -latch: - %v2 = load i1, i1* %ptr - br i1 %v2, label %loop_begin, label %loop_exit -; CHECK: latch: -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr -; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.split +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit loop_exit: - 
ret i32 0 -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit -; + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: ret +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] } -; Non-trivial partial loop unswitching of multiple invariant inputs to an `and` -; chain. -define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) { -; CHECK-LABEL: @test26( +; Test where the cloned loop has an inner loop that has to be traversed to form +; the cloned loop, and where this inner loop has multiple blocks, and where the +; exiting block that connects the inner loop to the cloned loop is not the header +; block. This ensures that we correctly handle interesting corner cases of +; traversing back to the header when establishing the cloned loop. +define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13a( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond3, %cond1 -; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split loop_begin: - %v1 = load i1, i1* %ptr1 - %v2 = load i1, i1* %ptr2 - %cond_and1 = and i1 %v1, %cond1 - %cond_or1 = or i1 %v2, %cond2 - %cond_and2 = and i1 %cond_and1, %cond_or1 - %cond_and3 = and i1 %cond_and2, %cond3 - br i1 %cond_and3, label %loop_a, label %loop_b -; The 'loop_b' unswitched loop. 
-; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: br label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch.us -; -; CHECK: latch.us: -; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3 -; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: br label %loop_exit - -; The original loop. -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1 -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2 -; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V1]], true -; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V2]], %cond2 -; CHECK-NEXT: %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]] -; CHECK-NEXT: %[[AND3:.*]] = and i1 %[[AND2]], true -; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b loop_a: - call i32 @a() - br label %latch -; CHECK: loop_a: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch loop_b: - call i32 @b() - br label %latch -; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_b_inner_ph, label %loop_exit -latch: - %v3 = load i1, i1* %ptr3 - br i1 %v3, label %loop_begin, label %loop_exit -; CHECK: latch: -; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3 -; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split +loop_b_inner_ph: + br label %loop_b_inner_header -loop_exit: - ret i32 0 -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: ret -} +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body -; Non-trivial partial loop unswitching of multiple invariant inputs 
to an `or` -; chain. Basically an inverted version of corresponding `and` test (test26). -define i32 @test27(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) { -; CHECK-LABEL: @test27( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[INV_OR:.*]] = or i1 %cond3, %cond1 -; CHECK-NEXT: br i1 %[[INV_OR]], label %entry.split.us, label %entry.split +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit -loop_begin: - %v1 = load i1, i1* %ptr1 - %v2 = load i1, i1* %ptr2 - %cond_or1 = or i1 %v1, %cond1 - %cond_and1 = and i1 %v2, %cond2 - %cond_or2 = or i1 %cond_or1, %cond_and1 - %cond_or3 = or i1 %cond_or2, %cond3 - br i1 %cond_or3, label %loop_b, label %loop_a -; The 'loop_b' unswitched loop. +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop contains an inner loop within it. ; ; CHECK: entry.split.us: ; CHECK-NEXT: br label %loop_begin.us ; ; CHECK: loop_begin.us: -; CHECK-NEXT: br label %loop_b.us +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us ; ; CHECK: loop_b.us: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch.us +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph.us ; -; CHECK: latch.us: -; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3 -; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us +; CHECK: loop_b_inner_ph.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_b_inner_header.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us +; +; CHECK: loop_b_inner_body.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label 
%loop_b_inner_exit.us +; +; CHECK: loop_b_inner_exit.us: +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_b_inner_latch.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us ; ; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] ; CHECK-NEXT: br label %loop_exit - -; The original loop. +; +; And the original loop no longer contains an inner loop. ; ; CHECK: entry.split: ; CHECK-NEXT: br label %loop_begin ; ; CHECK: loop_begin: -; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1 -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2 -; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V1]], false -; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V2]], %cond2 -; CHECK-NEXT: %[[OR2:.*]] = or i1 %[[OR1]], %[[AND1]] -; CHECK-NEXT: %[[OR3:.*]] = or i1 %[[OR2]], false -; CHECK-NEXT: br i1 %[[OR3]], label %loop_b, label %loop_a - -loop_a: - call i32 @a() - br label %latch +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; ; CHECK: loop_a: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch - -loop_b: - call i32 @b() - br label %latch +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch +; ; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch - -latch: - %v3 = load i1, i1* %ptr3 - br i1 %v3, label %loop_begin, label %loop_exit -; CHECK: latch: -; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3 -; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin loop_exit: - ret i32 0 + %lcssa = phi i32 [ 
%a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; ; CHECK: loop_exit.split: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ] ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit: -; CHECK-NEXT: ret +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI_US]] } -; Non-trivial unswitching of a switch. -define i32 @test28(i1* %ptr, i32 %cond) { -; CHECK-LABEL: @test28( +; Test where the original loop has an inner loop that has to be traversed to +; rebuild the loop, and where this inner loop has multiple blocks, and where +; the exiting block that connects the inner loop to the original loop is not +; the header block. This ensures that we correctly handle interesting corner +; cases of traversing back to the header when re-establishing the original loop +; still exists after unswitching. +define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13b( entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 %cond, label %[[ENTRY_SPLIT_LATCH:.*]] [ -; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] -; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]] -; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_C:.*]] -; CHECK-NEXT: ] +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split loop_begin: - switch i32 %cond, label %latch [ - i32 0, label %loop_a - i32 1, label %loop_b - i32 2, label %loop_c - ] + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b loop_a: - call i32 @a() - br label %latch -; Unswitched 'a' loop. 
-; -; CHECK: [[ENTRY_SPLIT_A]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]] + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_exit, label %loop_b_inner_ph + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop doesn't contain an inner loop. ; -; CHECK: [[LOOP_BEGIN_A]]: -; CHECK-NEXT: br label %[[LOOP_A:.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us ; -; CHECK: [[LOOP_A]]: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us ; -; CHECK: [[LOOP_LATCH_A]]: -; CHECK-NEXT: %[[V_A:.*]] = load i1, i1* %ptr -; CHECK: br i1 %[[V_A]], label %[[LOOP_BEGIN_A]], label %[[LOOP_EXIT_A:.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split.us ; -; CHECK: [[LOOP_EXIT_A]]: -; CHECK-NEXT: br label %loop_exit - -loop_b: - call i32 @b() - br label %latch -; Unswitched 'b' loop. 
+; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us ; -; CHECK: [[ENTRY_SPLIT_B]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]] +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us ; -; CHECK: [[LOOP_BEGIN_B]]: -; CHECK-NEXT: br label %[[LOOP_B:.*]] +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit.split.us ; -; CHECK: [[LOOP_B]]: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ] +; CHECK-NEXT: br label %loop_exit ; -; CHECK: [[LOOP_LATCH_B]]: -; CHECK-NEXT: %[[V_B:.*]] = load i1, i1* %ptr -; CHECK: br i1 %[[V_B]], label %[[LOOP_BEGIN_B]], label %[[LOOP_EXIT_B:.*]] +; But the original loop contains an inner loop that must be traversed. ; -; CHECK: [[LOOP_EXIT_B]]: -; CHECK-NEXT: br label %loop_exit - -loop_c: - call i32 @c() - br label %latch -; Unswitched 'c' loop. 
+; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin ; -; CHECK: [[ENTRY_SPLIT_C]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]] +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b ; -; CHECK: [[LOOP_BEGIN_C]]: -; CHECK-NEXT: br label %[[LOOP_C:.*]] +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_latch ; -; CHECK: [[LOOP_C]]: -; CHECK-NEXT: call i32 @c() -; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]] +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph ; -; CHECK: [[LOOP_LATCH_C]]: -; CHECK-NEXT: %[[V_C:.*]] = load i1, i1* %ptr -; CHECK: br i1 %[[V_C]], label %[[LOOP_BEGIN_C]], label %[[LOOP_EXIT_C:.*]] +; CHECK: loop_b_inner_ph: +; CHECK-NEXT: br label %loop_b_inner_header ; -; CHECK: [[LOOP_EXIT_C]]: -; CHECK-NEXT: br label %loop_exit - -latch: - %v = load i1, i1* %ptr - br i1 %v, label %loop_begin, label %loop_exit -; Unswitched the 'latch' only loop. 
+; CHECK: loop_b_inner_header: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body ; -; CHECK: [[ENTRY_SPLIT_LATCH]]: -; CHECK-NEXT: br label %[[LOOP_BEGIN_LATCH:.*]] +; CHECK: loop_b_inner_body: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit ; -; CHECK: [[LOOP_BEGIN_LATCH]]: -; CHECK-NEXT: br label %[[LOOP_LATCH_LATCH:.*]] +; CHECK: loop_b_inner_latch: +; CHECK-NEXT: br label %loop_b_inner_header ; -; CHECK: [[LOOP_LATCH_LATCH]]: -; CHECK-NEXT: %[[V_LATCH:.*]] = load i1, i1* %ptr -; CHECK: br i1 %[[V_LATCH]], label %[[LOOP_BEGIN_LATCH]], label %[[LOOP_EXIT_LATCH:.*]] +; CHECK: loop_b_inner_exit: +; CHECK-NEXT: br label %loop_latch ; -; CHECK: [[LOOP_EXIT_LATCH]]: -; CHECK-NEXT: br label %loop_exit +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin loop_exit: - ret i32 0 + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit +; ; CHECK: loop_exit: -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; A test reduced out of 400.perlbench that when unswitching the `%stop` +; condition clones a loop nest outside of a containing loop. This exercises a +; different cloning path from our other test cases and in turn verifying the +; resulting structure can catch any failures to correctly clone these nested +; loops. +declare void @f() +declare void @g() +declare i32 @h(i32 %arg) + +; Test that when we are unswitching and need to rebuild the loop block set we +; correctly skip past inner loops. 
We want to use the inner loop to efficiently +; skip whole subregions of the outer loop blocks but just because the header of +; the outer loop is also the preheader of an inner loop shouldn't confuse this +; walk. +define void @test23(i1 %arg, i1* %ptr) { +; CHECK-LABEL: define void @test23( +entry: + br label %outer.header +; CHECK: entry: +; CHECK-NEXT: %arg.fr = freeze i1 %arg +; CHECK-NEXT: br i1 %arg.fr, +; +; Just verify that we unswitched the correct bits. We should call `@f` twice in +; one unswitch and `@f` and then `@g` in the other. +; CHECK: call void +; CHECK-SAME: @f +; CHECK: call void +; CHECK-SAME: @f +; +; CHECK: call void +; CHECK-SAME: @f +; CHECK: call void +; CHECK-SAME: @g + +outer.header: + br label %inner.header + +inner.header: + call void @f() + br label %inner.latch + +inner.latch: + %inner.cond = load i1, i1* %ptr + br i1 %inner.cond, label %inner.header, label %outer.body + +outer.body: + br i1 %arg, label %outer.body.left, label %outer.body.right + +outer.body.left: + call void @f() + br label %outer.latch + +outer.body.right: + call void @g() + br label %outer.latch + +outer.latch: + %outer.cond = load i1, i1* %ptr + br i1 %outer.cond, label %outer.header, label %exit + +exit: + ret void } ; A test case designed to exercise unusual properties of switches: they @@ -3049,7 +1574,8 @@ entry: br label %header ; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_C:.*]] [ +; CHECK-NEXT: %arg.fr = freeze i32 %arg +; CHECK-NEXT: switch i32 %arg.fr, label %[[ENTRY_SPLIT_C:.*]] [ ; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] ; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]] ; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B:.*]] @@ -3232,7 +1758,8 @@ entry: br label %header ; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_EXIT:.*]] [ +; CHECK-NEXT: %arg.fr = freeze i32 %arg +; CHECK-NEXT: switch i32 %arg.fr, label %[[ENTRY_SPLIT_EXIT:.*]] [ ; CHECK-NEXT: i32 -1, label %[[ENTRY_SPLIT_EXIT]] ; CHECK-NEXT: 
i32 0, label %[[ENTRY_SPLIT_A:.*]] ; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]] @@ -3411,7 +1938,8 @@ br label %c.header ; CHECK: b.header: ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] ; ; CHECK: [[B_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[C_HEADER_US:.*]] @@ -3486,7 +2014,8 @@ ; CHECK: b.header: ; CHECK-NEXT: %x.b = load i32, i32* %ptr ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] ; ; CHECK: [[B_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[C_HEADER_US:.*]] @@ -3574,7 +2103,8 @@ ; CHECK: b.header: ; CHECK-NEXT: %x.b = load i32, i32* %ptr ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] ; ; CHECK: [[B_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[C_HEADER_US:.*]] @@ -3654,7 +2184,8 @@ ; CHECK: b.header: ; CHECK-NEXT: %x.b = load i32, i32* %ptr ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] ; ; CHECK: [[B_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[C_HEADER_US:.*]] @@ -3752,7 +2283,8 @@ br label %d.header ; CHECK: c.header: ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[C_HEADER_SPLIT_US:.*]], label 
%[[C_HEADER_SPLIT:.*]] ; ; CHECK: [[C_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[D_HEADER_US:.*]] @@ -3865,7 +2397,8 @@ ; CHECK: c.header: ; CHECK-NEXT: %x.c = load i32, i32* %ptr ; CHECK-NEXT: %v1 = call i1 @cond() -; CHECK-NEXT: br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]] +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]] ; ; CHECK: [[C_HEADER_SPLIT_US]]: ; CHECK-NEXT: br label %[[D_HEADER_US:.*]] @@ -3947,7 +2480,8 @@ ; CHECK: b.header: ; CHECK-NEXT: %x.b = load i32, i32* %ptr ; CHECK-NEXT: %v1 = call i32 @cond.i32() -; CHECK-NEXT: switch i32 %v1, label %[[B_HEADER_SPLIT:.*]] [ +; CHECK-NEXT: %v1.fr = freeze i32 %v1 +; CHECK-NEXT: switch i32 %v1.fr, label %[[B_HEADER_SPLIT:.*]] [ ; CHECK-NEXT: i32 1, label %[[B_HEADER_SPLIT_US:.*]] ; CHECK-NEXT: i32 2, label %[[B_HEADER_SPLIT_US]] ; CHECK-NEXT: i32 3, label %[[B_HEADER_SPLIT_US]] @@ -4008,351 +2542,3 @@ ; CHECK: exit: ; CHECK-NEXT: ret void } - -; A devilish pattern. This is a crafty, crafty test case designed to risk -; creating indirect cycles with trivial and non-trivial unswitching. The inner -; loop has a switch with a trivial exit edge that can be unswitched, but the -; rest of the switch cannot be unswitched because its cost is too high. -; However, the unswitching of the trivial edge creates a new switch in the -; outer loop. *This* switch isn't trivial, but has a low cost to unswitch. When -; we unswitch this switch from the outer loop, we will remove it completely and -; create a clone of the inner loop on one side. This clone will then again be -; viable for unswitching the inner-most loop. This lets us check that the -; unswitching doesn't end up cycling infinitely even when the cycle is -; indirect and due to revisiting a loop after cloning. 
-define void @test31(i32 %arg) { -; CHECK-LABEL: define void @test31( -entry: - br label %outer.header -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT:.*]] [ -; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US:.*]] -; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_US]] -; CHECK-NEXT: ] -; -; CHECK: [[ENTRY_SPLIT_US]]: -; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_US_SPLIT:.*]] [ -; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_US_SPLIT_US:.*]] -; CHECK-NEXT: ] - -outer.header: - br label %inner.header - -inner.header: - switch i32 %arg, label %inner.loopexit1 [ - i32 1, label %inner.body1 - i32 2, label %inner.body2 - ] - -inner.body1: - %a = call i32 @a() - br label %inner.latch -; The (super convoluted) fully unswitched loop around `@a`. -; -; CHECK: [[ENTRY_SPLIT_US_SPLIT_US]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_US_US:.*]] -; -; CHECK: [[OUTER_HEADER_US_US]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US_US:.*]] -; -; CHECK: [[OUTER_LATCH_US_US:.*]]: -; CHECK-NEXT: %[[OUTER_COND_US_US:.*]] = call i1 @cond() -; CHECK-NEXT: br i1 %[[OUTER_COND_US_US]], label %[[OUTER_HEADER_US_US]], label %[[EXIT_SPLIT_US_SPLIT_US:.*]] -; -; CHECK: [[OUTER_HEADER_SPLIT_US_US]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US_US_US:.*]] -; -; CHECK: [[INNER_LOOPEXIT2_US_US:.*]]: -; CHECK-NEXT: br label %[[OUTER_LATCH_US_US]] -; -; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US_US_US]]: -; CHECK-NEXT: br label %[[INNER_HEADER_US_US_US:.*]] -; -; CHECK: [[INNER_HEADER_US_US_US]]: -; CHECK-NEXT: br label %[[INNER_BODY1_US_US_US:.*]] -; -; CHECK: [[INNER_BODY1_US_US_US]]: -; CHECK-NEXT: %[[A:.*]] = call i32 @a() -; CHECK-NEXT: br label %[[INNER_LATCH_US_US_US:.*]] -; -; CHECK: [[INNER_LATCH_US_US_US]]: -; CHECK-NEXT: %[[PHI_A:.*]] = phi i32 [ %[[A]], %[[INNER_BODY1_US_US_US]] ] -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 
0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 %[[PHI_A]]) -; CHECK-NEXT: %[[INNER_COND_US_US_US:.*]] = call i1 @cond() -; CHECK-NEXT: br i1 %[[INNER_COND_US_US_US]], label %[[INNER_HEADER_US_US_US]], label %[[INNER_LOOPEXIT2_SPLIT_US_US_US:.*]] -; -; CHECK: [[INNER_LOOPEXIT2_SPLIT_US_US_US]]: -; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US_US]] -; -; CHECK: [[EXIT_SPLIT_US_SPLIT_US]]: -; CHECK-NEXT: br label %[[EXIT_SPLIT_US:.*]] - - -inner.body2: - %b = call i32 @b() - br label %inner.latch -; The fully unswitched loop around `@b`. -; -; CHECK: [[ENTRY_SPLIT_US_SPLIT]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_US:.*]] -; -; CHECK: [[OUTER_HEADER_US]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_US:.*]] -; -; CHECK: [[INNER_HEADER_US:.*]]: -; CHECK-NEXT: br label %[[INNER_BODY2_US:.*]] -; -; CHECK: [[INNER_BODY2_US]]: -; CHECK-NEXT: %[[B:.*]] = call i32 @b() -; CHECK-NEXT: br label %[[INNER_LATCH_US:.*]] -; -; CHECK: [[INNER_LATCH_US]]: -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 0) -; CHECK-NEXT: call void @sink1(i32 %[[B]]) -; CHECK-NEXT: %[[INNER_COND_US:.*]] = call i1 @cond() -; CHECK-NEXT: br i1 %[[INNER_COND_US]], label %[[INNER_HEADER_US]], label %[[INNER_LOOPEXIT2_SPLIT_US:.*]] -; -; CHECK: [[INNER_LOOPEXIT2_SPLIT_US]]: -; CHECK-NEXT: br label %[[INNER_LOOPEXIT2_US:.*]] -; -; CHECK: [[OUTER_LATCH_US:.*]]: -; CHECK-NEXT: %[[OUTER_COND_US:.*]] = call i1 @cond() -; CHECK-NEXT: br i1 %[[OUTER_COND_US]], label %[[OUTER_HEADER_US]], label 
%[[EXIT_SPLIT_US_SPLIT:.*]] -; -; CHECK: [[OUTER_HEADER_SPLIT_US]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_SPLIT_SPLIT_US:.*]] -; -; CHECK: [[OUTER_HEADER_SPLIT_SPLIT_US]]: -; CHECK-NEXT: br label %[[INNER_HEADER_US]] -; -; CHECK: [[INNER_LOOPEXIT2_US]]: -; CHECK-NEXT: br label %[[OUTER_LATCH_US]] -; -; CHECK: [[EXIT_SPLIT_US]]: -; CHECK-NEXT: br label %exit - -inner.latch: - %phi = phi i32 [ %a, %inner.body1 ], [ %b, %inner.body2 ] - ; Make 10 junk calls here to ensure we're over the "50" cost threshold of - ; non-trivial unswitching for this inner switch. - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 0) - call void @sink1(i32 %phi) - %inner.cond = call i1 @cond() - br i1 %inner.cond, label %inner.header, label %inner.loopexit2 - -inner.loopexit1: - br label %outer.latch -; The unswitched `loopexit1` path. -; -; CHECK: [[ENTRY_SPLIT]]: -; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] -; -; CHECK: outer.header: -; CHECK-NEXT: br label %inner.loopexit1 -; -; CHECK: inner.loopexit1: -; CHECK-NEXT: br label %outer.latch -; -; CHECK: outer.latch: -; CHECK-NEXT: %outer.cond = call i1 @cond() -; CHECK-NEXT: br i1 %outer.cond, label %outer.header, label %[[EXIT_SPLIT:.*]] -; -; CHECK: [[EXIT_SPLIT]]: -; CHECK-NEXT: br label %exit - -inner.loopexit2: - br label %outer.latch - -outer.latch: - %outer.cond = call i1 @cond() - br i1 %outer.cond, label %outer.header, label %exit - -exit: - ret void -; CHECK: exit: -; CHECK-NEXT: ret void -} - -; Non-trivial partial loop unswitching of multiple invariant inputs to an `and` -; chain (select version). 
-define i32 @test32(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2) { -; CHECK-LABEL: @test32( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond2, %cond1 -; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us - -loop_begin: - %v1 = load i1, i1* %ptr1 - %v2 = load i1, i1* %ptr2 - %cond_and1 = select i1 %v1, i1 %cond1, i1 false - %cond_and2 = select i1 %cond_and1, i1 %cond2, i1 false - br i1 %cond_and2, label %loop_a, label %loop_b -; The 'loop_b' unswitched loop. -; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[V2_US]] = load i1, i1* %ptr2, align 1 -; CHECK-NEXT: br label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch.us -; -; CHECK: latch.us: -; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3, align 1 -; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: br label %loop_exit - -; The original loop. 
-; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1 -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2 -; CHECK-NEXT: %[[AND1:.*]] = select i1 %[[V1]], i1 true, i1 false -; CHECK-NEXT: %[[AND2:.*]] = select i1 %[[AND1]], i1 true, i1 false -; CHECK-NEXT: br i1 %[[V1]], label %loop_a, label %loop_b - -loop_a: - call i32 @a() - br label %latch -; CHECK: loop_a: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch - -loop_b: - call i32 @b() - br label %latch -; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch - -latch: - %v3 = load i1, i1* %ptr3 - br i1 %v3, label %loop_begin, label %loop_exit -; CHECK: latch: -; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3, align 1 -; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split - -loop_exit: - ret i32 0 -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: ret -} - -; Non-trivial partial loop unswitching of multiple invariant inputs to an `or` -; chain (select version). -define i32 @test33(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2) { -; CHECK-LABEL: @test33( -entry: - br label %loop_begin -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[INV_OR:.*]] = or i1 %cond2, %cond1 -; CHECK-NEXT: br i1 %[[INV_OR]], label %entry.split.us, label %entry.split - -loop_begin: - %v1 = load i1, i1* %ptr1 - %v2 = load i1, i1* %ptr2 - %cond_and1 = select i1 %v1, i1 true, i1 %cond1 - %cond_and2 = select i1 %cond_and1, i1 true, i1 %cond2 - br i1 %cond_and2, label %loop_b, label %loop_a -; The 'loop_b' unswitched loop. 
-; -; CHECK: entry.split.us: -; CHECK-NEXT: br label %loop_begin.us -; -; CHECK: loop_begin.us: -; CHECK-NEXT: %[[V2_US]] = load i1, i1* %ptr2, align 1 -; CHECK-NEXT: br label %loop_b.us -; -; CHECK: loop_b.us: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch.us -; -; CHECK: latch.us: -; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3, align 1 -; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us -; -; CHECK: loop_exit.split.us: -; CHECK-NEXT: br label %loop_exit - -; The original loop. -; -; CHECK: entry.split: -; CHECK-NEXT: br label %loop_begin -; -; CHECK: loop_begin: -; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1 -; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2 -; CHECK-NEXT: %[[AND1:.*]] = select i1 %[[V1]], i1 true, i1 false -; CHECK-NEXT: %[[AND2:.*]] = select i1 %[[AND1]], i1 true, i1 false -; CHECK-NEXT: br i1 %[[V1]], label %loop_b, label %loop_a - -loop_a: - call i32 @a() - br label %latch -; CHECK: loop_a: -; CHECK-NEXT: call i32 @a() -; CHECK-NEXT: br label %latch - -loop_b: - call i32 @b() - br label %latch -; CHECK: loop_b: -; CHECK-NEXT: call i32 @b() -; CHECK-NEXT: br label %latch - -latch: - %v3 = load i1, i1* %ptr3 - br i1 %v3, label %loop_begin, label %loop_exit -; CHECK: latch: -; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3, align 1 -; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split - -loop_exit: - ret i32 0 -; CHECK: loop_exit.split: -; CHECK-NEXT: br label %loop_exit -; -; CHECK: loop_exit: -; CHECK-NEXT: ret -} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -9,6 +9,8 @@ declare void @sink1(i32) declare void @sink2(i32) +declare void @sink3(i1) +declare void @sink4(i1) declare i1 @cond() declare i32 @cond.i32()