diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -109,6 +110,10 @@ cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden); +static cl::opt FreezeLoopUnswitchCond( + "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, + cl::desc("If enabled, the freeze instruction will be added to condition " + "of loop unswitch to prevent miscompilation.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -196,15 +201,15 @@ /// Copy a set of loop invariant values \p ToDuplicate and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. -static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, - ArrayRef Invariants, - bool Direction, - BasicBlock &UnswitchedSucc, - BasicBlock &NormalSucc) { +static void buildPartialUnswitchConditionalBranch( + BasicBlock &BB, ArrayRef Invariants, bool Direction, + BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) { IRBuilder<> IRB(&BB); Value *Cond = Direction ? IRB.CreateOr(Invariants) : IRB.CreateAnd(Invariants); + if (InsertFreeze) + Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr"); IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? 
&NormalSucc : &UnswitchedSucc); } @@ -565,7 +570,7 @@ "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!"); buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection, - *UnswitchedBB, *NewPH); + *UnswitchedBB, *NewPH, false); } // Update the dominator tree with the added edge. @@ -2124,6 +2129,13 @@ SE->forgetTopmostLoop(&L); } + bool InsertFreeze = false; + if (FreezeLoopUnswitchCond) { + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L); + } + // If the edge from this terminator to a successor dominates that successor, // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by @@ -2198,6 +2210,28 @@ BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + if (InsertFreeze) { + auto Cond = BI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) { + auto FrozenCond = new FreezeInst(Cond, Cond->getName() + ".fr"); + if (dyn_cast(Cond)) { + if (PHINode *PN = dyn_cast(Cond)) + FrozenCond->insertAfter(PN->getParent()->getFirstNonPHI()); + else if (InvokeInst *II = dyn_cast(Cond)) { + auto *DestBB = dyn_cast(II->getOperand(1)); + FrozenCond->insertAfter(DestBB->getFirstNonPHI()); + } else + FrozenCond->insertAfter(dyn_cast(Cond)); + + Cond->replaceUsesWithIf(FrozenCond, [](Use &U) { + return !isa(U.getUser()); + }); + } else { + FrozenCond->insertBefore(BI); + BI->setCondition(FrozenCond); + } + } + } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2212,6 +2246,28 @@ else Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + if (InsertFreeze) { + auto Cond = SI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT)) { + auto FrozenCond = 
new FreezeInst(Cond, Cond->getName() + ".fr"); + if (dyn_cast(Cond)) { + if (PHINode *PN = dyn_cast(Cond)) + FrozenCond->insertAfter(PN->getParent()->getFirstNonPHI()); + else if (InvokeInst *II = dyn_cast(Cond)) { + auto *DestBB = dyn_cast(II->getOperand(1)); + FrozenCond->insertAfter(DestBB->getFirstNonPHI()); + } else + FrozenCond->insertAfter(dyn_cast(Cond)); + + Cond->replaceUsesWithIf(FrozenCond, [](Use &U) { + return !isa(U.getUser()); + }); + } else { + FrozenCond->insertBefore(SI); + SI->setCondition(FrozenCond); + } + } + } // We need to use the set to populate domtree updates as even when there // are multiple cases pointing at the same successor we only want to // remove and insert one edge in the domtree. @@ -2292,7 +2348,7 @@ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); else buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, - *ClonedPH, *LoopPH); + *ClonedPH, *LoopPH, InsertFreeze); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); if (MSSAU) { @@ -2376,17 +2432,28 @@ "Should not be replacing constant values!"); // Use make_early_inc_range here as set invalidates the iterator. for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { - Instruction *UserI = dyn_cast(U.getUser()); - if (!UserI) - continue; - - // Replace it with the 'continue' side if in the main loop body, and the - // unswitched if in the cloned blocks. - if (DT.dominates(LoopPH, UserI->getParent())) - U.set(ContinueReplacement); - else if (ReplaceUnswitched && - DT.dominates(ClonedPH, UserI->getParent())) - U.set(UnswitchedReplacement); + auto ReplaceIfDominated = [&](Use &U) { + Instruction *UserI = dyn_cast(U.getUser()); + if (!UserI) + return; + + // Replace it with the 'continue' side if in the main loop body, and + // the unswitched if in the cloned blocks. 
+ if (DT.dominates(LoopPH, UserI->getParent())) + U.set(ContinueReplacement); + else if (ReplaceUnswitched && + DT.dominates(ClonedPH, UserI->getParent())) + U.set(UnswitchedReplacement); + }; + + ReplaceIfDominated(U); + + // If V is invariant, Freeze(V) is also invariant. Since we replace uses + // of V with a constant, we must also try to replace uses of Freeze(V) + // with that constant. + if (auto *FI = dyn_cast(U.getUser())) + for (Use &UU : llvm::make_early_inc_range(FI->uses())) + ReplaceIfDominated(UU); } } } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -0,0 +1,2544 @@ +; RUN: opt -freeze-loop-unswitch-cond -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +declare i32 @a() +declare i32 @b() +declare i32 @c() +declare i32 @d() + +declare void @sink1(i32) +declare void @sink2(i32) +declare void @sink3(i1) +declare void @sink4(i1) + +declare i1 @cond() +declare i32 @cond.i32() + +declare i32 @__CxxFrameHandler3(...) + +define i32 @test1_freeze(i1* %ptr0, i1* %ptr1, i1* %ptr2) { +; CHECK-LABEL: @test1_freeze( +entry: + %cond1 = load i1, i1* %ptr1 + %cond2 = load i1, i1* %ptr2 + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: %cond1 = load i1, i1* %ptr1, align 1 +; CHECK-NEXT: %cond2 = load i1, i1* %ptr2, align 1 +; CHECK-NEXT: %cond2.fr = freeze i1 %cond2 +; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split + +loop_begin: + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + call i32 @a() + br label %latch +; The 'loop_a' unswitched loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: br label %loop_a.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %0 = call i32 @a() +; CHECK-NEXT: br label %latch.us +; +; CHECK: latch.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label %loop_exit + +loop_b: + call i32 @b() + br i1 %cond2, label %loop_b_a, label %loop_b_b +; The second unswitched condition. +; +; CHECK: entry.split: +; CHECK-NEXT: br i1 %cond2.fr, label %entry.split.split.us, label %entry.split.split + +loop_b_a: + call void @sink3(i1 %cond2) + br label %latch +; The 'loop_b_a' unswitched loop. +; %cond2 is replaced to true +; +; CHECK: entry.split.split.us: +; CHECK-NEXT: br label %loop_begin.us1 +; +; CHECK: loop_begin.us1: +; CHECK-NEXT: br label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %1 = call i32 @b() +; CHECK-NEXT: br label %loop_b_a.us +; +; CHECK: loop_b_a.us: +; CHECK-NEXT: call void @sink3(i1 true) +; CHECK-NEXT: br label %latch.us2 +; +; CHECK: latch.us2: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us +; +; CHECK: loop_exit.split.split.us: +; CHECK-NEXT: br label %loop_exit.split + +loop_b_b: + call void @sink4(i1 %cond2) + br label %latch +; The 'loop_b_b' unswitched loop. 
+; %cond2 is replaced to false +; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: br label %loop_b +; +; CHECK: loop_b: +; CHECK-NEXT: %2 = call i32 @b() +; CHECK-NEXT: br label %loop_b_b +; +; CHECK: loop_b_b: +; CHECK-NEXT: call void @sink4(i1 false) +; CHECK-NEXT: br label %latch +; +; CHECK: latch: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split +; +; CHECK: loop_exit.split.split: +; CHECK-NEXT: br label %loop_exit.split + +latch: + %v = load i1, i1* %ptr0 + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret i32 0 +; CHECK: loop_exit.split: +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: ret +} + +; Test that when unswitching a deeply nested loop condition in a way that +; produces a non-loop clone that can reach multiple exit blocks which are part +; of different outer loops we correctly divide the cloned loop blocks between +; the outer loops based on reachability. 
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr, align 1 +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin +; The cloned copy that always exits with the adjustments required to fix up +; loop exits. 
+; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original copy that continues to loop. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ] +; 
CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] +} + +; Same pattern as @test7a but here the original loop becomes a non-loop that +; can reach multiple exit blocks which are part of different outer loops. +define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label 
%inner_inner_loop_begin, label %inner_loop_exit +; The cloned copy that continues looping. +; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us +; +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us +; +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] +; CHECK-NEXT: %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original copy that now always exits and needs adjustments for exit +; blocks. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ] +; CHECK-NEXT: %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit +; +; CHECK: inner_inner_loop_c.loopexit: +; CHECK-NEXT: br label %inner_inner_loop_c +; +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d +; +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label %inner_inner_loop_exit + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ 
%b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]] +; CHECK-NEXT: ret i32 %[[RESULT]] +} + +; Test that when the exit block set of an inner loop changes to start at a less +; high level of the loop nest we correctly hoist the loop up the nest. +define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label 
%inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is now an exit from the inner loop. +; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label %inner_inner_loop_latch.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original region exits the loop earlier. 
+; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + 
ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Same pattern as @test8a but where the original loop loses an exit block and +; needs to be hoisted up the nest. +define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_begin.split.us, label %inner_loop_begin.split + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is similar to before but with one earlier exit. 
+; +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us +; +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label %inner_inner_loop_exit.split.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original region is now an exit in the preheader. +; +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ] +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split +; +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label %inner_inner_loop_latch +; +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A2]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.loopexit1, label 
%inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Test that requires re-forming dedicated exits for the cloned loop. +define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_begin + +loop_b: + br i1 %cond, label %loop_exit, label %loop_begin +; The cloned loop with one edge as a direct exit. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us +; +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ] +; CHECK-NEXT: br label %loop_exit + +; The original loop without one 'loop_exit' edge. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge +; +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_b: +; CHECK-NEXT: br label %loop_begin.backedge +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Test that requires re-forming dedicated exits for the original loop. 
+define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_begin, label %loop_exit + +loop_b: + br i1 %cond, label %loop_begin, label %loop_exit +; The cloned loop without one of the exits. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: br label %loop_begin.backedge.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us +; +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit + +; The original loop without one 'loop_exit' edge. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit +; +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_b: +; CHECK-NEXT: %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ] +; CHECK-NEXT: br label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Check that if a cloned inner loop after unswitching doesn't loop and directly +; exits even an outer loop, we don't add the cloned preheader to the outer +; loop and do add the needed LCSSA phi nodes for the new exit block from the +; outer loop. 
+define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_ph.split.us, label %inner_loop_ph.split + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %loop_exit, label %inner_loop_a + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit.split.us +; +; CHECK: loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ] +; CHECK-NEXT: br label %loop_exit.loopexit +; +; The original remains a loop losing the exit edge. 
+; +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_a +; +; CHECK: inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Check that if the original inner loop after unswitching doesn't loop and +; directly exits even an outer loop, we remove the original preheader from the +; outer loop and add needed LCSSA phi nodes for the new exit block from the +; outer loop. 
+define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph +; CHECK: loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin +; CHECK: inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_loop_ph.split.us, label %inner_loop_ph.split + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_a, label %loop_exit + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. +; +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: br label %inner_loop_begin.us +; +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_a.us +; +; CHECK: inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us +; +; CHECK: inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ] +; CHECK-NEXT: br label %inner_loop_exit +; +; The original path no longer loops and directly exits the entire nest. 
+; +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ] +; CHECK-NEXT: br label %inner_loop_begin +; +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %loop_exit.loopexit + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ] +; CHECK-NEXT: ret i32 %[[A_PHI]] +} + +; Like test11a, but checking that when the whole thing is wrapped in yet +; another loop, we correctly attribute the cloned preheader to that outermost +; loop rather than only handling the case where the preheader is not in any loop +; at all. 
+define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.loopexit.split.us +; +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ] +; CHECK-NEXT: br label %inner_loop_exit.loopexit +; +; The original remains a loop losing the exit edge. 
+; +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_inner_loop_a +; +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Like test11b, but checking that 
when the whole thing is wrapped in yet +; another loop, we correctly sink the preheader to the outermost loop rather +; than only handling the case where the preheader is completely removed from +; a loop. +define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br label %inner_loop_begin +; CHECK: loop_begin: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph +; CHECK: inner_loop_begin: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: %[[COND:.*]] = load i1, i1* %cond.ptr +; CHECK-NEXT: %[[COND_FR:.*]] = freeze i1 %[[COND]] +; CHECK-NEXT: br i1 %[[COND_FR]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. 
+; +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: br label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 %[[B]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_inner_loop_a.us +; +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us +; +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ] +; CHECK-NEXT: br label %inner_inner_loop_exit +; +; The original path no longer loops; it directly exits both the innermost loop and its parent loop. +; +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ] +; CHECK-NEXT: br label %inner_inner_loop_begin +; +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 %[[B_LCSSA]]) +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: br label %inner_loop_exit.loopexit + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 + +inner_loop_latch: + br label %inner_loop_begin +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], 
%inner_inner_loop_exit ] +; CHECK-NEXT: br label %inner_loop_exit +; +; CHECK: inner_loop_exit: +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +; CHECK: loop_exit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: ret i32 %[[A_LCSSA]] +} + +; Test where the cloned loop has an inner loop that has to be traversed to form +; the cloned loop, and where this inner loop has multiple blocks, and where the +; exiting block that connects the inner loop to the cloned loop is not the header +; block. This ensures that we correctly handle interesting corner cases of +; traversing back to the header when establishing the cloned loop. +define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13a( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_b_inner_ph, label %loop_exit + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop contains an inner loop 
within it. +; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph.us +; +; CHECK: loop_b_inner_ph.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_b_inner_header.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us +; +; CHECK: loop_b_inner_body.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_exit.us +; +; CHECK: loop_b_inner_exit.us: +; CHECK-NEXT: br label %loop_latch.us +; +; CHECK: loop_b_inner_latch.us: +; CHECK-NEXT: br label %loop_b_inner_header.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit +; +; And the original loop no longer contains an inner loop. 
+; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI_US]] +} + +; Test where the original loop has an inner loop that has to be traversed to +; rebuild the loop, and where this inner loop has multiple blocks, and where +; the exiting block that connects the inner loop to the original loop is not +; the header block. This ensures that we correctly handle interesting corner +; cases of traversing back to the header when re-establishing the original loop +; still exists after unswitching. 
+define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13b( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_exit, label %loop_b_inner_ph + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop doesn't contain an inner loop. 
+; +; CHECK: entry.split.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_begin.us: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a.us, label %loop_b.us +; +; CHECK: loop_b.us: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_a.us: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us +; +; CHECK: loop_latch.us: +; CHECK-NEXT: br label %loop_begin.us +; +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ] +; CHECK-NEXT: br label %loop_exit.split.us +; +; CHECK: loop_exit.split.us: +; CHECK-NEXT: %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ] +; CHECK-NEXT: br label %loop_exit +; +; But the original loop contains an inner loop that must be traversed. +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[A:.*]] = load i32, i32* %a.ptr +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_a, label %loop_b +; +; CHECK: loop_a: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_exit.split, label %loop_latch +; +; CHECK: loop_b: +; CHECK-NEXT: %[[B:.*]] = load i32, i32* %b.ptr +; CHECK-NEXT: br label %loop_b_inner_ph +; +; CHECK: loop_b_inner_ph: +; CHECK-NEXT: br label %loop_b_inner_header +; +; CHECK: loop_b_inner_header: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body +; +; CHECK: loop_b_inner_body: +; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr +; CHECK-NEXT: br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit +; +; CHECK: loop_b_inner_latch: +; CHECK-NEXT: br label %loop_b_inner_header +; +; CHECK: loop_b_inner_exit: +; 
CHECK-NEXT: br label %loop_latch +; +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +; CHECK: loop_exit.split: +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ] +; CHECK-NEXT: br label %loop_exit +; +; CHECK: loop_exit: +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ] +; CHECK-NEXT: ret i32 %[[AB_PHI]] +} + +; A test reduced out of 400.perlbench that when unswitching the `%stop` +; condition clones a loop nest outside of a containing loop. This exercises a +; different cloning path from our other test cases and in turn verifying the +; resulting structure can catch any failures to correctly clone these nested +; loops. +declare void @f() +declare void @g() +declare i32 @h(i32 %arg) + +; Test that when we are unswitching and need to rebuild the loop block set we +; correctly skip past inner loops. We want to use the inner loop to efficiently +; skip whole subregions of the outer loop blocks but just because the header of +; the outer loop is also the preheader of an inner loop shouldn't confuse this +; walk. +define void @test23(i1 %arg, i1* %ptr) { +; CHECK-LABEL: define void @test23( +entry: + br label %outer.header +; CHECK: entry: +; CHECK-NEXT: %arg.fr = freeze i1 %arg +; CHECK-NEXT: br i1 %arg.fr, +; +; Just verify that we unswitched the correct bits. We should call `@f` twice in +; one unswitch and `@f` and then `@g` in the other. 
+; CHECK: call void +; CHECK-SAME: @f +; CHECK: call void +; CHECK-SAME: @f +; +; CHECK: call void +; CHECK-SAME: @f +; CHECK: call void +; CHECK-SAME: @g + +outer.header: + br label %inner.header + +inner.header: + call void @f() + br label %inner.latch + +inner.latch: + %inner.cond = load i1, i1* %ptr + br i1 %inner.cond, label %inner.header, label %outer.body + +outer.body: + br i1 %arg, label %outer.body.left, label %outer.body.right + +outer.body.left: + call void @f() + br label %outer.latch + +outer.body.right: + call void @g() + br label %outer.latch + +outer.latch: + %outer.cond = load i1, i1* %ptr + br i1 %outer.cond, label %outer.header, label %exit + +exit: + ret void +} + +; A test case designed to exercise unusual properties of switches: they +; can introduce multiple edges to successors. These need lots of special case +; handling as they get collapsed in many cases (domtree, the unswitch itself) +; but not in all cases (the PHI node operands). +define i32 @test29(i32 %arg) { +; CHECK-LABEL: @test29( +entry: + br label %header +; CHECK-NEXT: entry: +; CHECK-NEXT: %arg.fr = freeze i32 %arg +; CHECK-NEXT: switch i32 %arg.fr, label %[[ENTRY_SPLIT_C:.*]] [ +; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]] +; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B:.*]] +; CHECK-NEXT: i32 3, label %[[ENTRY_SPLIT_C]] +; CHECK-NEXT: ] + +header: + %tmp = call i32 @d() + %cmp1 = icmp eq i32 %tmp, 0 + ; We set up a chain through all the successors of the switch that doesn't + ; involve the switch so that we can have interesting PHI nodes in them. + br i1 %cmp1, label %body.a, label %dispatch + +dispatch: + ; Switch with multiple successors. We arrange the last successor to be the + ; default to make the test case easier to read. This has a duplicate edge + ; both to the default destination (which is completely superfluous but + ; technically valid IR) and to a regular successor. 
+ switch i32 %arg, label %body.c [ + i32 0, label %body.a + i32 1, label %body.a + i32 2, label %body.b + i32 3, label %body.c + ] + +body.a: + %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ], [ %tmp, %dispatch ] + %tmp.a = call i32 @a() + %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a + br label %body.b +; Unswitched 'a' loop. +; +; CHECK: [[ENTRY_SPLIT_A]]: +; CHECK-NEXT: br label %[[HEADER_A:.*]] +; +; CHECK: [[HEADER_A]]: +; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0 +; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]] +; +; CHECK: [[DISPATCH_A]]: +; CHECK-NEXT: br label %[[BODY_A_A]] +; +; CHECK: [[BODY_A_A]]: +; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ] +; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]] +; CHECK-NEXT: br label %[[BODY_B_A:.*]] +; +; CHECK: [[BODY_B_A]]: +; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ] +; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]] +; CHECK-NEXT: br label %[[BODY_C_A:.*]] +; +; CHECK: [[BODY_C_A]]: +; CHECK-NEXT: %[[TMP_C_PHI_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[BODY_B_A]] ] +; CHECK-NEXT: %[[TMP_C_A:.*]] = call i32 @c() +; CHECK-NEXT: %[[TMP_C_SUM_A:.*]] = add i32 %[[TMP_C_PHI_A]], %[[TMP_C_A]] +; CHECK-NEXT: br label %[[LATCH_A:.*]] +; +; CHECK: [[LATCH_A]]: +; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_C_SUM_A]], 42 +; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]] +; +; CHECK: [[LOOP_EXIT_A]]: +; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_C_SUM_A]], %[[LATCH_A]] ] +; CHECK-NEXT: br label %exit + +body.b: + %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ] + %tmp.b = call i32 @b() + %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b + br label %body.c +; 
Unswitched 'b' loop. +; +; CHECK: [[ENTRY_SPLIT_B]]: +; CHECK-NEXT: br label %[[HEADER_B:.*]] +; +; CHECK: [[HEADER_B]]: +; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0 +; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]] +; +; CHECK: [[DISPATCH_B]]: +; CHECK-NEXT: br label %[[BODY_B_B:.*]] +; +; CHECK: [[BODY_A_B]]: +; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ] +; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]] +; CHECK-NEXT: br label %[[BODY_B_B:.*]] +; +; CHECK: [[BODY_B_B]]: +; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ] +; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]] +; CHECK-NEXT: br label %[[BODY_C_B:.*]] +; +; CHECK: [[BODY_C_B]]: +; CHECK-NEXT: %[[TMP_C_PHI_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[BODY_B_B]] ] +; CHECK-NEXT: %[[TMP_C_B:.*]] = call i32 @c() +; CHECK-NEXT: %[[TMP_C_SUM_B:.*]] = add i32 %[[TMP_C_PHI_B]], %[[TMP_C_B]] +; CHECK-NEXT: br label %[[LATCH_B:.*]] +; +; CHECK: [[LATCH_B]]: +; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_C_SUM_B]], 42 +; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]] +; +; CHECK: [[LOOP_EXIT_B]]: +; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_C_SUM_B]], %[[LATCH_B]] ] +; CHECK-NEXT: br label %[[EXIT_SPLIT:.*]] + +body.c: + %tmp.c.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.b.sum, %body.b ] + %tmp.c = call i32 @c() + %tmp.c.sum = add i32 %tmp.c.phi, %tmp.c + br label %latch +; Unswitched 'c' loop. 
+; +; CHECK: [[ENTRY_SPLIT_C]]: +; CHECK-NEXT: br label %[[HEADER_C:.*]] +; +; CHECK: [[HEADER_C]]: +; CHECK-NEXT: %[[TMP_C:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_C:.*]] = icmp eq i32 %[[TMP_C]], 0 +; CHECK-NEXT: br i1 %[[CMP1_C]], label %[[BODY_A_C:.*]], label %[[DISPATCH_C:.*]] +; +; CHECK: [[DISPATCH_C]]: +; CHECK-NEXT: br label %[[BODY_C_C:.*]] +; +; CHECK: [[BODY_A_C]]: +; CHECK-NEXT: %[[TMP_A_PHI_C:.*]] = phi i32 [ 0, %[[HEADER_C]] ] +; CHECK-NEXT: %[[TMP_A_C:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_C:.*]] = add i32 %[[TMP_A_PHI_C]], %[[TMP_A_C]] +; CHECK-NEXT: br label %[[BODY_B_C:.*]] +; +; CHECK: [[BODY_B_C]]: +; CHECK-NEXT: %[[TMP_B_PHI_C:.*]] = phi i32 [ %[[TMP_A_SUM_C]], %[[BODY_A_C]] ] +; CHECK-NEXT: %[[TMP_B_C:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_C:.*]] = add i32 %[[TMP_B_PHI_C]], %[[TMP_B_C]] +; CHECK-NEXT: br label %[[BODY_C_C:.*]] +; +; CHECK: [[BODY_C_C]]: +; CHECK-NEXT: %[[TMP_C_PHI_C:.*]] = phi i32 [ %[[TMP_C]], %[[DISPATCH_C]] ], [ %[[TMP_B_SUM_C]], %[[BODY_B_C]] ] +; CHECK-NEXT: %[[TMP_C_C:.*]] = call i32 @c() +; CHECK-NEXT: %[[TMP_C_SUM_C:.*]] = add i32 %[[TMP_C_PHI_C]], %[[TMP_C_C]] +; CHECK-NEXT: br label %[[LATCH_C:.*]] +; +; CHECK: [[LATCH_C]]: +; CHECK-NEXT: %[[CMP2_C:.*]] = icmp slt i32 %[[TMP_C_SUM_C]], 42 +; CHECK: br i1 %[[CMP2_C]], label %[[HEADER_C]], label %[[LOOP_EXIT_C:.*]] +; +; CHECK: [[LOOP_EXIT_C]]: +; CHECK-NEXT: %[[LCSSA_C:.*]] = phi i32 [ %[[TMP_C_SUM_C]], %[[LATCH_C]] ] +; CHECK-NEXT: br label %[[EXIT_SPLIT]] + +latch: + %cmp2 = icmp slt i32 %tmp.c.sum, 42 + br i1 %cmp2, label %header, label %exit + +exit: + %lcssa.phi = phi i32 [ %tmp.c.sum, %latch ] + ret i32 %lcssa.phi +; CHECK: [[EXIT_SPLIT]]: +; CHECK-NEXT: %[[EXIT_PHI1:.*]] = phi i32 [ %[[LCSSA_C]], %[[LOOP_EXIT_C]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ] +; CHECK-NEXT: br label %exit + +; CHECK: exit: +; CHECK-NEXT: %[[EXIT_PHI2:.*]] = phi i32 [ %[[EXIT_PHI1]], %[[EXIT_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ] +; CHECK-NEXT: ret i32 
%[[EXIT_PHI2]] +} + +; Similar to @test29 but designed to have one of the duplicate edges be +; a loop exit edge as those can in some cases be special. Among other things, +; this includes an LCSSA phi with multiple entries despite being a dedicated +; exit block. +define i32 @test30(i32 %arg) { +; CHECK-LABEL: define i32 @test30( +entry: + br label %header +; CHECK-NEXT: entry: +; CHECK-NEXT: %arg.fr = freeze i32 %arg +; CHECK-NEXT: switch i32 %arg.fr, label %[[ENTRY_SPLIT_EXIT:.*]] [ +; CHECK-NEXT: i32 -1, label %[[ENTRY_SPLIT_EXIT]] +; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]] +; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B]] +; CHECK-NEXT: ] + +header: + %tmp = call i32 @d() + %cmp1 = icmp eq i32 %tmp, 0 + br i1 %cmp1, label %body.a, label %dispatch + +dispatch: + switch i32 %arg, label %loop.exit1 [ + i32 -1, label %loop.exit1 + i32 0, label %body.a + i32 1, label %body.b + i32 2, label %body.b + ] + +body.a: + %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ] + %tmp.a = call i32 @a() + %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a + br label %body.b +; Unswitched 'a' loop. 
+; +; CHECK: [[ENTRY_SPLIT_A]]: +; CHECK-NEXT: br label %[[HEADER_A:.*]] +; +; CHECK: [[HEADER_A]]: +; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0 +; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]] +; +; CHECK: [[DISPATCH_A]]: +; CHECK-NEXT: br label %[[BODY_A_A]] +; +; CHECK: [[BODY_A_A]]: +; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ] +; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]] +; CHECK-NEXT: br label %[[BODY_B_A:.*]] +; +; CHECK: [[BODY_B_A]]: +; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ] +; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]] +; CHECK-NEXT: br label %[[LATCH_A:.*]] +; +; CHECK: [[LATCH_A]]: +; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_B_SUM_A]], 42 +; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]] +; +; CHECK: [[LOOP_EXIT_A]]: +; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[LATCH_A]] ] +; CHECK-NEXT: br label %loop.exit2 + +body.b: + %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ] + %tmp.b = call i32 @b() + %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b + br label %latch +; Unswitched 'b' loop. 
+; +; CHECK: [[ENTRY_SPLIT_B]]: +; CHECK-NEXT: br label %[[HEADER_B:.*]] +; +; CHECK: [[HEADER_B]]: +; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0 +; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]] +; +; CHECK: [[DISPATCH_B]]: +; CHECK-NEXT: br label %[[BODY_B_B]] +; +; CHECK: [[BODY_A_B]]: +; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ] +; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]] +; CHECK-NEXT: br label %[[BODY_B_B:.*]] +; +; CHECK: [[BODY_B_B]]: +; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ] +; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]] +; CHECK-NEXT: br label %[[LATCH_B:.*]] +; +; CHECK: [[LATCH_B]]: +; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_B_SUM_B]], 42 +; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]] +; +; CHECK: [[LOOP_EXIT_B]]: +; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[LATCH_B]] ] +; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT:.*]] + +latch: + %cmp2 = icmp slt i32 %tmp.b.sum, 42 + br i1 %cmp2, label %header, label %loop.exit2 + +loop.exit1: + %l1.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ] + br label %exit +; Unswitched 'exit' loop. 
+; +; CHECK: [[ENTRY_SPLIT_EXIT]]: +; CHECK-NEXT: br label %[[HEADER_EXIT:.*]] +; +; CHECK: [[HEADER_EXIT]]: +; CHECK-NEXT: %[[TMP_EXIT:.*]] = call i32 @d() +; CHECK-NEXT: %[[CMP1_EXIT:.*]] = icmp eq i32 %[[TMP_EXIT]], 0 +; CHECK-NEXT: br i1 %[[CMP1_EXIT]], label %[[BODY_A_EXIT:.*]], label %[[DISPATCH_EXIT:.*]] +; +; CHECK: [[DISPATCH_EXIT]]: +; CHECK-NEXT: %[[TMP_LCSSA:.*]] = phi i32 [ %[[TMP_EXIT]], %[[HEADER_EXIT]] ] +; CHECK-NEXT: br label %loop.exit1 +; +; CHECK: [[BODY_A_EXIT]]: +; CHECK-NEXT: %[[TMP_A_PHI_EXIT:.*]] = phi i32 [ 0, %[[HEADER_EXIT]] ] +; CHECK-NEXT: %[[TMP_A_EXIT:.*]] = call i32 @a() +; CHECK-NEXT: %[[TMP_A_SUM_EXIT:.*]] = add i32 %[[TMP_A_PHI_EXIT]], %[[TMP_A_EXIT]] +; CHECK-NEXT: br label %[[BODY_B_EXIT:.*]] +; +; CHECK: [[BODY_B_EXIT]]: +; CHECK-NEXT: %[[TMP_B_PHI_EXIT:.*]] = phi i32 [ %[[TMP_A_SUM_EXIT]], %[[BODY_A_EXIT]] ] +; CHECK-NEXT: %[[TMP_B_EXIT:.*]] = call i32 @b() +; CHECK-NEXT: %[[TMP_B_SUM_EXIT:.*]] = add i32 %[[TMP_B_PHI_EXIT]], %[[TMP_B_EXIT]] +; CHECK-NEXT: br label %[[LATCH_EXIT:.*]] +; +; CHECK: [[LATCH_EXIT]]: +; CHECK-NEXT: %[[CMP2_EXIT:.*]] = icmp slt i32 %[[TMP_B_SUM_EXIT]], 42 +; CHECK: br i1 %[[CMP2_EXIT]], label %[[HEADER_EXIT]], label %[[LOOP_EXIT_EXIT:.*]] +; +; CHECK: loop.exit1: +; CHECK-NEXT: %[[L1_PHI:.*]] = phi i32 [ %[[TMP_LCSSA]], %[[DISPATCH_EXIT]] ] +; CHECK-NEXT: br label %exit +; +; CHECK: [[LOOP_EXIT_EXIT]]: +; CHECK-NEXT: %[[L2_PHI:.*]] = phi i32 [ %[[TMP_B_SUM_EXIT]], %[[LATCH_EXIT]] ] +; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT]] + +loop.exit2: + %l2.phi = phi i32 [ %tmp.b.sum, %latch ] + br label %exit +; CHECK: [[LOOP_EXIT2_SPLIT]]: +; CHECK-NEXT: %[[LOOP_EXIT_PHI1:.*]] = phi i32 [ %[[L2_PHI]], %[[LOOP_EXIT_EXIT]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ] +; CHECK-NEXT: br label %loop.exit2 +; +; CHECK: loop.exit2: +; CHECK-NEXT: %[[LOOP_EXIT_PHI2:.*]] = phi i32 [ %[[LOOP_EXIT_PHI1]], %[[LOOP_EXIT2_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ] +; CHECK-NEXT: br label %exit + +exit: + %l.phi = phi i32 [ 
%l1.phi, %loop.exit1 ], [ %l2.phi, %loop.exit2 ] + ret i32 %l.phi +; CHECK: exit: +; CHECK-NEXT: %[[EXIT_PHI:.*]] = phi i32 [ %[[L1_PHI]], %loop.exit1 ], [ %[[LOOP_EXIT_PHI2]], %loop.exit2 ] +; CHECK-NEXT: ret i32 %[[EXIT_PHI]] +} + +; Unswitch will not actually change the loop nest from: +; A < B < C +define void @hoist_inner_loop0() { +; CHECK-LABEL: define void @hoist_inner_loop0( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: br label %b.header + +b.header: + %v1 = call i1 @cond() + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; +; CHECK: [[B_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[C_HEADER_US:.*]] +; +; CHECK: [[C_HEADER_US]]: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %[[B_LATCH_SPLIT_US:.*]] +; +; CHECK: [[B_LATCH_SPLIT_US]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: [[B_HEADER_SPLIT]]: +; CHECK-NEXT: br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch +; CHECK: c.header: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %c.latch + +c.latch: + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %b.latch +; CHECK: c.latch: +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %c.header, label %[[B_LATCH_SPLIT:.*]] + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch +; CHECK: [[B_LATCH_SPLIT]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: b.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C +; into +; A < (B, C) +define void 
@hoist_inner_loop1(i32* %ptr) { +; CHECK-LABEL: define void @hoist_inner_loop1( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: %x.a = load i32, i32* %ptr +; CHECK-NEXT: br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %x.b = load i32, i32* %ptr +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; +; CHECK: [[B_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[C_HEADER_US:.*]] +; +; CHECK: [[C_HEADER_US]]: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %[[B_LATCH_US:.*]] +; +; CHECK: [[B_LATCH_US]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: [[B_HEADER_SPLIT]]: +; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ] +; CHECK-NEXT: br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch +; CHECK: c.header: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %c.latch + +c.latch: + ; Use values from other loops to check LCSSA form. 
+ store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %a.exit.c +; CHECK: c.latch: +; CHECK-NEXT: store i32 %x.a, i32* %ptr +; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %c.header, label %a.exit.c + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.exit.b +; CHECK: b.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %b.header, label %a.exit.b + +a.exit.c: + br label %a.latch +; CHECK: a.exit.c: +; CHECK-NEXT: br label %a.latch + +a.exit.b: + br label %a.latch +; CHECK: a.exit.b: +; CHECK-NEXT: br label %a.latch + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C +; into +; (A < B), C +define void @hoist_inner_loop2(i32* %ptr) { +; CHECK-LABEL: define void @hoist_inner_loop2( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: %x.a = load i32, i32* %ptr +; CHECK-NEXT: br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %x.b = load i32, i32* %ptr +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; +; CHECK: [[B_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[C_HEADER_US:.*]] +; +; CHECK: [[C_HEADER_US]]: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %[[B_LATCH_US:.*]] +; +; CHECK: [[B_LATCH_US]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: [[B_HEADER_SPLIT]]: +; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ] +; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ] +; CHECK-NEXT:
br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch +; CHECK: c.header: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %c.latch + +c.latch: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %exit +; CHECK: c.latch: +; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr +; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %c.header, label %exit + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch +; CHECK: b.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop. +; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; (A < B), (C < D) +define void @hoist_inner_loop3(i32* %ptr) { +; CHECK-LABEL: define void @hoist_inner_loop3( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: %x.a = load i32, i32* %ptr +; CHECK-NEXT: br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %x.b = load i32, i32* %ptr +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]] +; +; CHECK: [[B_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[C_HEADER_US:.*]] +; +; CHECK: [[C_HEADER_US]]: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %[[B_LATCH_US:.*]] +; +; CHECK: [[B_LATCH_US]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: 
[[B_HEADER_SPLIT]]: +; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ] +; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ] +; CHECK-NEXT: br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.body +; CHECK: c.header: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %c.body + +c.body: + %x.c = load i32, i32* %ptr + br label %d.header +; CHECK: c.body: +; CHECK-NEXT: %x.c = load i32, i32* %ptr +; CHECK-NEXT: br label %d.header + +d.header: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + store i32 %x.c, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %d.header, label %c.latch +; CHECK: d.header: +; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr +; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr +; CHECK-NEXT: store i32 %x.c, i32* %ptr +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %d.header, label %c.latch + +c.latch: + %v3 = call i1 @cond() + br i1 %v3, label %c.header, label %exit +; CHECK: c.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %c.header, label %exit + +b.latch: + %v4 = call i1 @cond() + br i1 %v4, label %b.header, label %a.latch +; CHECK: b.latch: +; CHECK-NEXT: %v4 = call i1 @cond() +; CHECK-NEXT: br i1 %v4, label %b.header, label %a.latch + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; This test is designed to exercise checking multiple remaining exits from the +; loop being unswitched. 
+; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; A < B < (C, D) +define void @hoist_inner_loop4() { +; CHECK-LABEL: define void @hoist_inner_loop4( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: br label %b.header + +b.header: + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: br label %c.header + +c.header: + %v1 = call i1 @cond() + br label %d.header +; CHECK: c.header: +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]] +; +; CHECK: [[C_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[D_HEADER_US:.*]] +; +; CHECK: [[D_HEADER_US]]: +; CHECK-NEXT: call i32 @d() +; CHECK-NEXT: br label %[[C_LATCH_US:.*]] +; +; CHECK: [[C_LATCH_US]]: +; CHECK-NEXT: br label %c.latch +; +; CHECK: [[C_HEADER_SPLIT]]: +; CHECK-NEXT: br label %d.header + +d.header: + call i32 @d() + br i1 %v1, label %c.latch, label %d.exiting1 +; CHECK: d.header: +; CHECK-NEXT: call i32 @d() +; CHECK-NEXT: br label %d.exiting1 + +d.exiting1: + %v2 = call i1 @cond() + br i1 %v2, label %d.exiting2, label %a.latch +; CHECK: d.exiting1: +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %d.exiting2, label %a.latch + +d.exiting2: + %v3 = call i1 @cond() + br i1 %v3, label %d.exiting3, label %loopexit.d +; CHECK: d.exiting2: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %d.exiting3, label %loopexit.d + +d.exiting3: + %v4 = call i1 @cond() + br i1 %v4, label %d.latch, label %b.latch +; CHECK: d.exiting3: +; CHECK-NEXT: %v4 = call i1 @cond() +; CHECK-NEXT: br i1 %v4, label %d.latch, label %b.latch + +d.latch: + br label %d.header +; CHECK: d.latch: +; CHECK-NEXT: br label %d.header + +c.latch: + %v5 = call i1 @cond() + br i1 %v5, label %c.header, label %loopexit.c +; CHECK: c.latch: +; CHECK-NEXT: %v5 = call i1 @cond() +; 
CHECK-NEXT: br i1 %v5, label %c.header, label %loopexit.c + +b.latch: + br label %b.header +; CHECK: b.latch: +; CHECK-NEXT: br label %b.header + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +loopexit.d: + br label %exit +; CHECK: loopexit.d: +; CHECK-NEXT: br label %exit + +loopexit.c: + br label %exit +; CHECK: loopexit.c: +; CHECK-NEXT: br label %exit + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; A < ((B < C), D) +define void @hoist_inner_loop5(i32* %ptr) { +; CHECK-LABEL: define void @hoist_inner_loop5( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: %x.a = load i32, i32* %ptr +; CHECK-NEXT: br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %x.b = load i32, i32* %ptr +; CHECK-NEXT: br label %c.header + +c.header: + %x.c = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %d.header +; CHECK: c.header: +; CHECK-NEXT: %x.c = load i32, i32* %ptr +; CHECK-NEXT: %v1 = call i1 @cond() +; CHECK-NEXT: %v1.fr = freeze i1 %v1 +; CHECK-NEXT: br i1 %v1.fr, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]] +; +; CHECK: [[C_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[D_HEADER_US:.*]] +; +; CHECK: [[D_HEADER_US]]: +; CHECK-NEXT: call i32 @d() +; CHECK-NEXT: br label %[[C_LATCH_US:.*]] +; +; CHECK: [[C_LATCH_US]]: +; CHECK-NEXT: br label %c.latch +; +; CHECK: [[C_HEADER_SPLIT]]: +; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %c.header ] +; CHECK-NEXT: %[[X_C_LCSSA:.*]] = phi i32 [ %x.c, %c.header ] +; CHECK-NEXT: br label %d.header + +d.header: + call i32 @d() + br i1 %v1, label %c.latch, label %d.latch +; CHECK: d.header: +; CHECK-NEXT: call i32 @d() +; CHECK-NEXT: br label %d.latch + +d.latch: + ; Use values from other loops to 
check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + store i32 %x.c, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %d.header, label %a.latch +; CHECK: d.latch: +; CHECK-NEXT: store i32 %x.a, i32* %ptr +; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr +; CHECK-NEXT: store i32 %[[X_C_LCSSA]], i32* %ptr +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %d.header, label %a.latch + +c.latch: + %v3 = call i1 @cond() + br i1 %v3, label %c.header, label %b.latch +; CHECK: c.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %c.header, label %b.latch + +b.latch: + br label %b.header +; CHECK: b.latch: +; CHECK-NEXT: br label %b.header + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +} + +define void @hoist_inner_loop_switch(i32* %ptr) { +; CHECK-LABEL: define void @hoist_inner_loop_switch( +entry: + br label %a.header +; CHECK: entry: +; CHECK-NEXT: br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header +; CHECK: a.header: +; CHECK-NEXT: %x.a = load i32, i32* %ptr +; CHECK-NEXT: br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i32 @cond.i32() + br label %c.header +; CHECK: b.header: +; CHECK-NEXT: %x.b = load i32, i32* %ptr +; CHECK-NEXT: %v1 = call i32 @cond.i32() +; CHECK-NEXT: %v1.fr = freeze i32 %v1 +; CHECK-NEXT: switch i32 %v1.fr, label %[[B_HEADER_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[B_HEADER_SPLIT_US:.*]] +; CHECK-NEXT: i32 2, label %[[B_HEADER_SPLIT_US]] +; CHECK-NEXT: i32 3, label %[[B_HEADER_SPLIT_US]] +; CHECK-NEXT: ] +; +; CHECK: [[B_HEADER_SPLIT_US]]: +; CHECK-NEXT: br label %[[C_HEADER_US:.*]] +; +; CHECK: [[C_HEADER_US]]: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %[[B_LATCH_US:.*]] +; +; CHECK: [[B_LATCH_US]]: +; CHECK-NEXT: br label %b.latch +; +; CHECK: [[B_HEADER_SPLIT]]: +; CHECK-NEXT: %[[X_A_LCSSA:.*]] = phi i32 [ 
%x.a, %b.header ] +; CHECK-NEXT: %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ] +; CHECK-NEXT: br label %c.header + +c.header: + call i32 @c() + switch i32 %v1, label %c.latch [ + i32 1, label %b.latch + i32 2, label %b.latch + i32 3, label %b.latch + ] +; CHECK: c.header: +; CHECK-NEXT: call i32 @c() +; CHECK-NEXT: br label %c.latch + +c.latch: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %exit +; CHECK: c.latch: +; CHECK-NEXT: store i32 %[[X_A_LCSSA]], i32* %ptr +; CHECK-NEXT: store i32 %[[X_B_LCSSA]], i32* %ptr +; CHECK-NEXT: %v2 = call i1 @cond() +; CHECK-NEXT: br i1 %v2, label %c.header, label %exit + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch +; CHECK: b.latch: +; CHECK-NEXT: %v3 = call i1 @cond() +; CHECK-NEXT: br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header +; CHECK: a.latch: +; CHECK-NEXT: br label %a.header + +exit: + ret void +; CHECK: exit: +; CHECK-NEXT: ret void +}