Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -103,6 +103,214 @@
     cl::init(false), cl::Hidden,
     cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
              "null checks to save time analyzing if we can keep it."));
+static cl::opt<unsigned>
+    MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
+                  cl::desc("Max number of memory uses to explore during "
+                           "partial unswitching analysis"),
+                  cl::init(100), cl::Hidden);
+
+/// Struct to hold information about a partially invariant condition.
+struct IVConditionInfo {
+  /// Instructions that need to be duplicated and checked for the unswitching
+  /// condition.
+  TinyPtrVector<Value *> InstToDuplicate;
+
+  /// Constant to indicate for which value the condition is invariant.
+  Constant *KnownValue = nullptr;
+
+  /// True if the partially invariant path is no-op (=does not have any
+  /// side-effects and no loop value is used outside the loop).
+  bool PathIsNoop = true;
+
+  /// If the partially invariant path reaches a single exit block, ExitForPath
+  /// is set to that block. Otherwise it is nullptr.
+  BasicBlock *ExitForPath = nullptr;
+};
+
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'.
+/// That is, duplicate the instructions feeding the condition in the
+/// pre-header, then unswitch on the duplicated condition. The condition is
+/// now known in the unswitched version for the 'invariant' path through the
+/// original loop.
+///
+/// If the branch condition of the header is partially invariant, return the
+/// instructions to duplicate, together with a boolean Constant indicating for
+/// which successor the condition is invariant, to update the condition in the
+/// loops created for the true or false successors.
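+///
+/// As an illustration (mirroring the partial-unswitch.ll tests below; names
+/// are only for exposition), consider a header such as
+///
+///   loop.header:
+///     %lv = load i32, i32* %ptr
+///     %sc = icmp eq i32 %lv, 100
+///     br i1 %sc, label %noclobber, label %clobber
+///
+/// If no block on the paths from %noclobber back to the header may write to
+/// %ptr, %lv and %sc can be duplicated in the pre-header, and in the loop
+/// version entered when the duplicated %sc is true, the branch is known to
+/// always take the %noclobber successor.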
+static Optional<IVConditionInfo>
+hasPartialIVCondition(Loop *L, MemorySSAUpdater *MSSAU, AAResults *AA) {
+  auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+  if (!TI || !TI->isConditional())
+    return {};
+
+  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  // The case with the condition outside the loop should already be handled
+  // earlier.
+  if (!CondI || !L->contains(CondI))
+    return {};
+
+  TinyPtrVector<Value *> InstToDuplicate;
+  InstToDuplicate.push_back(CondI);
+
+  SmallVector<Value *, 4> WorkList;
+  WorkList.append(CondI->op_begin(), CondI->op_end());
+
+  SmallVector<MemoryAccess *, 4> AccessesToCheck;
+  SmallVector<MemoryLocation, 4> AccessedLocs;
+  MemorySSA *MSSA = MSSAU->getMemorySSA();
+  while (!WorkList.empty()) {
+    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+    if (!I || !L->contains(I))
+      continue;
+
+    // TODO: support additional instructions.
+    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+      return {};
+
+    // Do not duplicate volatile and atomic loads.
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      if (LI->isVolatile() || LI->isAtomic())
+        return {};
+
+    InstToDuplicate.push_back(I);
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+        // Queue the defining access for alias checks.
+        AccessesToCheck.push_back(MemUse->getDefiningAccess());
+        AccessedLocs.push_back(MemoryLocation::get(I));
+      } else {
+        // MemoryDefs may clobber the location or may be atomic memory
+        // operations. Bail out.
+        return {};
+      }
+    }
+    WorkList.append(I->op_begin(), I->op_end());
+  }
+
+  if (InstToDuplicate.empty())
+    return {};
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
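+  // Returns an IVConditionInfo for Succ if all paths from Succ back to Header
+  // stay clear of MemoryDefs that may clobber one of the AccessedLocs. This
+  // proceeds in three steps: collect the loop blocks reachable from Succ,
+  // scan them for clobbering MemoryDefs by walking the MemorySSA use lists,
+  // and finally determine whether the invariant path is a no-op that reaches
+  // a single exit block.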
+  auto HasNoClobbersOnPath =
+      [L, AA, &AccessedLocs, &ExitingBlocks,
+       &InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
+                         SmallVector<MemoryAccess *, 4> AccessesToCheck)
+      -> Optional<IVConditionInfo> {
+    IVConditionInfo Info;
+    // First, collect all blocks in the loop that are on a path from Succ
+    // to the header.
+    SmallVector<BasicBlock *, 4> WorkList;
+    WorkList.push_back(Succ);
+    WorkList.push_back(Header);
+    SmallPtrSet<BasicBlock *, 4> Seen;
+    Seen.insert(Header);
+    Info.PathIsNoop &=
+        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+    while (!WorkList.empty()) {
+      BasicBlock *Current = WorkList.pop_back_val();
+      if (!L->contains(Current))
+        continue;
+      const auto &SeenIns = Seen.insert(Current);
+      if (!SeenIns.second)
+        continue;
+
+      Info.PathIsNoop &= all_of(
+          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+      WorkList.append(succ_begin(Current), succ_end(Current));
+    }
+
+    // Require at least 2 blocks on a path through the loop. This skips
+    // paths that directly exit the loop.
+    if (Seen.size() < 2)
+      return {};
+
+    // Next, check if there are any MemoryDefs that are on the path through
+    // the loop (in the Seen set) and may-alias any of the locations in
+    // AccessedLocs. If that is the case, they may modify the condition and
+    // partial unswitching is not possible.
+    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+    while (!AccessesToCheck.empty()) {
+      MemoryAccess *Current = AccessesToCheck.pop_back_val();
+      auto SeenI = SeenAccesses.insert(Current);
+      if (!SeenI.second || !Seen.contains(Current->getBlock()))
+        continue;
+
+      // Bail out if we exceeded the threshold.
+      if (SeenAccesses.size() >= MSSAThreshold)
+        return {};
+
+      // MemoryUses are read-only accesses.
+      if (isa<MemoryUse>(Current))
+        continue;
+
+      // For a MemoryDef, check if it aliases any of the locations feeding
+      // the original condition.
+      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+        if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+              return isModSet(
+                  AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+            }))
+          return {};
+      }
+
+      for (Use &U : Current->uses())
+        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+    }
+
+    // We could also allow loops with known trip counts without mustprogress,
+    // but ScalarEvolution may not be available.
+    Info.PathIsNoop &=
+        L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
+
+    // If the path is considered a no-op so far, check if it reaches a
+    // single exit block without any phis. This ensures no values from the
+    // loop are used outside of the loop.
+    if (Info.PathIsNoop) {
+      for (auto *Exiting : ExitingBlocks) {
+        if (!Seen.contains(Exiting))
+          continue;
+        for (auto *Succ : successors(Exiting)) {
+          if (L->contains(Succ))
+            continue;
+
+          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+                             (!Info.ExitForPath || Info.ExitForPath == Succ);
+          if (!Info.PathIsNoop)
+            break;
+          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+                 "cannot have multiple exit blocks");
+          Info.ExitForPath = Succ;
+        }
+      }
+    }
+    if (!Info.ExitForPath)
+      Info.PathIsNoop = false;
+
+    Info.InstToDuplicate = InstToDuplicate;
+    return Info;
+  };
+
+  // If we branch to the same successor, partial unswitching will not be
+  // beneficial.
+  if (TI->getSuccessor(0) == TI->getSuccessor(1))
+    return {};
+
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+    return Info;
+  }
+  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
+                                      AccessesToCheck)) {
+    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+    return Info;
+  }
+
+  return {};
+}
 
 /// Collect all of the loop invariant input values transitively used by the
 /// homogeneous instruction graph from a given root.
@@ -202,6 +410,49 @@
                    Direction ? &NormalSucc : &UnswitchedSucc);
 }
 
+/// Copy a set of loop invariant values, and conditionally branch on them.
+static void buildPartialInvariantUnswitchConditionalBranch(
+    BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
+    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
+    MemorySSAUpdater *MSSAU) {
+  ValueToValueMapTy VMap;
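+  // Note: InstToDuplicate was populated root-first by hasPartialIVCondition,
+  // so cloning in reverse order materializes operands before their users and
+  // the RemapInstruction calls below can resolve them through VMap.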
+  for (auto *Val : reverse(Invariants)) {
+    Instruction *Inst = cast<Instruction>(Val);
+    Instruction *NewInst = Inst->clone();
+    BB.getInstList().insert(BB.end(), NewInst);
+    RemapInstruction(NewInst, VMap,
+                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+    VMap[Val] = NewInst;
+
+    if (!MSSAU)
+      continue;
+
+    MemorySSA *MSSA = MSSAU->getMemorySSA();
+    if (auto *MemUse =
+            dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
+      auto *DefiningAccess = MemUse->getDefiningAccess();
+      // Get the first defining access before the loop.
+      while (L.contains(DefiningAccess->getBlock())) {
+        // If the defining access is a MemoryPhi, get the incoming
+        // value for the pre-header as defining access.
+        if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
+          DefiningAccess =
+              MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
+        else
+          DefiningAccess =
+              cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
+      }
+      MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
+                                    NewInst->getParent(),
+                                    MemorySSA::BeforeTerminator);
+    }
+  }
+
+  IRBuilder<> IRB(&BB);
+  Value *Cond = VMap[Invariants[0]];
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+                   Direction ? &NormalSucc : &UnswitchedSucc);
+}
+
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
 ///
 /// Requires that the loop exit and unswitched basic block are the same, and
@@ -1964,18 +2215,23 @@
 
 static void unswitchNontrivialInvariants(
     Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
-    SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
-    AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
+    SmallVectorImpl<BasicBlock *> &ExitBlocks,
+    struct IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
+    AssumptionCache &AC,
+    function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
     ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   auto *ParentBB = TI.getParent();
   BranchInst *BI = dyn_cast<BranchInst>(&TI);
   SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
 
   // We can only unswitch switches, conditional branches with an invariant
-  // condition, or combining invariant conditions with an instruction.
+  // condition, or combining invariant conditions with an instruction or
+  // partially invariant instructions.
   assert((SI || (BI && BI->isConditional())) &&
          "Can only unswitch switches and conditional branch!");
-  bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
+  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
+  bool FullUnswitch =
+      SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
   if (FullUnswitch)
     assert(Invariants.size() == 1 &&
            "Cannot have other invariants with full unswitching!");
@@ -1989,18 +2245,23 @@
   // Constant and BBs tracking the cloned and continuing successor. When we are
   // unswitching the entire condition, this can just be trivially chosen to
   // unswitch towards `true`. However, when we are unswitching a set of
-  // invariants combined with `and` or `or`, the combining operation determines
-  // the best direction to unswitch: we want to unswitch the direction that will
-  // collapse the branch.
+  // invariants combined with `and` or `or` or partially invariant instructions,
+  // the combining operation determines the best direction to unswitch: we want
+  // to unswitch the direction that will collapse the branch.
   bool Direction = true;
   int ClonedSucc = 0;
   if (!FullUnswitch) {
     if (!match(BI->getCondition(), m_LogicalOr())) {
-      assert(match(BI->getCondition(), m_LogicalAnd()) &&
-             "Only `or`, `and`, an `select` instructions can combine "
-             "invariants being unswitched.");
-      Direction = false;
-      ClonedSucc = 1;
+      assert(
+          (match(BI->getCondition(), m_LogicalAnd()) || PartiallyInvariant) &&
+          "Only `or`, `and`, and `select` instructions can combine invariants "
+          "being unswitched. Partially invariant instructions can also be "
+          "unswitched.");
+      if (match(BI->getCondition(), m_LogicalAnd()) ||
+          (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
+        Direction = false;
+        ClonedSucc = 1;
+      }
     }
   }
@@ -2088,10 +2349,18 @@
   VMaps.reserve(UnswitchedSuccBBs.size());
   SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
   for (auto *SuccBB : UnswitchedSuccBBs) {
-    VMaps.emplace_back(new ValueToValueMapTy());
-    ClonedPHs[SuccBB] = buildClonedLoopBlocks(
-        L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
-        DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
+    // In the partially invariant case, if UnswitchedSuccBB is an exit block,
+    // do not clone the loop and assign UnswitchedSuccBB to ClonedPHs directly.
+    if (PartiallyInvariant && llvm::any_of(ExitBlocks, [&](BasicBlock *ExitBB) {
+          return ExitBB == SuccBB;
+        }))
+      ClonedPHs[SuccBB] = SuccBB;
+    else {
+      VMaps.emplace_back(new ValueToValueMapTy());
+      ClonedPHs[SuccBB] = buildClonedLoopBlocks(
+          L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
+          DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
+    }
   }
 
   // Drop metadata if we may break its semantics by moving this instr into the
@@ -2218,8 +2487,12 @@
     BasicBlock *ClonedPH = ClonedPHs.begin()->second;
     // When doing a partial unswitch, we have to do a bit more work to build up
     // the branch in the split block.
-    buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
-                                          *ClonedPH, *LoopPH);
+    if (PartiallyInvariant)
+      buildPartialInvariantUnswitchConditionalBranch(
+          *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+    else
+      buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
+                                            *ClonedPH, *LoopPH);
     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
 
     if (MSSAU) {
@@ -2289,7 +2562,8 @@
     // for each invariant operand.
     // So it happens that for multiple-partial case we dont replace
     // in the unswitched branch.
-    bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
+    bool ReplaceUnswitched =
+        FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
 
     ConstantInt *UnswitchedReplacement =
         Direction ? ConstantInt::getTrue(BI->getContext())
@@ -2301,7 +2575,7 @@
       // Use make_early_inc_range here as set invalidates the iterator.
       for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
         Instruction *UserI = dyn_cast<Instruction>(U.getUser());
-        if (!UserI)
+        if (!UserI || PartiallyInvariant)
           continue;
 
         // Replace it with the 'continue' side if in the main loop body, and the
@@ -2384,7 +2658,7 @@
   for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
     if (UpdatedL->getParentLoop() == ParentL)
       SibLoops.push_back(UpdatedL);
-  UnswitchCB(IsStillLoop, SibLoops);
+  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
 
   if (MSSAU && VerifyMemorySSA)
     MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2599,11 +2873,11 @@
   return CostMultiplier;
 }
 
-static bool
-unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
-                      AssumptionCache &AC, TargetTransformInfo &TTI,
-                      function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
-                      ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+static bool unswitchBestCondition(
+    Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+    AAResults &AA, TargetTransformInfo &TTI,
+    function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
+    ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   // Collect all invariant conditions within this loop (as opposed to an inner
   // loop which would be handled when visiting that inner loop).
   SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
       UnswitchCandidates;
@@ -2618,6 +2892,7 @@
     CollectGuards = true;
   }
 
+  struct IVConditionInfo PartialIVInfo;
   for (auto *BB : L.blocks()) {
     if (LI.getLoopFor(BB) != &L)
       continue;
@@ -2651,15 +2926,29 @@
     }
 
     Instruction &CondI = *cast<Instruction>(BI->getCondition());
-    if (!match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr())))
-      continue;
+    if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
+      TinyPtrVector<Value *> Invariants =
+          collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
+      if (Invariants.empty())
+        continue;
 
-    TinyPtrVector<Value *> Invariants =
-        collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
-    if (Invariants.empty())
+      UnswitchCandidates.push_back({BI, std::move(Invariants)});
       continue;
+    }
+  }
 
-    UnswitchCandidates.push_back({BI, std::move(Invariants)});
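+  // Additionally check the loop header for a partially invariant condition,
+  // but only if its terminator has not already been added as a regular
+  // unswitch candidate above.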
if (!FullUnswitch) { auto &BI = cast(TI); if (match(BI.getCondition(), m_LogicalAnd())) { if (SuccBB == BI.getSuccessor(1)) continue; - } else { - assert(match(BI.getCondition(), m_LogicalOr()) && - "Only `and` and `or` conditions can result in a partial " - "unswitch!"); + } else if (match(BI.getCondition(), m_LogicalOr())) { if (SuccBB == BI.getSuccessor(0)) continue; + } else if (!PartialIVInfo.InstToDuplicate.empty()) { + if (PartialIVInfo.KnownValue->isOneValue() && + SuccBB == BI.getSuccessor(1)) + continue; + else if (!PartialIVInfo.KnownValue->isOneValue() && + SuccBB == BI.getSuccessor(0)) + continue; } } @@ -2852,11 +3146,11 @@ BestUnswitchTI = turnGuardIntoBranch(cast(BestUnswitchTI), L, ExitBlocks, DT, LI, MSSAU); - LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " - << BestUnswitchCost << ") terminator: " << *BestUnswitchTI - << "\n"); + LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << BestUnswitchCost + << ") terminator: " << *BestUnswitchTI << "\n"); unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants, - ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU); + ExitBlocks, PartialIVInfo, DT, LI, AC, + UnswitchCB, SE, MSSAU); return true; } @@ -2867,9 +3161,9 @@ /// looks at other loop invariant control flows and tries to unswitch those as /// well by cloning the loop if the result is small enough. /// -/// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also -/// updated based on the unswitch. -/// The `MSSA` analysis is also updated if valid (i.e. its use is enabled). +/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are +/// also updated based on the unswitch. The `MSSA` analysis is also updated if +/// valid (i.e. its use is enabled). /// /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is /// true, we will attempt to do non-trivial unswitching as well as trivial @@ -2881,11 +3175,11 @@ /// /// If `SE` is non-null, we will update that analysis based on the unswitching /// done. -static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, - AssumptionCache &AC, TargetTransformInfo &TTI, - bool NonTrivial, - function_ref)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { +static bool +unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, + AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial, + function_ref)> UnswitchCB, + ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); @@ -2897,7 +3191,7 @@ if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { // If we unswitched successfully we will want to clean up the loop before // processing it further so just mark it as unswitched and return. - UnswitchCB(/*CurrentLoopValid*/ true, {}); + UnswitchCB(/*CurrentLoopValid*/ true, false, {}); return true; } @@ -2923,7 +3217,7 @@ // Try to unswitch the best invariant condition. We prefer this full unswitch to // a partial unswitch when possible below the threshold. - if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU)) + if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU)) return true; // No other opportunities to unswitch. @@ -2944,6 +3238,7 @@ std::string LoopName = std::string(L.getName()); auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, + bool PartiallyInvariant, ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. 
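+    // Note that partially unswitched loops are deliberately not revisited:
+    // the remaining loop still contains the partially invariant condition,
+    // so revisiting could keep unswitching the same condition.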
-    if (CurrentLoopValid)
-      U.revisitCurrentLoop();
-    else
+    if (CurrentLoopValid) {
+      if (!PartiallyInvariant)
+        U.revisitCurrentLoop();
+    } else
       U.markLoopAsDeleted(L, LoopName);
   };
@@ -2963,8 +3259,9 @@
     if (VerifyMemorySSA)
       AR.MSSA->verifyMemorySSA();
   }
-  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
-                    &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, NonTrivial,
+                    UnswitchCB, &AR.SE,
+                    MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
@@ -3021,6 +3318,7 @@
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   MemorySSA *MSSA = nullptr;
   Optional<MemorySSAUpdater> MSSAU;
@@ -3032,7 +3330,7 @@
   auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
   auto *SE = SEWP ? &SEWP->getSE() : nullptr;
 
-  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
+  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
                                ArrayRef<Loop *> NewLoops) {
     // If we did a non-trivial unswitch, we have added new (cloned) loops.
     for (auto *NewL : NewLoops)
@@ -3041,16 +3339,17 @@
     // If the current loop remains valid, re-add it to the queue. This is
     // a little wasteful as we'll finish processing the current loop as well,
     // but it is the best we can do in the old PM.
-    if (CurrentLoopValid)
-      LPM.addLoop(*L);
-    else
+    if (CurrentLoopValid) {
+      if (!PartiallyInvariant)
+        LPM.addLoop(*L);
+    } else
       LPM.markLoopAsDeleted(*L);
   };
 
   if (MSSA && VerifyMemorySSA)
     MSSA->verifyMemorySSA();
 
-  bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
+  bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, NonTrivial, UnswitchCB, SE,
                               MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
 
   if (MSSA && VerifyMemorySSA)
Index: llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -0,0 +1,1102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='loop-mssa(unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
+
+declare void @clobber()
+
+define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_false_successor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %clobber, label %noclobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @partial_unswitch_gep_load_icmp(i32** %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_gep_load_icmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32*, i32** [[PTR:%.*]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 100
+; CHECK-NEXT:    br i1 [[TMP3]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32*, i32** [[PTR]], i32 1
+; CHECK-NEXT:    [[LV_1:%.*]] = load i32*, i32** [[GEP]], align 8
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[LV_1]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %gep = getelementptr i32*, i32** %ptr, i32 1
+  %lv.1 = load i32*, i32** %gep
+  %lv = load i32, i32* %lv.1
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
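+; Check that a reduction phi which takes different values on the two paths
+; survives partial unswitching.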
+define i32 @partial_unswitch_reduction_phi(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_reduction_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    [[RED_US:%.*]] = phi i32 [ 20, [[ENTRY_SPLIT_US]] ], [ [[RED_NEXT_US:%.*]], [[LOOP_LATCH_US]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    [[ADD_10_US:%.*]] = add i32 [[RED_US]], 10
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[RED_NEXT_US]] = phi i32 [ [[ADD_10_US]], [[NOCLOBBER_US]] ]
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    [[RED_NEXT_LCSSA_US:%.*]] = phi i32 [ [[RED_NEXT_US]], [[LOOP_LATCH_US]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ 20, [[ENTRY_SPLIT]] ], [ [[RED_NEXT:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[RED]], 5
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    [[ADD_10:%.*]] = add i32 [[RED]], 10
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[RED_NEXT]] = phi i32 [ [[ADD_5]], [[CLOBBER]] ], [ [[ADD_10]], [[NOCLOBBER]] ]
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTUS_PHI:%.*]] = phi i32 [ [[RED_NEXT_LCSSA]], [[EXIT_SPLIT]] ], [ [[RED_NEXT_LCSSA_US]], [[EXIT_SPLIT_US]] ]
+; CHECK-NEXT:    ret i32 [[DOTUS_PHI]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %red = phi i32 [ 20, %entry ], [ %red.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %clobber, label %noclobber
+
+clobber:
+  call void @clobber()
+  %add.5 = add i32 %red, 5
+  br label %loop.latch
+
+noclobber:
+  %add.10 = add i32 %red, 10
+  br label %loop.latch
+
+loop.latch:
+  %red.next = phi i32 [ %add.5, %clobber ], [ %add.10, %noclobber ]
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  %red.next.lcssa = phi i32 [ %red.next, %loop.latch ]
+  ret i32 %red.next.lcssa
+}
+
+; Partial unswitching is possible, because the store in %noclobber does not
+; alias the load of the condition.
+define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_noclobber(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR_1:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    [[LV_US:%.*]] = load i32, i32* [[PTR_1]], align 4
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    [[GEP_1_US:%.*]] = getelementptr i32, i32* [[PTR_2:%.*]], i32 [[IV_US]]
+; CHECK-NEXT:    store i32 [[LV_US]], i32* [[GEP_1_US]], align 4
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR_1]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i32, i32* [[PTR_2]], i32 [[IV]]
+; CHECK-NEXT:    store i32 [[LV]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr.1
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  %gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv
+  store i32 %lv, i32* %gep.1
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
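+; The loop-varying condition is a phi and does not involve a duplicatable
+; chain of loads, so there is nothing to unswitch on.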
+define void @no_partial_unswitch_phi_cond(i1 %lc, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_phi_cond(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[SC:%.*]] = phi i1 [ [[LC:%.*]], [[ENTRY]] ], [ true, [[LOOP_LATCH]] ]
+; CHECK-NEXT:    br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %sc = phi i1 [ %lc, %entry ], [ true, %loop.latch ]
+  br i1 %sc, label %clobber, label %noclobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
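+; The call to @clobber in the latch is on every path through the loop, so the
+; load feeding the condition may be modified and partial unswitching must not
+; happen.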
+define void @no_partial_unswitch_clobber_latch(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_clobber_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  call void @clobber()
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define void @no_partial_unswitch_clobber_header(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_clobber_header(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  call void @clobber()
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
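+; Both successors clobber memory before reaching the latch, so neither path
+; keeps the condition invariant.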
+define void @no_partial_unswitch_clobber_both(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_clobber_both(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  call void @clobber()
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+define i32 @no_partial_unswitch_true_successor_storeclobber(i32* %ptr.1, i32* %ptr.2, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_true_successor_storeclobber(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR_1:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i32, i32* [[PTR_2:%.*]], i32 [[IV]]
+; CHECK-NEXT:    store i32 [[LV]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr.1
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  %gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv
+  store i32 %lv, i32* %gep.1
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+; Make sure the duplicated instructions are moved to a preheader that always
+; executes when the loop body also executes. Do not check the unswitched code,
+; because it is already checked in the @partial_unswitch_true_successor test
+; case.
+define i32 @partial_unswitch_true_successor_preheader_insertion(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_preheader_insertion(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32* [[PTR:%.*]], null
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_PH_SPLIT_US:%.*]], label [[LOOP_PH_SPLIT:%.*]]
+; CHECK:       loop.ph.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[LOOP_PH_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_LOOPEXIT_SPLIT_US:%.*]]
+; CHECK:       exit.loopexit.split.us:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       loop.ph.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[LOOP_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]]
+; CHECK:       exit.loopexit.split:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+
+entry:
+  %ec = icmp ne i32* %ptr, null
+  br i1 %ec, label %loop.ph, label %exit
+
+loop.ph:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %loop.ph ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+; Make sure the duplicated instructions are hoisted just before the branch of
+; the preheader. Do not check the unswitched code, because it is already
+; checked in the @partial_unswitch_true_successor test case.
+define i32 @partial_unswitch_true_successor_insert_point(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_insert_point(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  call void @clobber()
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+; Make sure invariant instructions in the loop are also hoisted to the
+; preheader. Do not check the unswitched code, because it is already checked
+; in the @partial_unswitch_true_successor test case.
+define i32 @partial_unswitch_true_successor_hoist_invariant(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_hoist_invariant(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 100
+; CHECK-NEXT:    br i1 [[TMP2]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 1
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[GEP]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %gep = getelementptr i32, i32* %ptr, i64 1
+  %lv = load i32, i32* %gep
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+; Do not unswitch if the condition depends on an atomic load. Duplicating such
+; loads is not safe.
+define i32 @no_partial_unswitch_atomic_load_unordered(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_atomic_load_unordered(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load atomic i32, i32* [[PTR:%.*]] unordered, align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load atomic i32, i32* %ptr unordered, align 4
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+; Do not unswitch if the condition depends on an atomic load. Duplicating such
+; loads is not safe.
+define i32 @no_partial_unswitch_atomic_load_monotonic(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_atomic_load_monotonic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load atomic i32, i32* [[PTR:%.*]] monotonic, align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load atomic i32, i32* %ptr monotonic, align 4
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+
+declare i32 @get_value()
+
+; Do not unswitch if the condition depends on a call that may clobber memory.
+; Duplicating such a call is not safe.
+define i32 @no_partial_unswitch_cond_call(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_cond_call(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = call i32 @get_value()
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = call i32 @get_value()
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @no_partial_unswitch_true_successor_exit(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_true_successor_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[EXIT:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %exit, label %clobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @no_partial_unswitch_true_same_successor(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @no_partial_unswitch_true_same_successor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[NOCLOBBER]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %noclobber
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
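+; Partial unswitching should also be possible when the invariant successor of
+; the header is the loop latch itself.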
+define i32 @partial_unswitch_true_to_latch(i32* %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_to_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:    br i1 [[SC]], label [[LOOP_LATCH]], label [[CLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %loop.latch, label %clobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}