Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -103,6 +103,11 @@
     cl::init(false), cl::Hidden,
     cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
              "null checks to save time analyzing if we can keep it."));
+static cl::opt<unsigned>
+    MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
+                  cl::desc("Max number of memory uses to explore during "
+                           "partial unswitching analysis"),
+                  cl::init(100), cl::Hidden);
 
 /// Collect all of the loop invariant input values transitively used by the
 /// homogeneous instruction graph from a given root.
@@ -187,8 +192,9 @@
   llvm_unreachable("Basic blocks should never be empty!");
 }
 
-/// Insert code to test a set of loop invariant values, and conditionally branch
-/// on them.
+/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// end of \p BB and conditionally branch on the copied condition. We only
+/// branch on a single value.
 static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
                                                   ArrayRef<Value *> Invariants,
                                                   bool Direction,
@@ -202,6 +208,49 @@
                    Direction ? &NormalSucc : &UnswitchedSucc);
 }
 
+/// Copy a set of loop invariant values, and conditionally branch on them.
+static void buildPartialInvariantUnswitchConditionalBranch(
+    BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
+    BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
+    MemorySSAUpdater *MSSAU) {
+  ValueToValueMapTy VMap;
+  for (auto *Val : reverse(ToDuplicate)) {
+    Instruction *Inst = cast<Instruction>(Val);
+    Instruction *NewInst = Inst->clone();
+    BB.getInstList().insert(BB.end(), NewInst);
+    RemapInstruction(NewInst, VMap,
+                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+    VMap[Val] = NewInst;
+
+    if (!MSSAU)
+      continue;
+
+    MemorySSA *MSSA = MSSAU->getMemorySSA();
+    if (auto *MemUse =
+            dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
+      auto *DefiningAccess = MemUse->getDefiningAccess();
+      // Get the first defining access before the loop.
+      while (L.contains(DefiningAccess->getBlock())) {
+        // If the defining access is a MemoryPhi, get the incoming
+        // value for the pre-header as defining access.
+        if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
+          DefiningAccess =
+              MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
+        else
+          DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
+      }
+      MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
+                                    NewInst->getParent(),
+                                    MemorySSA::BeforeTerminator);
+    }
+  }
+
+  IRBuilder<> IRB(&BB);
+  Value *Cond = VMap[ToDuplicate[0]];
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+                   Direction ? &NormalSucc : &UnswitchedSucc);
+}
+
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
 ///
 /// Requires that the loop exit and unswitched basic block are the same, and
@@ -1963,18 +2012,22 @@
 static void unswitchNontrivialInvariants(
     Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
-    SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
-    AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
+    SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
+    DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+    function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
     ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   auto *ParentBB = TI.getParent();
   BranchInst *BI = dyn_cast<BranchInst>(&TI);
   SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
 
   // We can only unswitch switches, conditional branches with an invariant
-  // condition, or combining invariant conditions with an instruction.
+  // condition, or combining invariant conditions with an instruction or
+  // partially invariant instructions.
   assert((SI || (BI && BI->isConditional())) &&
          "Can only unswitch switches and conditional branch!");
 
-  bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
+  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
+  bool FullUnswitch =
+      SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
   if (FullUnswitch)
     assert(Invariants.size() == 1 &&
            "Cannot have other invariants with full unswitching!");
@@ -1988,20 +2041,24 @@
   // Constant and BBs tracking the cloned and continuing successor. When we are
   // unswitching the entire condition, this can just be trivially chosen to
   // unswitch towards `true`. However, when we are unswitching a set of
-  // invariants combined with `and` or `or`, the combining operation determines
-  // the best direction to unswitch: we want to unswitch the direction that will
-  // collapse the branch.
+  // invariants combined with `and` or `or`, or partially invariant instructions,
+  // the combining operation determines the best direction to unswitch: we want
+  // to unswitch the direction that will collapse the branch.
   bool Direction = true;
   int ClonedSucc = 0;
 
   if (!FullUnswitch) {
     Value *Cond = BI->getCondition();
     (void)Cond;
-    assert((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) &&
-           "Only `or`, `and`, an `select` instructions can combine "
-           "invariants being unswitched.");
+    assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
+            PartiallyInvariant) &&
+           "Only `or`, `and`, `select`, or partially invariant instructions "
+           "can combine invariants being unswitched.");
     if (!match(BI->getCondition(), m_LogicalOr())) {
-      Direction = false;
-      ClonedSucc = 1;
+      if (match(BI->getCondition(), m_LogicalAnd()) ||
+          (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
+        Direction = false;
+        ClonedSucc = 1;
+      }
     }
   }
 
@@ -2219,8 +2276,12 @@
     BasicBlock *ClonedPH = ClonedPHs.begin()->second;
     // When doing a partial unswitch, we have to do a bit more work to build up
     // the branch in the split block.
-    buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
-                                          *ClonedPH, *LoopPH);
+    if (PartiallyInvariant)
+      buildPartialInvariantUnswitchConditionalBranch(
+          *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+    else
+      buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
+                                            *ClonedPH, *LoopPH);
     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
 
     if (MSSAU) {
@@ -2272,7 +2333,7 @@
   // verification steps.
   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
 
-  if (BI) {
+  if (BI && !PartiallyInvariant) {
     // If we unswitched a branch which collapses the condition to a known
     // constant we want to replace all the uses of the invariants within both
     // the original and cloned blocks. We do this here so that we can use the
@@ -2290,7 +2351,8 @@
     // for each invariant operand.
     // So it happens that for multiple-partial case we dont replace
     // in the unswitched branch.
-    bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
+    bool ReplaceUnswitched =
+        FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
     ConstantInt *UnswitchedReplacement =
         Direction ? ConstantInt::getTrue(BI->getContext())
                   : ConstantInt::getFalse(BI->getContext());
 
@@ -2385,7 +2447,7 @@
   for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
     if (UpdatedL->getParentLoop() == ParentL)
       SibLoops.push_back(UpdatedL);
-  UnswitchCB(IsStillLoop, SibLoops);
+  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
 
   if (MSSAU && VerifyMemorySSA)
     MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2600,11 +2662,11 @@
   return CostMultiplier;
 }
 
-static bool
-unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
-                      AssumptionCache &AC, TargetTransformInfo &TTI,
-                      function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
-                      ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+static bool unswitchBestCondition(
+    Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+    AAResults &AA, TargetTransformInfo &TTI,
+    function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
+    ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   // Collect all invariant conditions within this loop (as opposed to an inner
   // loop which would be handled when visiting that inner loop).
   SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
@@ -2619,6 +2681,7 @@
     CollectGuards = true;
   }
 
+  IVConditionInfo PartialIVInfo;
   for (auto *BB : L.blocks()) {
     if (LI.getLoopFor(BB) != &L)
       continue;
@@ -2659,15 +2722,35 @@
     }
 
     Instruction &CondI = *cast<Instruction>(BI->getCondition());
-    if (!match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr())))
-      continue;
+    if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
+      TinyPtrVector<Value *> Invariants =
+          collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
+      if (Invariants.empty())
+        continue;
 
-    TinyPtrVector<Value *> Invariants =
-        collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
-    if (Invariants.empty())
+      UnswitchCandidates.push_back({BI, std::move(Invariants)});
       continue;
+    }
+  }
 
-    UnswitchCandidates.push_back({BI, std::move(Invariants)});
+  Instruction *PartialIVCondBranch = nullptr;
+  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
+      !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
+        return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
+      })) {
+    MemorySSA *MSSA = MSSAU->getMemorySSA();
+    if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
+      LLVM_DEBUG(
+          dbgs() << "simple-loop-unswitch: Found partially invariant condition "
                 << *Info->InstToDuplicate[0] << "\n");
+      PartialIVInfo = *Info;
+      PartialIVCondBranch = L.getHeader()->getTerminator();
+      TinyPtrVector<Value *> ValsToDuplicate;
+      for (auto *Inst : Info->InstToDuplicate)
+        ValsToDuplicate.push_back(Inst);
+      UnswitchCandidates.push_back(
+          {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
+    }
   }
 
   // If we didn't find any candidates, we're done.
@@ -2773,20 +2856,25 @@
         continue;
 
       // If this is a partial unswitch candidate, then it must be a conditional
-      // branch with a condition of either `or`, `and`, or their corresponding
-      // select forms. In that case, one of the successors is necessarily
-      // duplicated, so don't even try to remove its cost.
+      // branch with a condition of either `or`, `and`, their corresponding
+      // select forms, or partially invariant instructions. In that case, one of
+      // the successors is necessarily duplicated, so don't even try to remove
+      // its cost.
      if (!FullUnswitch) {
        auto &BI = cast<BranchInst>(TI);
        if (match(BI.getCondition(), m_LogicalAnd())) {
          if (SuccBB == BI.getSuccessor(1))
            continue;
-        } else {
-          assert(match(BI.getCondition(), m_LogicalOr()) &&
-                 "Only `and` and `or` conditions can result in a partial "
-                 "unswitch!");
+        } else if (match(BI.getCondition(), m_LogicalOr())) {
          if (SuccBB == BI.getSuccessor(0))
            continue;
+        } else if (!PartialIVInfo.InstToDuplicate.empty()) {
+          if (PartialIVInfo.KnownValue->isOneValue() &&
+              SuccBB == BI.getSuccessor(1))
+            continue;
+          else if (!PartialIVInfo.KnownValue->isOneValue() &&
+                   SuccBB == BI.getSuccessor(0))
+            continue;
        }
      }
 
@@ -2855,6 +2943,9 @@
     return false;
   }
 
+  if (BestUnswitchTI != PartialIVCondBranch)
+    PartialIVInfo.InstToDuplicate.clear();
+
   // If the best candidate is a guard, turn it into a branch.
   if (isGuard(BestUnswitchTI))
     BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
@@ -2864,7 +2955,8 @@
                     << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
                     << "\n");
   unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
-                               ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
+                               ExitBlocks, PartialIVInfo, DT, LI, AC,
+                               UnswitchCB, SE, MSSAU);
   return true;
 }
 
@@ -2875,9 +2967,9 @@
 /// looks at other loop invariant control flows and tries to unswitch those as
 /// well by cloning the loop if the result is small enough.
 ///
-/// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
-/// updated based on the unswitch.
-/// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
+/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
+/// also updated based on the unswitch. The `MSSA` analysis is also updated if
+/// valid (i.e. its use is enabled).
 ///
 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
 /// true, we will attempt to do non-trivial unswitching as well as trivial
 ///
@@ -2889,11 +2981,11 @@
 ///
 /// If `SE` is non-null, we will update that analysis based on the unswitching
 /// done.
-static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
-                         AssumptionCache &AC, TargetTransformInfo &TTI,
-                         bool NonTrivial,
-                         function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
-                         ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+static bool
+unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+             AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial,
+             function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
+             ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
          "Loops must be in LCSSA form before unswitching.");
 
@@ -2905,7 +2997,7 @@
   if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
     // If we unswitched successfully we will want to clean up the loop before
     // processing it further so just mark it as unswitched and return.
-    UnswitchCB(/*CurrentLoopValid*/ true, {});
+    UnswitchCB(/*CurrentLoopValid*/ true, false, {});
     return true;
   }
 
@@ -2941,7 +3033,7 @@
 
   // Try to unswitch the best invariant condition. We prefer this full unswitch to
   // a partial unswitch when possible below the threshold.
-  if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
+  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
     return true;
 
   // No other opportunities to unswitch.
@@ -2962,6 +3054,7 @@
   std::string LoopName = std::string(L.getName());
 
   auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
+                                        bool PartiallyInvariant,
                                         ArrayRef<Loop *> NewLoops) {
     // If we did a non-trivial unswitch, we have added new (cloned) loops.
     if (!NewLoops.empty())
 
@@ -2969,9 +3062,21 @@
     // If the current loop remains valid, we should revisit it to catch any
     // other unswitch opportunities. Otherwise, we need to mark it as deleted.
-    if (CurrentLoopValid)
-      U.revisitCurrentLoop();
-    else
+    if (CurrentLoopValid) {
+      if (PartiallyInvariant) {
+        // Mark the new loop as partially unswitched, to avoid unswitching on
+        // the same condition again.
+        auto &Context = L.getHeader()->getContext();
+        MDNode *DisableUnswitchMD = MDNode::get(
+            Context,
+            MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+        MDNode *NewLoopID = makePostTransformationMetadata(
+            Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
+            {DisableUnswitchMD});
+        L.setLoopID(NewLoopID);
+      } else
+        U.revisitCurrentLoop();
+    } else
       U.markLoopAsDeleted(L, LoopName);
   };
 
@@ -2981,8 +3086,9 @@
     if (VerifyMemorySSA)
       AR.MSSA->verifyMemorySSA();
   }
-  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
-                    &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, NonTrivial,
+                    UnswitchCB, &AR.SE,
+                    MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
@@ -3039,6 +3145,7 @@
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   MemorySSA *MSSA = nullptr;
   Optional<MemorySSAUpdater> MSSAU;
@@ -3050,7 +3157,7 @@
   auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
   auto *SE = SEWP ? &SEWP->getSE() : nullptr;
 
-  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
+  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
                                ArrayRef<Loop *> NewLoops) {
     // If we did a non-trivial unswitch, we have added new (cloned) loops.
     for (auto *NewL : NewLoops)
@@ -3059,17 +3166,22 @@
     // If the current loop remains valid, re-add it to the queue. This is
    // a little wasteful as we'll finish processing the current loop as well,
     // but it is the best we can do in the old PM.
-    if (CurrentLoopValid)
-      LPM.addLoop(*L);
-    else
+    if (CurrentLoopValid) {
+      // If the current loop has been unswitched using a partially invariant
+      // condition, we should not re-add the current loop to avoid unswitching
+      // on the same condition again.
+      if (!PartiallyInvariant)
+        LPM.addLoop(*L);
+    } else
       LPM.markLoopAsDeleted(*L);
  };
 
   if (MSSA && VerifyMemorySSA)
     MSSA->verifyMemorySSA();
 
-  bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
-                              MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+  bool Changed =
+      unswitchLoop(*L, DT, LI, AC, AA, TTI, NonTrivial, UnswitchCB, SE,
+                   MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
 
   if (MSSA && VerifyMemorySSA)
     MSSA->verifyMemorySSA();
Index: llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='loop-mssa(unswitch<nontrivial>),loop-mssa(unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
+
+; The bugs below caused endless unswitching:
+;
+; https://bugs.llvm.org/show_bug.cgi?id=50279
+; https://bugs.llvm.org/show_bug.cgi?id=50302
+;
+; This test's loop should be unswitched only once even though we run the
+; SimpleLoopUnswitch pass twice.
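+;
+; After the first run partially unswitches the loop, the loop is tagged with
+; "llvm.loop.unswitch.partial.disable" metadata, so the second run does not
+; unswitch it on the same condition again.
+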
+ +@a = dso_local local_unnamed_addr global i32 0, align 4 +@c = dso_local local_unnamed_addr global i32 0, align 4 +@b = dso_local local_unnamed_addr global i8 0, align 1 + +; Function Attrs: nofree norecurse nosync nounwind uwtable +define dso_local void @d() { +; CHECK-LABEL: @d( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* null, align 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label [[FOR_END_SPLIT_US:%.*]] +; CHECK: for.end.split.us: +; CHECK-NEXT: br label [[G_US:%.*]] +; CHECK: g.us: +; CHECK-NEXT: br label [[G_SPLIT_US6:%.*]] +; CHECK: for.cond1.us1: +; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* null, align 2 +; CHECK-NEXT: [[TOBOOL4_NOT_US:%.*]] = icmp eq i16 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]] +; CHECK: for.cond5.us2: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US5:%.*]], label [[FOR_INC_US3:%.*]] +; CHECK: for.inc.us3: +; CHECK-NEXT: store i8 0, i8* @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5_US2:%.*]] +; CHECK: for.cond5.preheader.us4: +; CHECK-NEXT: br label [[FOR_COND5_US2]] +; CHECK: for.cond1.loopexit.us5: +; CHECK-NEXT: br label [[FOR_COND1_US1:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: g.loopexit.us: +; CHECK-NEXT: br label [[G_US]] +; CHECK: g.split.us6: +; CHECK-NEXT: br label [[FOR_COND1_US1]] +; CHECK: for.end.split: +; CHECK-NEXT: br label [[G:%.*]] +; CHECK: g.loopexit: +; CHECK-NEXT: br label [[G]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: g: +; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* null, align 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i16 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]] +; CHECK: g.split.us: +; CHECK-NEXT: br label [[FOR_COND1_US:%.*]] +; CHECK: for.cond1.us: +; CHECK-NEXT: br label [[FOR_COND5_PREHEADER_US:%.*]] +; CHECK: for.cond5.us: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT_US:%.*]], label [[FOR_INC_US:%.*]] +; CHECK: for.inc.us: +; CHECK-NEXT: store i8 0, i8* @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5_US:%.*]] +; CHECK: for.cond5.preheader.us: +; CHECK-NEXT: br label [[FOR_COND5_US]] +; CHECK: for.cond1.loopexit.us: +; CHECK-NEXT: br label [[FOR_COND1_US]] +; CHECK: g.split: +; CHECK-NEXT: br label [[FOR_COND1:%.*]] +; CHECK: for.cond1.loopexit: +; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP0]] +; CHECK: for.cond1: +; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* null, align 2 +; CHECK-NEXT: [[TOBOOL4_NOT:%.*]] = icmp eq i16 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]] +; CHECK: for.cond5.preheader: +; CHECK-NEXT: br label [[FOR_COND5:%.*]] +; CHECK: for.cond5: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_LOOPEXIT:%.*]], label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: store i8 0, i8* @b, align 1 +; CHECK-NEXT: br label [[FOR_COND5]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + br i1 false, label %for.end, label %for.cond + +for.end: ; preds = %for.cond + br label %g + +g: ; preds = %for.cond1, %for.end + br label %for.cond1 + +for.cond1: ; preds = %for.cond5, %g + %0 = load i16, i16* null, align 2 + %tobool4.not = icmp eq i16 %0, 0 + br i1 %tobool4.not, label %for.cond5, label %g + +for.cond5: ; preds = %for.inc, %for.cond1 + br i1 
false, label %for.cond1, label %for.inc + +for.inc: ; preds = %for.cond5 + store i8 0, i8* @b, align 1 + br label %for.cond5 +} Index: llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch-mssa-threshold.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch-mssa-threshold.ll @@ -0,0 +1,48 @@ +; RUN: opt -loop-unswitch-memoryssa-threshold=0 -memssa-check-limit=1 -passes='loop-mssa(unswitch),verify' -S < %s | FileCheck --check-prefix=THRESHOLD-0 %s +; RUN: opt -memssa-check-limit=1 -passes='loop-mssa(unswitch),verify' -S < %s | FileCheck --check-prefix=THRESHOLD-DEFAULT %s + +; Make sure -loop-unswitch-memoryssa-threshold works. The test uses +; -memssa-check-limit=1 to effectively disable any MemorySSA optimizations +; on construction, so the test can be kept simple. + +declare void @clobber() + +; Partial unswitching is possible, because the store in %noclobber does not +; alias the load of the condition. +define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) { +; THRESHOLD-0-LABEL: @partial_unswitch_true_successor +; THRESHOLD-0: entry: +; THRESHOLD-0: br label %loop.header +; +; THRESHOLD-DEFAULT-LABEL: @partial_unswitch_true_successor +; THRESHOLD-DEFAULT-NEXT: entry: +; THRESHOLD-DEFAULT-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4 +; THRESHOLD-DEFAULT-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100 +; THRESHOLD-DEFAULT-NEXT: br i1 [[C]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %lv = load i32, i32* %ptr.1 + %sc = icmp eq i32 %lv, 100 + br i1 %sc, label %noclobber, label %clobber + +noclobber: + %gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv + store i32 %lv, i32* %gep.1 + br label %loop.latch + +clobber: + call void @clobber() + br label %loop.latch + +loop.latch: + %c = icmp ult i32 %iv, %N + %iv.next = add i32 %iv, 1 + br i1 %c, label %loop.header, label %exit + +exit: + ret i32 10 +} Index: llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch-update-memoryssa.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch-update-memoryssa.ll @@ -0,0 +1,76 @@ +; RUN: opt -passes='loop-mssa(unswitch),verify' -verify-dom-info -verify-memoryssa -S %s | FileCheck %s +; RUN: opt -passes='loop-mssa(unswitch),verify' -memssa-check-limit=3 -verify-dom-info -verify-memoryssa -S %s | FileCheck %s + +declare void @clobber() + +; Check that MemorySSA updating can deal with a clobbering access of a +; duplicated load being a MemoryPHI outside the loop. 
+define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { +; CHECK-LABEL: @partial_unswitch_memssa_update( +; CHECK-LABEL: loop.ph: +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +entry: + br i1 %c, label %loop.ph, label %outside.clobber + +outside.clobber: + call void @clobber() + br label %loop.ph + +loop.ph: + br label %loop.header + +loop.header: + %lv = load i32, i32* %ptr, align 4 + %hc = icmp eq i32 %lv, 0 + br i1 %hc, label %if, label %then + +if: + br label %loop.latch + +then: + br label %loop.latch + +loop.latch: + br i1 true, label %loop.header, label %exit + +exit: + ret void +} + +; Check that MemorySSA updating can deal with skipping defining accesses in the +; loop body until it finds the first defining access outside the loop. +define void @partial_unswitch_inloop_stores_beteween_outside_defining_access(i64* noalias %ptr, i16* noalias %src) { +; CHECK-LABEL: @partial_unswitch_inloop_stores_beteween_outside_defining_access +; CHECK-LABEL: entry: +; CHECK-NEXT: store i64 0, i64* %ptr, align 1 +; CHECK-NEXT: store i64 1, i64* %ptr, align 1 +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i16, i16* %src, align 1 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i16 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +; +entry: + store i64 0, i64* %ptr, align 1 + store i64 1, i64* %ptr, align 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + store i64 2, i64* %ptr, align 1 + %lv = load i16, i16* %src, align 1 + %invar.cond = icmp eq i16 %lv, 0 + br i1 %invar.cond, label %noclobber, label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + Index: llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll =================================================================== --- llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll +++ llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll @@ -6,10 +6,27 @@ define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], 
label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] ; CHECK: noclobber: @@ -18,9 +35,11 @@ ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -52,10 +71,27 @@ define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_false_successor( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]] ; CHECK: clobber: @@ -64,9 +100,11 @@ ; CHECK: noclobber: ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -98,10 +136,29 @@ define i32 @partial_unswtich_gep_load_icmp(i32** %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswtich_gep_load_icmp( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32*, i32** [[PTR:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 100 +; CHECK-NEXT: br i1 [[TMP3]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp 
ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32*, i32** [[PTR:%.*]], i32 1 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32*, i32** [[PTR]], i32 1 ; CHECK-NEXT: [[LV_1:%.*]] = load i32*, i32** [[GEP]], align 8 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[LV_1]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 @@ -112,9 +169,11 @@ ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -148,11 +207,32 @@ define i32 @partial_unswitch_reduction_phi(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_reduction_phi( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: [[RED_US:%.*]] = phi i32 [ 20, [[ENTRY_SPLIT_US]] ], [ [[RED_NEXT_US:%.*]], [[LOOP_LATCH_US]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: [[ADD_10_US:%.*]] = add i32 [[RED_US]], 10 +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[RED_NEXT_US]] = phi i32 [ [[ADD_10_US]], [[NOCLOBBER_US]] ] +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: [[RED_NEXT_LCSSA_US:%.*]] = phi i32 [ [[RED_NEXT_US]], [[LOOP_LATCH_US]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 20, [[ENTRY]] ], [ [[RED_NEXT:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 20, [[ENTRY_SPLIT]] ], [ [[RED_NEXT:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]] ; CHECK: clobber: @@ -164,12 +244,15 @@ ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[RED_NEXT]] = phi i32 
[ [[ADD_5]], [[CLOBBER]] ], [ [[ADD_10]], [[NOCLOBBER]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: exit.split: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP_LATCH]] ] -; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[RED_NEXT_LCSSA]], [[EXIT_SPLIT]] ], [ [[RED_NEXT_LCSSA_US]], [[EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] ; entry: br label %loop.header @@ -206,23 +289,45 @@ define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor_noclobber( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR_1:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: [[LV_US:%.*]] = load i32, i32* [[PTR_1]], align 4 +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: [[GEP_1_US:%.*]] = getelementptr i32, i32* [[PTR_2:%.*]], i32 [[IV_US]] +; CHECK-NEXT: store i32 [[LV_US]], i32* [[GEP_1_US]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_1:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_1]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] ; CHECK: noclobber: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, i32* [[PTR_2:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, i32* [[PTR_2]], i32 [[IV]] ; CHECK-NEXT: store i32 [[LV]], i32* [[GEP_1]], align 4 ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: clobber: ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -501,9 +606,26 @@ ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32* [[PTR:%.*]], null ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]] ; CHECK: 
loop.ph: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PH_SPLIT_US:%.*]], label [[LOOP_PH_SPLIT:%.*]] +; CHECK: loop.ph.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[LOOP_PH_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_LOOPEXIT_SPLIT_US:%.*]] +; CHECK: exit.loopexit.split.us: +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] +; CHECK: loop.ph.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[LOOP_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] @@ -513,9 +635,11 @@ ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: exit.loopexit.split: +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -558,10 +682,27 @@ ; CHECK-LABEL: @partial_unswitch_true_successor_insert_point( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] ; CHECK: noclobber: @@ -570,9 +711,11 @@ ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] 
; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -608,10 +751,28 @@ define i32 @partial_unswitch_true_successor_hoist_invariant(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_successor_hoist_invariant( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 100 +; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] +; CHECK: noclobber.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 1 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] @@ -621,9 +782,11 @@ ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -883,19 +1046,36 @@ define i32 @partial_unswitch_true_to_latch(i32* %ptr, i32 %N) { ; CHECK-LABEL: @partial_unswitch_true_to_latch( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] +; CHECK: loop.header.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] +; CHECK-NEXT: br label [[LOOP_LATCH_US]] +; CHECK: loop.latch.us: +; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: 
exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[LOOP_LATCH]], label [[CLOBBER:%.*]] ; CHECK: clobber: ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 10 ; @@ -920,3 +1100,80 @@ exit: ret i32 10 } + +; There could be multiple unswitch candidates which include partially invariant +; condition. When the exiting block is selected as best unswitch one, clone loop +; blocks. +define i32 @partial_unswitch_exiting_block_with_multiple_unswitch_candidates(i32 %0, i32 %1, i32* %ptr) { +; CHECK-LABEL: @partial_unswitch_exiting_block_with_multiple_unswitch_candidates( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[PTR:%.*]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 41 +; CHECK-NEXT: br i1 [[TMP3]], label [[ENTRY_SPLIT_US_SPLIT:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]] +; CHECK: entry.split.us.split.us: +; CHECK-NEXT: br label [[LOOP_US_US:%.*]] +; CHECK: loop.us.us: +; CHECK-NEXT: br label [[EXITING_US_US:%.*]] +; CHECK: exiting.us.us: +; CHECK-NEXT: br label [[LOOP_US_US]] +; CHECK: entry.split.us.split: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[VAL_US:%.*]] = load i32, i32* [[PTR]], align 16 +; CHECK-NEXT: [[IF_COND_US:%.*]] = icmp ult i32 [[VAL_US]], 41 +; CHECK-NEXT: br i1 [[IF_COND_US]], label [[IF_THEN_US:%.*]], label [[EXITING_US:%.*]] +; CHECK: if.then.us: +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[PTR]], align 16 +; CHECK-NEXT: br label [[EXITING_US]] +; CHECK: exiting.us: +; CHECK-NEXT: br label [[LOOP_US]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR]], align 16 +; CHECK-NEXT: [[IF_COND:%.*]] = icmp ult i32 [[VAL]], 41 +; CHECK-NEXT: br i1 [[IF_COND]], label [[IF_THEN:%.*]], label [[EXITING:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[TMP1]], i32* [[PTR]], align 16 +; CHECK-NEXT: br label [[EXITING]] +; CHECK: exiting: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RET_VAL:%.*]] = phi i32 [ 1, [[EXITING]] ] +; CHECK-NEXT: ret i32 [[RET_VAL]] +; +entry: + %exit.cond = icmp ne i32 %0, 0 + br label %loop + +loop: + %val = load i32, i32* %ptr, align 16 + %if.cond = icmp ult i32 %val, 41 + br i1 %if.cond, label %if.then, label %exiting + +if.then: + store i32 %1, i32* %ptr, align 16 + br label %exiting + +exiting: + br i1 %exit.cond, label 
%loop, label %exit + +exit: + %ret.val = phi i32 [ 1, %exiting ] + ret i32 %ret.val +} + +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]} +; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[UNSWITCH_PARTIAL_DISABLE]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[UNSWITCH_PARTIAL_DISABLE]]}
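
For readers who want the effect of the new partially invariant unswitching in one place, below is a small hand-written sketch (not part of the patch and not generated by update_test_checks.py) of the transformation applied to a loop like @partial_unswitch_true_to_latch above: the loop-invariant load and compare are copied into the preheader, the loop is cloned, and the copy entered when the hoisted condition holds drops the @clobber path. Function, value, and metadata names are illustrative only, and the output is simplified; the real pass also splits the exit block, as the CHECK lines above show.

declare void @clobber()

; Before: %lv is reloaded every iteration, but no path through the loop
; clobbers %ptr before the load executes again, so the condition is
; "partially invariant".
define void @sketch(i32* %ptr, i32 %N) {
entry:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %lv = load i32, i32* %ptr, align 4
  %sc = icmp eq i32 %lv, 100
  br i1 %sc, label %loop.latch, label %clobber

clobber:
  call void @clobber()
  br label %loop.latch

loop.latch:
  %c = icmp ult i32 %iv, %N
  %iv.next = add i32 %iv, 1
  br i1 %c, label %loop.header, label %exit

exit:
  ret void
}

; After (simplified): the load/compare pair is duplicated into the preheader
; and selects between two loop versions; the version taken when the condition
; holds never calls @clobber. The surviving loop carries
; llvm.loop.unswitch.partial.disable metadata so a later run does not
; partially unswitch it on the same condition again.
define void @sketch.unswitched(i32* %ptr, i32 %N) {
entry:
  %0 = load i32, i32* %ptr, align 4
  %1 = icmp eq i32 %0, 100
  br i1 %1, label %entry.split.us, label %entry.split

entry.split.us:
  br label %loop.header.us

loop.header.us:
  %iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, %loop.latch.us ]
  br label %loop.latch.us

loop.latch.us:
  %c.us = icmp ult i32 %iv.us, %N
  %iv.next.us = add i32 %iv.us, 1
  br i1 %c.us, label %loop.header.us, label %exit

entry.split:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0, %entry.split ], [ %iv.next, %loop.latch ]
  %lv = load i32, i32* %ptr, align 4
  %sc = icmp eq i32 %lv, 100
  br i1 %sc, label %loop.latch, label %clobber

clobber:
  call void @clobber()
  br label %loop.latch

loop.latch:
  %c = icmp ult i32 %iv, %N
  %iv.next = add i32 %iv, 1
  br i1 %c, label %loop.header, label %exit, !llvm.loop !0

exit:
  ret void
}

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unswitch.partial.disable"}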