diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -71,6 +72,7 @@ using namespace llvm::PatternMatch; STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSelects, "Number of selects unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); STATISTIC(NumGuards, "Number of guards turned into branches for unswitching"); STATISTIC(NumTrivial, "Number of unswitches that are trivial"); @@ -1112,8 +1114,8 @@ ArrayRef ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap &DominatingSucc, - ValueToValueMapTy &VMap, - SmallVectorImpl &DTUpdates, AssumptionCache &AC, + SelectInst *SLI, ValueToValueMapTy &VMap, + DomTreeUpdater &DTU, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE) { SmallVector NewBlocks; @@ -1229,36 +1231,45 @@ // Remove the cloned parent as a predecessor of any successor we ended up // cloning other than the unswitched one. auto *ClonedParentBB = cast(VMap.lookup(ParentBB)); - for (auto *SuccBB : successors(ParentBB)) { - if (SuccBB == UnswitchedSuccBB) - continue; + if (!SLI) { + for (auto *SuccBB : successors(ParentBB)) { + if (SuccBB == UnswitchedSuccBB) + continue; - auto *ClonedSuccBB = cast_or_null(VMap.lookup(SuccBB)); - if (!ClonedSuccBB) - continue; + auto *ClonedSuccBB = cast_or_null(VMap.lookup(SuccBB)); + if (!ClonedSuccBB) + continue; - ClonedSuccBB->removePredecessor(ClonedParentBB, - /*KeepOneInputPHIs*/ true); + ClonedSuccBB->removePredecessor(ClonedParentBB, + /*KeepOneInputPHIs*/ true); + } } - // Replace the cloned branch with an unconditional branch to the cloned - // unswitched successor. auto *ClonedSuccBB = cast(VMap.lookup(UnswitchedSuccBB)); - Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); - // Trivial Simplification. If Terminator is a conditional branch and - // condition becomes dead - erase it. - Value *ClonedConditionToErase = nullptr; - if (auto *BI = dyn_cast(ClonedTerminator)) - ClonedConditionToErase = BI->getCondition(); - else if (auto *SI = dyn_cast(ClonedTerminator)) - ClonedConditionToErase = SI->getCondition(); - - ClonedTerminator->eraseFromParent(); - BranchInst::Create(ClonedSuccBB, ClonedParentBB); - - if (ClonedConditionToErase) - RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, - MSSAU); + if (SLI) { + // Replace the cloned select with the true operand + SelectInst *ClonedSLI = cast(VMap.lookup(SLI)); + ClonedSLI->replaceAllUsesWith(ClonedSLI->getOperand(1)); + ClonedSLI->eraseFromParent(); + } else { + // Replace the cloned branch with an unconditional branch to the cloned + // unswitched successor. + Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); + // Trivial Simplification. If Terminator is a conditional branch and + // condition becomes dead - erase it. + Value *ClonedConditionToErase = nullptr; + if (auto *BI = dyn_cast(ClonedTerminator)) + ClonedConditionToErase = BI->getCondition(); + else if (auto *SI = dyn_cast(ClonedTerminator)) + ClonedConditionToErase = SI->getCondition(); + + ClonedTerminator->eraseFromParent(); + BranchInst::Create(ClonedSuccBB, ClonedParentBB); + + if (ClonedConditionToErase) + RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, + MSSAU); + } // If there are duplicate entries in the PHI nodes because of multiple edges // to the unswitched successor, we need to nuke all but one as we replaced it @@ -1283,7 +1294,7 @@ for (auto *ClonedBB : NewBlocks) { for (auto *SuccBB : successors(ClonedBB)) if (SuccSet.insert(SuccBB).second) - DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB}); + DTU.applyUpdates({{DominatorTree::Insert, ClonedBB, SuccBB}}); SuccSet.clear(); } @@ -2081,16 +2092,17 @@ function_ref DestroyLoopCB) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast(&TI); - SwitchInst *SI = BI ? nullptr : cast(&TI); + SwitchInst *SWI = dyn_cast(&TI); + SelectInst *SLI = dyn_cast(&TI); // We can only unswitch switches, conditional branches with an invariant // condition, or combining invariant conditions with an instruction or // partially invariant instructions. - assert((SI || (BI && BI->isConditional())) && - "Can only unswitch switches and conditional branch!"); + assert((SWI || SLI || (BI && BI->isConditional())) && + "Can only unswitch switches, selects and conditional branch!"); bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty(); bool FullUnswitch = - SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] && + SWI || SLI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] && !PartiallyInvariant); if (FullUnswitch) assert(Invariants.size() == 1 && @@ -2115,7 +2127,7 @@ (void)Cond; assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && - "Only `or`, `and`, an `select`, partially invariant instructions " + "Only `or`, `and` and partially invariant instructions " "can combine invariants being unswitched."); if (!match(Cond, m_LogicalOr())) { if (match(Cond, m_LogicalAnd()) || @@ -2127,18 +2139,17 @@ } BasicBlock *RetainedSuccBB = - BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest(); + BI ? BI->getSuccessor(1 - ClonedSucc) : SLI ? SLI->getParent() : SWI->getDefaultDest(); SmallSetVector UnswitchedSuccBBs; if (BI) UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc)); + else if (SLI) + UnswitchedSuccBBs.insert(ParentBB); else - for (auto Case : SI->cases()) + for (auto Case : SWI->cases()) if (Case.getCaseSuccessor() != RetainedSuccBB) UnswitchedSuccBBs.insert(Case.getCaseSuccessor()); - assert(!UnswitchedSuccBBs.count(RetainedSuccBB) && - "Should not unswitch the same successor we are retaining!"); - // The branch should be in this exact loop. Any inner loop's invariant branch // should be handled by unswitching that inner loop. The caller of this // routine should filter out any candidates that remain (but were skipped for @@ -2191,18 +2202,20 @@ // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by // some *other* successor with a single data structure. We use this to - // significantly reduce cloning. + // significantly reduce cloning. We must clone everything when unswitching + // a select instruction. SmallDenseMap DominatingSucc; - for (auto *SuccBB : llvm::concat( - makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs)) - if (SuccBB->getUniquePredecessor() || - llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { - return PredBB == ParentBB || DT.dominates(SuccBB, PredBB); - })) - visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) { - DominatingSucc[BB] = SuccBB; - return true; - }); + if (!SLI) + for (auto *SuccBB : llvm::concat( + makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs)) + if (SuccBB->getUniquePredecessor() || + llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { + return PredBB == ParentBB || DT.dominates(SuccBB, PredBB); + })) + visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) { + DominatingSucc[BB] = SuccBB; + return true; + }); // Split the preheader, so that we know that there is a safe place to insert // the conditional branch. We will change the preheader to have a conditional @@ -2213,6 +2226,7 @@ BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU); // Keep track of the dominator tree updates needed. + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector DTUpdates; // Clone the loop for each unswitched successor. @@ -2223,7 +2237,7 @@ VMaps.emplace_back(new ValueToValueMapTy()); ClonedPHs[SuccBB] = buildClonedLoopBlocks( L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB, - DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE); + DominatingSucc, SLI, *VMaps.back(), DTU, AC, DT, LI, MSSAU, SE); } // Drop metadata if we may break its semantics by moving this instr into the @@ -2248,55 +2262,80 @@ // nuke the initial terminator placed in the split block. SplitBB->getTerminator()->eraseFromParent(); if (FullUnswitch) { - // Splice the terminator from the original loop and rewrite its - // successors. - SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); + Instruction *NewTI; - // Keep a clone of the terminator for MSSA updates. - Instruction *NewTI = TI.clone(); - ParentBB->getInstList().push_back(NewTI); - - // First wire up the moved terminator to the preheaders. - if (BI) { + if (SLI) { BasicBlock *ClonedPH = ClonedPHs.begin()->second; - BI->setSuccessor(ClonedSucc, ClonedPH); - BI->setSuccessor(1 - ClonedSucc, LoopPH); - Value *Cond = skipTrivialSelect(BI->getCondition()); + NewTI = BranchInst::Create(ClonedPH, LoopPH, SLI->getCondition()); + SplitBB->getInstList().push_back(NewTI); + + Value *Cond = SLI->getCondition(); if (InsertFreeze) { if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI); } - BI->setCondition(Cond); - DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + DTU.applyUpdates({{DominatorTree::Insert, SplitBB, ClonedPH}}); + + // Replace the retained select with the false operand + SLI->replaceAllUsesWith(SLI->getOperand(2)); + SLI->eraseFromParent(); + + // Fold terminators that used the select instruction and now have a + // constant condition. This can reduce the size of the CFG and make + // further unswitching cheaper. Terminators in the new unswitched + // loop will be folded in LoopSimplifyCFG. + for (BasicBlock *BB : L.getBlocks()) + ConstantFoldTerminator(BB, false, nullptr, &DTU); } else { - assert(SI && "Must either be a branch or switch!"); - - // Walk the cases and directly update their successors. - assert(SI->getDefaultDest() == RetainedSuccBB && - "Not retaining default successor!"); - SI->setDefaultDest(LoopPH); - for (const auto &Case : SI->cases()) - if (Case.getCaseSuccessor() == RetainedSuccBB) - Case.setSuccessor(LoopPH); - else - Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); - - if (InsertFreeze) { - auto Cond = SI->getCondition(); - if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT)) - SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI)); + assert((BI || SWI) && "Must either be a branch, select or switch!"); + + // Splice the terminator from the original loop and rewrite its + // successors. + SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); + + // Keep a clone of the terminator for MSSA updates. + NewTI = TI.clone(); + ParentBB->getInstList().push_back(NewTI); + + // First wire up the moved terminator to the preheaders. + if (BI) { + BasicBlock *ClonedPH = ClonedPHs.begin()->second; + BI->setSuccessor(ClonedSucc, ClonedPH); + BI->setSuccessor(1 - ClonedSucc, LoopPH); + Value *Cond = skipTrivialSelect(BI->getCondition()); + if (InsertFreeze) { + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) + Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI); + } + BI->setCondition(Cond); + DTU.applyUpdates({{DominatorTree::Insert, SplitBB, ClonedPH}}); + } else { + // Walk the cases and directly update their successors. + assert(SWI->getDefaultDest() == RetainedSuccBB && + "Not retaining default successor!"); + SWI->setDefaultDest(LoopPH); + for (const auto &Case : SWI->cases()) + if (Case.getCaseSuccessor() == RetainedSuccBB) + Case.setSuccessor(LoopPH); + else + Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + + if (InsertFreeze) { + auto Cond = SWI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SWI, &DT)) + SWI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SWI)); + } + // We need to use the set to populate domtree updates as even when there + // are multiple cases pointing at the same successor we only want to + // remove and insert one edge in the domtree. + for (BasicBlock *SuccBB : UnswitchedSuccBBs) + DTU.applyUpdates( + {{DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}}); } - // We need to use the set to populate domtree updates as even when there - // are multiple cases pointing at the same successor we only want to - // remove and insert one edge in the domtree. - for (BasicBlock *SuccBB : UnswitchedSuccBBs) - DTUpdates.push_back( - {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); } if (MSSAU) { - DT.applyUpdates(DTUpdates); - DTUpdates.clear(); + DTU.flush(); // Remove all but one edge to the retained block and all unswitched // blocks. This is to avoid having duplicate entries in the cloned Phis, @@ -2317,7 +2356,7 @@ // Now unhook the successor relationship as we'll be replacing // the terminator with a direct branch. This is much simpler for branches - // than switches so we handle those first. + // than switches so we handle those first. No work required for selects. if (BI) { // Remove the parent as a predecessor of the unswitched successor. assert(UnswitchedSuccBBs.size() == 1 && @@ -2325,18 +2364,18 @@ BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin(); UnswitchedSuccBB->removePredecessor(ParentBB, /*KeepOneInputPHIs*/ true); - DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB}); - } else { + DTU.applyUpdates({{DominatorTree::Delete, ParentBB, UnswitchedSuccBB}}); + } else if (SWI) { // Note that we actually want to remove the parent block as a predecessor // of *every* case successor. The case successor is either unswitched, // completely eliminating an edge from the parent to that successor, or it // is a duplicate edge to the retained successor as the retained successor // is always the default successor and as we'll replace this with a direct // branch we no longer need the duplicate entries in the PHI nodes. - SwitchInst *NewSI = cast(NewTI); - assert(NewSI->getDefaultDest() == RetainedSuccBB && + SwitchInst *NewSWI = cast(NewTI); + assert(NewSWI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!"); - for (const auto &Case : NewSI->cases()) + for (const auto &Case : NewSWI->cases()) Case.getCaseSuccessor()->removePredecessor( ParentBB, /*KeepOneInputPHIs*/ true); @@ -2345,15 +2384,17 @@ // are multiple cases pointing at the same successor we only want to // remove and insert one edge in the domtree. for (BasicBlock *SuccBB : UnswitchedSuccBBs) - DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB}); + DTU.applyUpdates({{DominatorTree::Delete, ParentBB, SuccBB}}); } - // After MSSAU update, remove the cloned terminator instruction NewTI. - ParentBB->getTerminator()->eraseFromParent(); + if (!SLI) { + // After MSSAU update, remove the cloned terminator instruction NewTI. + ParentBB->getTerminator()->eraseFromParent(); - // Create a new unconditional branch to the continuing block (as opposed to - // the one cloned). - BranchInst::Create(RetainedSuccBB, ParentBB); + // Create a new unconditional branch to the continuing block (as opposed to + // the one cloned). + BranchInst::Create(RetainedSuccBB, ParentBB); + } } else { assert(BI && "Only branches have partial unswitching."); assert(UnswitchedSuccBBs.size() == 1 && @@ -2369,11 +2410,10 @@ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, FreezeLoopUnswitchCond, BI, &AC, DT); } - DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + DTU.applyUpdates({{DominatorTree::Insert, SplitBB, ClonedPH}}); if (MSSAU) { - DT.applyUpdates(DTUpdates); - DTUpdates.clear(); + DTU.flush(); // Perform MSSA cloning updates. for (auto &VMap : VMaps) @@ -2384,7 +2424,7 @@ } // Apply the updates accumulated above to get an up-to-date dominator tree. - DT.applyUpdates(DTUpdates); + DTU.flush(); // Now that we have an accurate dominator tree, first delete the dead cloned // blocks so that we can accurately build any cloned loops. It is important to @@ -2545,14 +2585,17 @@ if (BI) ++NumBranches; + else if (SLI) + ++NumSelects; else ++NumSwitches; + } /// Recursively compute the cost of a dominator subtree based on the per-block /// cost map provided. /// -/// The recursive computation is memozied into the provided DT-indexed cost map +/// The recursive computation is memoized into the provided DT-indexed cost map /// to allow querying it for most nodes in the domtree without it becoming /// quadratic. static InstructionCost computeDomSubtreeCost( @@ -2687,14 +2730,16 @@ // another path to the latch remaining that does not allow to eliminate the // loop copy on unswitch). const BasicBlock *Latch = L.getLoopLatch(); - const BasicBlock *CondBlock = TI.getParent(); - if (DT.dominates(CondBlock, Latch) && - (isGuard(&TI) || - llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { - return L.contains(SuccBB); - }) <= 1)) { - NumCostMultiplierSkipped++; - return 1; + if (TI.isTerminator()) { + const BasicBlock *CondBlock = TI.getParent(); + if (DT.dominates(CondBlock, Latch) && + (isGuard(&TI) || + llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { + return L.contains(SuccBB); + }) <= 1)) { + NumCostMultiplierSkipped++; + return 1; + } } auto *ParentL = L.getParentLoop(); @@ -2779,6 +2824,13 @@ UnswitchCandidates.push_back({&I, {Cond}}); } + for (Instruction &I : *BB) { + if (auto *SI = dyn_cast(&I)) { + if (L.isLoopInvariant(SI->getCondition()) && !isa(SI->getCondition())) + UnswitchCandidates.push_back({SI, {SI->getCondition()}}); + } + } + if (auto *SI = dyn_cast(BB->getTerminator())) { // We can only consider fully loop-invariant switch conditions as we need // to completely eliminate the switch after unswitching. @@ -2933,6 +2985,11 @@ // cost for that terminator. auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) -> InstructionCost { + // Unswitching selects require copying the entire loop. + if (isa(&TI)) { + return LoopCost; + } + BasicBlock &BB = *TI.getParent(); SmallPtrSet Visited; diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll @@ -1,8 +1,15 @@ ; RUN: opt < %s -simple-loop-unswitch -verify-memoryssa ; PR8622 + +; Can't unswitch `%call1 = select` because `%tobool.i` is defined in the loop. `tobool.i` could +; have been hoisted in the LICM pass, but LoopUnswitching pass does not do LICM. + @g_38 = external global i32, align 4 define void @func_67(i32 %p_68.coerce) nounwind { +; CHECK-LABEL: @func_67 +; CHECK: select + entry: br i1 true, label %for.end12, label %bb.nph diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll @@ -8,6 +8,10 @@ @b = common global i32 0, align 4 define void @func() noreturn nounwind uwtable { +; CHECK-LABEL: @func +; CHECK-NOT: select +; CHECK: br i1 %tobool + entry: %0 = load i32, i32* @a, align 4 %tobool = icmp eq i32 %0, 0 diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -2331,27 +2331,32 @@ define i32 @test_partial_unswitch_all_conds_guaranteed_non_poison(i1 noundef %c.1, i1 noundef %c.2) { ; CHECK-LABEL: @test_partial_unswitch_all_conds_guaranteed_non_poison( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C_1:%.*]], [[C_2:%.*]] -; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]] -; CHECK: entry.split.us: -; CHECK-NEXT: br label [[LOOP_US:%.*]] -; CHECK: loop.us: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @a() -; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] -; CHECK: exit.split.us: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: entry.split: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() -; CHECK-NEXT: [[SEL:%.*]] = select i1 true, i1 true, i1 false -; CHECK-NEXT: br i1 [[SEL]], label [[LOOP]], label [[EXIT_SPLIT:%.*]] -; CHECK: exit.split: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: ret i32 0 -; +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %c.1, label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT; br i1 %c.2, label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]] +; CHECK: entry.split.us.split.us: +; CHECK-NEXT: br label [[LOOP_US_US:%.*]] +; CHECK: loop.us.us: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[LOOP_US_US]] +; CHECK: entry.split.us.split: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[EXIT_SPLIT:%.*]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 + entry: br label %loop diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll @@ -93,16 +93,20 @@ ; CHECK-NEXT: br label [[LOOP_US:%.*]] ; CHECK: loop.us: ; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[LOOP_US]] ] -; CHECK-NEXT: br label [[LOOP_US]] +; CHECK-NEXT: br i1 true, label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[P_US]], [[LOOP_US]] ] +; CHECK-NEXT: br label %exit ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 false, i1 true, i1 false +; CHECK-NEXT: br label [[EXIT_SPLIT:%.*]] +; CHECK: exit.split: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, %loop ] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[LOOP]] ] -; CHECK-NEXT: ret i32 [[LCSSA]] +; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[LCSSA]], [[EXIT_SPLIT]] ], [ [[LCSSA_US]], [[EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[RET]] ; entry: %c = icmp ult i32 %x, 100 diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr32818.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr32818.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr32818.ll @@ -0,0 +1,24 @@ +; Check that the call doesn't get removed even if +; it has no uses. It could have side-effects. +; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +define i32 @tinkywinky(i8 %patatino) { +; CHECK-LABEL: @tinky +; CHECK-NOT: select +; CHECK: br i1 %cmp1 + + %cmp1 = icmp slt i8 %patatino, 5 + br label %body +body: + %i = select i1 %cmp1, i8 6, i8 undef + br i1 true, label %body, label %end +end: + %split = phi i8 [ %i, %body ] + %conv4 = sext i8 %split to i32 +; CHECK: tail call fastcc i32 @fn5( + %call = tail call fastcc i32 @fn5(i32 %conv4) + ret i32 0 +} +declare fastcc i32 @fn5(i32 returned) unnamed_addr diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-equality-undef.ll b/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-equality-undef.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-equality-undef.ll @@ -0,0 +1,122 @@ +; REQUIRES: asserts +; RUN: opt < %s -instcombine -licm -simple-loop-unswitch -enable-new-pm=0 -verify-memoryssa -disable-output -stats 2>&1| FileCheck %s +; Check no loop unswitch is done because unswitching of equality expr with +; undef is unsafe before the freeze patch is committed. +; CHECK-NOT: Number of branches unswitched + +define void @ham(i64 %arg) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + br i1 %tmp, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp2 = load volatile i64, i64* @global, align 8 + br label %bb3 + +bb3: ; preds = %bb1, %bb + %tmp4 = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ] + %tmp5 = load i64, i64* @global.1, align 8 + br label %bb6 + +bb6: ; preds = %bb21, %bb3 + %tmp7 = phi i64 [ 3, %bb21 ], [ %tmp5, %bb3 ] + %tmp8 = phi i64 [ %tmp25, %bb21 ], [ 0, %bb3 ] + %tmp9 = icmp eq i64 %tmp7, %arg + br i1 %tmp9, label %bb10, label %bb28 + +bb10: ; preds = %bb6 + %tmp11 = icmp eq i64 %tmp7, 0 + br i1 %tmp11, label %bb21, label %bb12 + +bb12: ; preds = %bb10 + %tmp13 = load i64, i64* @global.2, align 8 + %tmp14 = add nsw i64 %tmp13, 1 + store i64 %tmp14, i64* @global.2, align 8 + %tmp15 = load i64, i64* @global.3, align 8 + %tmp16 = icmp eq i64 %tmp15, %tmp4 + br i1 %tmp16, label %bb17, label %bb21 + +bb17: ; preds = %bb12 + %tmp18 = phi i64 [ %tmp15, %bb12 ] + %tmp19 = load i64, i64* @global.4, align 8 + %tmp20 = add nsw i64 %tmp19, %tmp18 + store i64 %tmp20, i64* @global.5, align 8 + br label %bb29 + +bb21: ; preds = %bb12, %bb10 + %tmp22 = load i64, i64* @global.3, align 8 + %tmp23 = load volatile i64, i64* @global, align 8 + %tmp24 = add nsw i64 %tmp23, %tmp22 + store i64 %tmp24, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp25 = add nsw i64 %tmp8, 1 + %tmp26 = load i64, i64* @global.6, align 8 + %tmp27 = icmp slt i64 %tmp25, %tmp26 + br i1 %tmp27, label %bb6, label %bb28 + +bb28: ; preds = %bb21, %bb6 + br label %bb29 + +bb29: ; preds = %bb28, %bb17 + ret void +} + +define void @zot(i64 %arg, i64 %arg1) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + %tmp2 = select i1 %tmp, i64 %arg1, i64 undef + %tmp3 = load i64, i64* @global.1, align 8 + br label %bb4 + +bb4: ; preds = %bb19, %bb + %tmp5 = phi i64 [ 3, %bb19 ], [ %tmp3, %bb ] + %tmp6 = phi i64 [ %tmp23, %bb19 ], [ 0, %bb ] + %tmp7 = icmp eq i64 %tmp5, %arg + br i1 %tmp7, label %bb8, label %bb26 + +bb8: ; preds = %bb4 + %tmp9 = icmp eq i64 %tmp5, 0 + br i1 %tmp9, label %bb19, label %bb10 + +bb10: ; preds = %bb8 + %tmp11 = load i64, i64* @global.2, align 8 + %tmp12 = add nsw i64 %tmp11, 1 + store i64 %tmp12, i64* @global.2, align 8 + %tmp13 = load i64, i64* @global.3, align 8 + %tmp14 = icmp eq i64 %tmp13, %tmp2 + br i1 %tmp14, label %bb15, label %bb19 + +bb15: ; preds = %bb10 + %tmp16 = phi i64 [ %tmp13, %bb10 ] + %tmp17 = load i64, i64* @global.4, align 8 + %tmp18 = add nsw i64 %tmp17, %tmp16 + store i64 %tmp18, i64* @global.5, align 8 + br label %bb27 + +bb19: ; preds = %bb10, %bb8 + %tmp20 = load i64, i64* @global.3, align 8 + %tmp21 = load volatile i64, i64* @global, align 8 + %tmp22 = add nsw i64 %tmp21, %tmp20 + store i64 %tmp22, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp23 = add nsw i64 %tmp6, 1 + %tmp24 = load i64, i64* @global.6, align 8 + %tmp25 = icmp slt i64 %tmp23, %tmp24 + br i1 %tmp25, label %bb4, label %bb26 + +bb26: ; preds = %bb19, %bb4 + br label %bb27 + +bb27: ; preds = %bb26, %bb15 + ret void +} + +@global = common global i64 0, align 8 +@global.1 = common global i64 0, align 8 +@global.2 = common global i64 0, align 8 +@global.3 = common global i64 0, align 8 +@global.4 = common global i64 0, align 8 +@global.5 = common global i64 0, align 8 +@global.6 = common global i64 0, align 8 + + diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-select.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/unswitch-select.ll @@ -0,0 +1,54 @@ +; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +define i32 @test(i1 zeroext %x, i32 %a) local_unnamed_addr { +; CHECK-LABEL: define i32 @test(i1 zeroext %x, i32 %a) local_unnamed_addr { +; CHECK-NOT: select +; CHECK: br i1 %x + +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %s.0 = phi i32 [ %a, %entry ], [ %add, %while.body ] + %cmp = icmp slt i32 %i.0, 10000 + br i1 %cmp, label %while.body, label %while.end + +while.body: ; preds = %while.cond + %cond = select i1 %x, i32 %a, i32 %i.0 + %add = add nsw i32 %s.0, %cond + %inc = add nsw i32 %i.0, 1 + br label %while.cond + +while.end: ; preds = %while.cond + %s.0.lcssa = phi i32 [ %s.0, %while.cond ] + ret i32 %s.0.lcssa +} + +define i32 @cant_unswitch(i32 %a) local_unnamed_addr { +; CHECK-LABEL: @cant_unswitch +; CHECK: select i1 %invariant_cmp + +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %s.0 = phi i32 [ %a, %entry ], [ %add, %while.body ] + %cmp = icmp slt i32 %i.0, 10000 + br i1 %cmp, label %while.body, label %while.end + +while.body: ; preds = %while.cond + %invariant_cmp = icmp ugt i32 %s.0, 100 + %cond = select i1 %invariant_cmp, i32 %a, i32 %i.0 + %add = add nsw i32 %s.0, %cond + %inc = add nsw i32 %i.0, 1 + br label %while.cond + +while.end: ; preds = %while.cond + %s.0.lcssa = phi i32 [ %s.0, %while.cond ] + ret i32 %s.0.lcssa +} +