Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -329,6 +329,7 @@ void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); +void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingLegacyPassPass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); Index: include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -0,0 +1,53 @@ +//===- SimpleLoopUnswitch.h - Hoist loop-invariant control flow -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H +#define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +/// This pass transforms loops that contain branches on loop-invariant +/// conditions to have multiple loops. For example, it turns the left into the +/// right code: +/// +/// for (...) if (lic) +/// A for (...) +/// if (lic) A; B; C +/// B else +/// C for (...) +/// A; C +/// +/// This can increase the size of the code exponentially (doubling it every time +/// a loop is unswitched) so we only unswitch if the resultant code will be +/// smaller than a threshold. +/// +/// This pass expects LICM to be run before it to hoist invariant conditions out +/// of the loop, to make the unswitching opportunity obvious. +/// +class SimpleLoopUnswitchPass : public PassInfoMixin { +public: + SimpleLoopUnswitchPass() = default; + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +/// Create the legacy pass object for the simple loop unswitcher. +/// +/// See the documentaion for `SimpleLoopUnswitchPass` for details. +Pass *createSimpleLoopUnswitchLegacyPass(); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -125,6 +125,7 @@ #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -229,6 +229,7 @@ LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("unroll", LoopUnrollPass::create()) LOOP_PASS("unroll-full", LoopUnrollPass::createFull()) +LOOP_PASS("unswitch", SimpleLoopUnswitchPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -145,6 +146,11 @@ cl::Hidden, cl::desc("Disable shrink-wrap library calls")); +static cl::opt + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable the simple loop unswitch pass.")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -312,7 +318,10 @@ // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -55,6 +55,7 @@ Scalar.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp + SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); Index: lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,649 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDomAfterUnswitch(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch1"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necesasrily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. +/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // Blocks reachable from the unswitched block may need to change their IDom + // as well. + SmallSetVector Worklist; + for (auto *SuccBB : successors(UnswitchedBB)) + Worklist.insert(SuccBB); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach1"); + // If this block doesn't have an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree hasn't changed. Either + // it is above the region hoisted and still valid, or it is below the + // hoisted block and so was trivially updated. This also applies to + // everything reachable from this block so we're completely done with the + // it. + if (!DomChain.count(Node->getIDom())) + continue; + + // We need to change the IDom for this node but also walk its successors + // which could have similar dominance position. + DT.changeImmediateDominator(Node, OldPHNode); + for (auto *SuccBB : successors(BB)) + Worklist.insert(SuccBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split. Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor((LoopExitSuccIdx + 1) % 2); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + // If the loop exit block contains phi nodes, this isn't trivial. + if (isa(LoopExitBB->begin())) + return false; + + DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (llvm::all_of(predecessors(LoopExitBB), [&](BasicBlock *PredBB) { + return PredBB == BI.getParent(); + })) { + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + BasicBlock *ParentBB = BI.getParent(); + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor((LoopExitSuccIdx + 1) % 2, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Now we need to update the dominator tree. + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + // But if we split something off of the loop exit block then we also removed + // one of the predecessors for the loop exit block and may need to update its + // idom. + if (UnswitchedBB != LoopExitBB) + updateLoopExitIDomAfterUnswitch(LoopExitBB, L, DT); + + // Since this is an i1 condition we can also trivially replace uses of it + // within the loop with a constant. + replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + + ++NumTrivial; + ++NumBranches; + return true; +} + +/// Unswitch a trivial switcih if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, + LoopInfo &LI) { + DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); + Value *LoopCond = SI.getCondition(); + + // If this isn't switching on an invariant condition, we can't unswitch it. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + SmallVector ExitCaseIndices; + for (auto Case : SI.cases()) { + auto *SuccBB = Case.getCaseSuccessor(); + if (ExitBlockSet.count(SuccBB) && !isa(SuccBB->begin())) + ExitCaseIndices.push_back(Case.getCaseIndex()); + } + BasicBlock *DefaultExitBB = nullptr; + if (ExitBlockSet.count(SI.getDefaultDest()) && + !isa(SI.getDefaultDest()->begin()) && + !isa(SI.getDefaultDest()->getTerminator())) + DefaultExitBB = SI.getDefaultDest(); + else if (ExitCaseIndices.empty()) + return false; + + DEBUG(dbgs() << " unswitching trivial cases...\n"); + + SmallVector, 4> ExitCases; + ExitCases.reserve(ExitCaseIndices.size()); + // We walk the case indices backwards so that we remove the last case first + // and don't disrupt the earlier indices. + for (unsigned Index : reverse(ExitCaseIndices)) { + auto CaseI = SI.case_begin() + Index; + // Save the value of this case. + ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); + // Delete the unswitched cases. + SI.removeCase(CaseI); + } + + // Check if after this all of the remaining cases point at the same + // successor. + BasicBlock *CommonSuccBB = nullptr; + if (SI.getNumCases() > 0 && + std::all_of(std::next(SI.case_begin()), SI.case_end(), + [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == + SI.case_begin()->getCaseSuccessor(); + })) + CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + + if (DefaultExitBB) { + // We can't remove the default edge so replace it with an edge to either + // the single common remaining successor (if we have one) or an unreachable + // block. + if (CommonSuccBB) { + SI.setDefaultDest(CommonSuccBB); + } else { + BasicBlock *ParentBB = SI.getParent(); + BasicBlock *UnreachableBB = BasicBlock::Create( + ParentBB->getContext(), + Twine(ParentBB->getName()) + ".unreachable_default", + ParentBB->getParent()); + new UnreachableInst(ParentBB->getContext(), UnreachableBB); + SI.setDefaultDest(UnreachableBB); + DT.addNewBlock(UnreachableBB, ParentBB); + } + } else { + // If we're not unswitching the default, we need it to match any cases to + // have a common successor or if we have no cases it is the common + // successor. + if (SI.getNumCases() == 0) + CommonSuccBB = SI.getDefaultDest(); + else if (SI.getDefaultDest() != CommonSuccBB) + CommonSuccBB = nullptr; + } + + // Split the preheader, so that we know that there is a safe + // place to insert the conditional branch. We will change the preheader to + // have a conditional branch on Cond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + OldPH->getTerminator()->eraseFromParent(); + + // Now add the unswitched switch. + auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + + // Split any exit blocks with remaining in-loop predecessors. We walk in + // reverse so that we split in the same order as the cases appeared. This is + // purely for convenience of reading the resulting IR, but it doesn't cost + // anything really. + SmallDenseMap SplitExitBBMap; + // Handle the default exit if necessary. + // FIXME: It'd be great if we could merge this with the loop below but LLVM's + // ranges aren't quite powerful enough yet. + if (DefaultExitBB && !pred_empty(DefaultExitBB)) { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + updateLoopExitIDomAfterUnswitch(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } + for (auto CasePair : reverse(ExitCases)) { + // Grab a reference to the exit block in the pair so that we can update it. + BasicBlock *&ExitBB = CasePair.second; + + // If this case is the last edge into the exit block, we can simply reuse it + // as it will no longer be a loop exit. No mapping necessary. + if (pred_empty(ExitBB)) + continue; + + // Otherwise we need to split the exit block so that we retain an exit + // block from the loop and a target for the unswitched condition. + BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; + if (!SplitExitBB) { + // If this is the first time we see this, do the split and remember it. + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + updateLoopExitIDomAfterUnswitch(ExitBB, L, DT); + } + ExitBB = SplitExitBB; + } + + // Now add the unswitched cases. We do this in reverse order as we built them + // in reverse order. + for (auto CasePair : reverse(ExitCases)) { + ConstantInt *CaseVal = CasePair.first; + BasicBlock *UnswitchedBB = CasePair.second; + + NewSI->addCase(CaseVal, UnswitchedBB); + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + } + + // If the default was unswitched, re-point it and add explicit cases for + // entering the loop. + if (DefaultExitBB) { + NewSI->setDefaultDest(DefaultExitBB); + updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + + // We removed all the exit cases, so we just copy the cases to the + // unswitched switch. + for (auto Case : SI.cases()) + NewSI->addCase(Case.getCaseValue(), NewPH); + } + + // If we ended up with a common successor for every path through the switch + // after unswitching, rewrite it to an unconditional branch to make it easy + // to recognize. Otherwise we potentially have to recognize the default case + // pointing at unreachable and other complexity. + if (CommonSuccBB) { + BasicBlock *BB = SI.getParent(); + SI.eraseFromParent(); + BranchInst::Create(CommonSuccBB, BB); + } + + ++NumTrivial; + ++NumSwitches; + return true; +} + +/// This routine cans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether any changes were made, not whether +/// anything was actually unswitched. +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, + LoopInfo &LI) { + bool Changed = false; + + // If loop header has only one reachable successor we should keep looking for + // trivial condition candidates in the successor as well. An alternative is + // to constant fold conditions and merge successors into loop header (then we + // only need to check header's terminator). The reason for not doing this in + // LoopUnswitch pass is that it could potentially break LoopPassManager's + // invariants. Folding dead branches could either eliminate the current loop + // or make other loops unreachable. LCSSA form might also not be preserved + // after deleting branches. The following code keeps traversing loop header's + // successors until it finds the trivial condition candidate (condition that + // is not a constant). Since unswitching generates branches with constant + // conditions, this scenario could be very common in practice. + BasicBlock *CurrentBB = L.getHeader(); + SmallPtrSet Visited; + Visited.insert(CurrentBB); + do { + // Check if this loop will execute any side-effecting instructions (e.g. + // stores, calls, volatile loads) in the part of the loop that the code + // *would* execute. Check the header first. + for (Instruction &I : *CurrentBB) + if (I.mayHaveSideEffects()) + return Changed; + + TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + + if (SwitchInst *SI = dyn_cast(CurrentTerm)) { + // See if we have a known case. + if (ConstantInt *Cond = dyn_cast(SI->getCondition())) { + CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); + continue; + } + // See if all the successors are the same basic block. + if (llvm::all_of(SI->cases(), [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == SI->getDefaultDest(); + })) { + CurrentBB = SI->getDefaultDest(); + continue; + } + + if (!unswitchTrivialSwitch(L, *SI, DT, LI)) + // Coludn't unswitch this one so we're done. + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // If unswitching turned the terminator into an unconditional branch then + // we can continue. The unswitching logic specifically works to fold any + // cases it can into an unconditional branch to make it easier to + // recognize here. + auto *BI = dyn_cast(CurrentBB->getTerminator()); + if (!BI || BI->isConditional()) + return Changed; + + CurrentBB = BI->getSuccessor(0); + continue; + } + + BranchInst *BI = dyn_cast(CurrentTerm); + if (!BI) + // We do not understand other terminator instructions. + return false; + + // Skip unconditional branches. + if (!BI->isConditional()) { + CurrentBB = BI->getSuccessor(0); + continue; + } + + // Skip foldable branches. + if (ConstantInt *Cond = dyn_cast(BI->getCondition())) { + CurrentBB = BI->getSuccessor(Cond->isNullValue() ? 1 : 0); + continue; + } + + // Found a trivial condition candidate: non-foldable conditional branch. If + // we fail to unswitch this, we can't do anything else that is trivial. + if (!unswitchTrivialBranch(L, *BI, DT, LI)) + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // We unswitched the branch. This should always leave us with an + // unconditional branch that we can follow now. + BI = cast(CurrentBB->getTerminator()); + assert(!BI->isConditional() && + "Cannot form a conditional branch by unswitching1"); + CurrentBB = BI->getSuccessor(0); + + // When continuing, if we exit the loop or reach a previous visited block, + // then we can not reach any trivial condition candidates (unfoldable + // branch instructions or switch instructions) and no unswitch can happen. + } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + + return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (IE, do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC) { + assert(L.isLCSSAForm(DT) && + "Loops must be in LCSSA form before unswitching."); + bool Changed = false; + + // Must be in loop simplified form: we need a preheader and dedicated exits. + if (!L.isLoopSimplifyForm()) + return false; + + // Try trivial unswitch first before loop over other basic blocks in the loop. + Changed |= unswitchAllTrivialConditions(L, DT, LI); + + // FIXME: Add support for non-trivial unswitching by cloning the loop. + + return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + + if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) + return PreservedAnalyses::all(); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + AR.DT.verifyDomTree(); +#endif + return getLoopPassPreservedAnalyses(); +} + +namespace { +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { + initializeSimpleLoopUnswitchLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + getLoopAnalysisUsage(AU); + } +}; +} // namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &AC = getAnalysis().getAssumptionCache(F); + + bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + DT.verifyDomTree(); +#endif + return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +// INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { + return new SimpleLoopUnswitchLegacyPass(); +} Index: test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output + + %struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* } + %struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } } + %struct.TPATTERN = type { i16, i16, i16, i32, float, float, float, %struct.WARP*, %struct.TPATTERN*, %struct.BLEND_MAP*, { %struct.anon, [4 x i8] } } + %struct.TURB = type { i16, %struct.WARP*, [3 x double], i32, float, float } + %struct.WARP = type { i16, %struct.WARP* } + %struct.anon = type { float, [3 x double] } + +define void @Parse_Pattern() { +entry: + br label %bb1096.outer20 +bb671: ; preds = %cond_true1099 + br label %bb1096.outer23 +bb1096.outer20.loopexit: ; preds = %cond_true1099 + %Local_Turb.0.ph24.lcssa = phi %struct.TURB* [ %Local_Turb.0.ph24, %cond_true1099 ] ; <%struct.TURB*> [#uses=1] + br label %bb1096.outer20 +bb1096.outer20: ; preds = %bb1096.outer20.loopexit, %entry + %Local_Turb.0.ph22 = phi %struct.TURB* [ undef, %entry ], [ %Local_Turb.0.ph24.lcssa, %bb1096.outer20.loopexit ] ; <%struct.TURB*> [#uses=1] + %tmp1098 = icmp eq i32 0, 0 ; [#uses=1] + br label %bb1096.outer23 +bb1096.outer23: ; preds = %bb1096.outer20, %bb671 + %Local_Turb.0.ph24 = phi %struct.TURB* [ %Local_Turb.0.ph22, %bb1096.outer20 ], [ null, %bb671 ] ; <%struct.TURB*> [#uses=2] + br label %bb1096 +bb1096: ; preds = %cond_true1099, %bb1096.outer23 + br i1 %tmp1098, label %cond_true1099, label %bb1102 +cond_true1099: ; preds = %bb1096 + switch i32 0, label %bb1096.outer20.loopexit [ + i32 161, label %bb671 + i32 359, label %bb1096 + ] +bb1102: ; preds = %bb1096 + %Local_Turb.0.ph24.lcssa1 = phi %struct.TURB* [ %Local_Turb.0.ph24, %bb1096 ] ; <%struct.TURB*> [#uses=0] + ret void +} Index: test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output + +define void @init_caller_save() { +entry: + br label %cond_true78 +cond_next20: ; preds = %cond_true64 + br label %bb31 +bb31: ; preds = %cond_true64, %cond_true64, %cond_next20 + %iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ] ; [#uses=0] + br label %bb54 +bb54: ; preds = %cond_true78, %bb31 + br i1 false, label %bb75, label %cond_true64 +cond_true64: ; preds = %bb54 + switch i32 %i.0.0, label %cond_next20 [ + i32 17, label %bb31 + i32 18, label %bb31 + ] +bb75: ; preds = %bb54 + %tmp74.0 = add i32 %i.0.0, 1 ; [#uses=1] + br label %cond_true78 +cond_true78: ; preds = %bb75, %entry + %i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ] ; [#uses=2] + br label %bb54 +} + Index: test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll @@ -0,0 +1,28 @@ +; PR1333 +; RUN: opt < %s -simple-loop-unswitch -disable-output + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-pc-linux-gnu" + %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* } + %struct.ada__tags__dispatch_table = type { [1 x i8*] } + %struct.quotes__T173s = type { i8, %struct.quotes__T173s__T174s, [2 x [1 x double]], [2 x i16], i64, i8 } + %struct.quotes__T173s__T174s = type { i8, i8, i8, i16, i16, [2 x [1 x double]] } + +define void @quotes__write_quote() { +entry: + %tmp606.i = icmp eq i32 0, 0 ; [#uses=1] + br label %bb +bb: ; preds = %cond_next73, %bb, %entry + br i1 false, label %bb51, label %bb +bb51: ; preds = %cond_next73, %bb + br i1 %tmp606.i, label %quotes__bid_ask_depth_offset_matrices__get_price.exit, label %cond_true.i +cond_true.i: ; preds = %bb51 + unreachable +quotes__bid_ask_depth_offset_matrices__get_price.exit: ; preds = %bb51 + br i1 false, label %cond_next73, label %cond_true72 +cond_true72: ; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit + unreachable +cond_next73: ; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit + br i1 false, label %bb, label %bb51 +} + Index: test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output +; PR1333 + +define void @pp_cxx_expression() { +entry: + %tmp6 = lshr i32 0, 24 ; [#uses=1] + br label %tailrecurse + +tailrecurse: ; preds = %tailrecurse, %tailrecurse, %entry + switch i32 %tmp6, label %bb96 [ + i32 24, label %bb10 + i32 25, label %bb10 + i32 28, label %bb10 + i32 29, label %bb48 + i32 31, label %bb48 + i32 32, label %bb48 + i32 33, label %bb48 + i32 34, label %bb48 + i32 36, label %bb15 + i32 51, label %bb89 + i32 52, label %bb89 + i32 54, label %bb83 + i32 57, label %bb59 + i32 63, label %bb80 + i32 64, label %bb80 + i32 68, label %bb80 + i32 169, label %bb75 + i32 170, label %bb19 + i32 171, label %bb63 + i32 172, label %bb63 + i32 173, label %bb67 + i32 174, label %bb67 + i32 175, label %bb19 + i32 176, label %bb75 + i32 178, label %bb59 + i32 179, label %bb89 + i32 180, label %bb59 + i32 182, label %bb48 + i32 183, label %bb48 + i32 184, label %bb48 + i32 185, label %bb48 + i32 186, label %bb48 + i32 195, label %bb48 + i32 196, label %bb59 + i32 197, label %bb89 + i32 198, label %bb70 + i32 199, label %bb59 + i32 200, label %bb59 + i32 201, label %bb59 + i32 202, label %bb59 + i32 203, label %bb75 + i32 204, label %bb59 + i32 205, label %tailrecurse + i32 210, label %tailrecurse + ] + +bb10: ; preds = %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb15: ; preds = %tailrecurse + ret void + +bb19: ; preds = %tailrecurse, %tailrecurse + ret void + +bb48: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb59: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb63: ; preds = %tailrecurse, %tailrecurse + ret void + +bb67: ; preds = %tailrecurse, %tailrecurse + ret void + +bb70: ; preds = %tailrecurse + ret void + +bb75: ; preds = %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb80: ; preds = %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb83: ; preds = %tailrecurse + ret void + +bb89: ; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse + ret void + +bb96: ; preds = %tailrecurse + ret void +} Index: test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output + +@str3 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1] + +define i32 @stringSearch_Clib(i32 %count) { +entry: + %ttmp25 = icmp sgt i32 %count, 0 ; [#uses=1] + br i1 %ttmp25, label %bb36.preheader, label %bb44 + +bb36.preheader: ; preds = %entry + %ttmp33 = icmp slt i32 0, 250 ; [#uses=1] + br label %bb36.outer + +bb36.outer: ; preds = %bb41, %bb36.preheader + br i1 %ttmp33, label %bb.nph, label %bb41 + +bb.nph: ; preds = %bb36.outer + %ttmp8 = icmp eq i8* null, null ; [#uses=1] + %ttmp6 = icmp eq i8* null, null ; [#uses=1] + %tmp31 = call i32 @strcspn( i8* null, i8* getelementptr ([3 x i8], [3 x i8]* @str3, i64 0, i64 0) ) ; [#uses=1] + br i1 %ttmp8, label %cond_next, label %cond_true + +cond_true: ; preds = %bb.nph + ret i32 0 + +cond_next: ; preds = %bb.nph + br i1 %ttmp6, label %cond_next28, label %cond_true20 + +cond_true20: ; preds = %cond_next + ret i32 0 + +cond_next28: ; preds = %cond_next + %tmp33 = add i32 %tmp31, 0 ; [#uses=1] + br label %bb41 + +bb41: ; preds = %cond_next28, %bb36.outer + %c.2.lcssa = phi i32 [ 0, %bb36.outer ], [ %tmp33, %cond_next28 ] ; [#uses=1] + br i1 false, label %bb36.outer, label %bb44 + +bb44: ; preds = %bb41, %entry + %c.01.1 = phi i32 [ 0, %entry ], [ %c.2.lcssa, %bb41 ] ; [#uses=1] + ret i32 %c.01.1 +} + +declare i32 @strcspn(i8*, i8*) Index: test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output + +define i32 @main(i32 %argc, i8** %argv) { +entry: + %tmp1785365 = icmp ult i32 0, 100 ; [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %entry + br i1 false, label %cond_true, label %cond_next + +cond_true: ; preds = %bb + br i1 %tmp1785365, label %bb, label %bb1788 + +cond_next: ; preds = %bb + %iftmp.1.0 = select i1 false, i32 0, i32 0 ; [#uses=1] + br i1 false, label %cond_true47, label %cond_next74 + +cond_true47: ; preds = %cond_next + %tmp53 = urem i32 %iftmp.1.0, 0 ; [#uses=0] + ret i32 0 + +cond_next74: ; preds = %cond_next + ret i32 0 + +bb1788: ; preds = %cond_true + ret i32 0 +} Index: test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output +; PR1559 + +target triple = "i686-pc-linux-gnu" + %struct.re_pattern_buffer = type { i8*, i32, i32, i32, i8*, i8*, i32, i8 } + +define fastcc i32 @byte_regex_compile(i8* %pattern, i32 %size, i32 %syntax, %struct.re_pattern_buffer* %bufp) { +entry: + br i1 false, label %bb147, label %cond_next123 + +cond_next123: ; preds = %entry + ret i32 0 + +bb147: ; preds = %entry + switch i32 0, label %normal_char [ + i32 91, label %bb1734 + i32 92, label %bb5700 + ] + +bb1734: ; preds = %bb147 + br label %bb1855.outer.outer + +cond_true1831: ; preds = %bb1855.outer + br i1 %tmp1837, label %cond_next1844, label %cond_true1840 + +cond_true1840: ; preds = %cond_true1831 + ret i32 0 + +cond_next1844: ; preds = %cond_true1831 + br i1 false, label %bb1855.outer, label %cond_true1849 + +cond_true1849: ; preds = %cond_next1844 + br label %bb1855.outer.outer + +bb1855.outer.outer: ; preds = %cond_true1849, %bb1734 + %b.10.ph.ph = phi i8* [ null, %cond_true1849 ], [ null, %bb1734 ] ; [#uses=1] + br label %bb1855.outer + +bb1855.outer: ; preds = %bb1855.outer.outer, %cond_next1844 + %b.10.ph = phi i8* [ null, %cond_next1844 ], [ %b.10.ph.ph, %bb1855.outer.outer ] ; [#uses=1] + %tmp1837 = icmp eq i8* null, null ; [#uses=2] + br i1 false, label %cond_true1831, label %cond_next1915 + +cond_next1915: ; preds = %cond_next1961, %bb1855.outer + store i8* null, i8** null + br i1 %tmp1837, label %cond_next1929, label %cond_true1923 + +cond_true1923: ; preds = %cond_next1915 + ret i32 0 + +cond_next1929: ; preds = %cond_next1915 + br i1 false, label %cond_next1961, label %cond_next2009 + +cond_next1961: ; preds = %cond_next1929 + %tmp1992 = getelementptr i8, i8* %b.10.ph, i32 0 ; [#uses=0] + br label %cond_next1915 + +cond_next2009: ; preds = %cond_next1929 + ret i32 0 + +bb5700: ; preds = %bb147 + ret i32 0 + +normal_char: ; preds = %bb147 + ret i32 0 +} Index: test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -licm -simple-loop-unswitch -disable-output +; PR 1589 + + %struct.QBasicAtomic = type { i32 } + +define void @_ZNK5QDate9addMonthsEi(%struct.QBasicAtomic* sret %agg.result, %struct.QBasicAtomic* %this, i32 %nmonths) { +entry: + br label %cond_true90 + +bb16: ; preds = %cond_true90 + br i1 false, label %bb93, label %cond_true90 + +bb45: ; preds = %cond_true90 + br i1 false, label %bb53, label %bb58 + +bb53: ; preds = %bb45 + br i1 false, label %bb93, label %cond_true90 + +bb58: ; preds = %bb45 + store i32 0, i32* null, align 4 + br i1 false, label %cond_true90, label %bb93 + +cond_true90: ; preds = %bb58, %bb53, %bb16, %entry + %nmonths_addr.016.1 = phi i32 [ %nmonths, %entry ], [ 0, %bb16 ], [ 0, %bb53 ], [ %nmonths_addr.016.1, %bb58 ] ; [#uses=2] + %tmp14 = icmp slt i32 %nmonths_addr.016.1, -11 ; [#uses=1] + br i1 %tmp14, label %bb16, label %bb45 + +bb93: ; preds = %bb58, %bb53, %bb16 + ret void +} Index: test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output + %struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList", %"struct.QList", i8, i8, %"struct.QList", %"struct.QList", %"struct.QList", %"struct.QList", %"struct.QList", %"struct.QList", %"struct.QMap", %"struct.QList", %"struct.QMap", i32, i32 } + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } + %struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList", %struct.QByteArray, %"struct.QList" } + %struct.QBasicAtomic = type { i32 } + %struct.QByteArray = type { %"struct.QByteArray::Data"* } + %"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] } + %"struct.QList" = type { %"struct.QList::._19" } + %"struct.QList::._19" = type { %struct.QListData } + %struct.QListData = type { %"struct.QListData::Data"* } + %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] } + %"struct.QMap" = type { %"struct.QMap::._56" } + %"struct.QMap::._56" = type { %struct.QMapData* } + %struct.QMapData = type { %struct.QMapData*, [12 x %struct.QMapData*], %struct.QBasicAtomic, i32, i32, i32, i8 } + %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } +@.str9 = external constant [1 x i8] ; <[1 x i8]*> [#uses=1] + +declare i32 @strcmp(i8*, i8*) + +define i32 @_ZN9Generator6strregEPKc(%struct.Generator* %this, i8* %s) { +entry: + %s_addr.0 = select i1 false, i8* getelementptr ([1 x i8], [1 x i8]* @.str9, i32 0, i32 0), i8* %s ; [#uses=2] + %tmp122 = icmp eq i8* %s_addr.0, null ; [#uses=1] + br label %bb184 + +bb55: ; preds = %bb184 + ret i32 0 + +bb88: ; preds = %bb184 + br i1 %tmp122, label %bb154, label %bb128 + +bb128: ; preds = %bb88 + %tmp138 = call i32 @strcmp( i8* null, i8* %s_addr.0 ) ; [#uses=1] + %iftmp.37.0.in4 = icmp eq i32 %tmp138, 0 ; [#uses=1] + br i1 %iftmp.37.0.in4, label %bb250, label %bb166 + +bb154: ; preds = %bb88 + br i1 false, label %bb250, label %bb166 + +bb166: ; preds = %bb154, %bb128 + %tmp175 = add i32 %idx.0, 1 ; [#uses=1] + %tmp177 = add i32 %tmp175, 0 ; [#uses=1] + %tmp181 = add i32 %tmp177, 0 ; [#uses=1] + %tmp183 = add i32 %i33.0, 1 ; [#uses=1] + br label %bb184 + +bb184: ; preds = %bb166, %entry + %i33.0 = phi i32 [ 0, %entry ], [ %tmp183, %bb166 ] ; [#uses=2] + %idx.0 = phi i32 [ 0, %entry ], [ %tmp181, %bb166 ] ; [#uses=2] + %tmp49 = icmp slt i32 %i33.0, 0 ; [#uses=1] + br i1 %tmp49, label %bb88, label %bb55 + +bb250: ; preds = %bb154, %bb128 + ret i32 %idx.0 +} Index: test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -licm -loop-unroll -disable-output + +@resonant = external global i32 ; [#uses=2] + +define void @weightadj() { +entry: + br label %bb + +bb: ; preds = %bb158, %entry + store i32 0, i32* @resonant, align 4 + br i1 false, label %g.exit, label %bb158 + +g.exit: ; preds = %bb68, %bb + br i1 false, label %bb68, label %cond_true + +cond_true: ; preds = %g.exit + store i32 1, i32* @resonant, align 4 + br label %bb68 + +bb68: ; preds = %cond_true, %g.exit + %tmp71 = icmp slt i32 0, 0 ; [#uses=1] + br i1 %tmp71, label %g.exit, label %bb158 + +bb158: ; preds = %bb68, %bb + br i1 false, label %bb, label %return + +return: ; preds = %bb158 + ret void +} Index: test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -simple-loop-unswitch -instcombine -gvn -disable-output +; PR2372 +target triple = "i386-pc-linux-gnu" + +define i32 @func_3(i16 signext %p_5, i16 signext %p_6) nounwind { +entry: + %tmp3 = icmp eq i16 %p_5, 0 ; [#uses=1] + %tmp1314 = sext i16 %p_6 to i32 ; [#uses=1] + %tmp28 = icmp ugt i32 %tmp1314, 3 ; [#uses=1] + %bothcond = or i1 %tmp28, false ; [#uses=1] + br label %bb +bb: ; preds = %bb54, %entry + br i1 %tmp3, label %bb54, label %bb5 +bb5: ; preds = %bb + br i1 %bothcond, label %bb54, label %bb31 +bb31: ; preds = %bb5 + br label %bb54 +bb54: ; preds = %bb31, %bb5, %bb + br i1 false, label %bb64, label %bb +bb64: ; preds = %bb54 + %tmp6566 = sext i16 %p_6 to i32 ; [#uses=1] + %tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind ; [#uses=0] + ret i32 undef +} + +declare i32 @func_18(...) Index: test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -licm -simple-loop-unswitch -disable-output +@g_56 = external global i16 ; [#uses=2] + +define i32 @func_67(i32 %p_68, i8 signext %p_69, i8 signext %p_71) nounwind { +entry: + br label %bb +bb: ; preds = %bb44, %entry + br label %bb3 +bb3: ; preds = %bb36, %bb + %bothcond = or i1 false, false ; [#uses=1] + br i1 %bothcond, label %bb29, label %bb19 +bb19: ; preds = %bb3 + br i1 false, label %bb36, label %bb29 +bb29: ; preds = %bb19, %bb3 + ret i32 0 +bb36: ; preds = %bb19 + store i16 0, i16* @g_56, align 2 + br i1 false, label %bb44, label %bb3 +bb44: ; preds = %bb44, %bb36 + %tmp46 = load i16, i16* @g_56, align 2 ; [#uses=0] + br i1 false, label %bb, label %bb44 +} Index: test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simple-loop-unswitch +; PR8622 +@g_38 = external global i32, align 4 + +define void @func_67(i32 %p_68.coerce) nounwind { +entry: + br i1 true, label %for.end12, label %bb.nph + +bb.nph: ; preds = %entry + %g_38.promoted = load i32, i32* @g_38 + br label %for.body + +for.body: ; preds = %for.cond, %bb.nph + %tobool.i = icmp eq i32 %p_68.coerce, 1 + %xor4.i = xor i32 %p_68.coerce, 1 + %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i + br label %for.cond + +for.cond: ; preds = %for.body + br i1 true, label %for.cond.for.end12_crit_edge, label %for.body + +for.cond.for.end12_crit_edge: ; preds = %for.cond + store i32 %call1, i32* @g_38 + br label %for.end12 + +for.end12: ; preds = %for.cond.for.end12_crit_edge, %entry + ret void +} Index: test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll @@ -0,0 +1,28 @@ +; RUN: opt -simple-loop-unswitch -disable-output < %s +; PR10031 + +define i32 @test(i32 %command) { +entry: + br label %tailrecurse + +tailrecurse: ; preds = %if.then14, %tailrecurse, %entry + br i1 undef, label %if.then, label %tailrecurse + +if.then: ; preds = %tailrecurse + switch i32 %command, label %sw.bb [ + i32 2, label %land.lhs.true + i32 0, label %land.lhs.true + ] + +land.lhs.true: ; preds = %if.then, %if.then + br i1 undef, label %sw.bb, label %if.then14 + +if.then14: ; preds = %land.lhs.true + switch i32 %command, label %tailrecurse [ + i32 0, label %sw.bb + i32 1, label %sw.bb + ] + +sw.bb: ; preds = %if.then14 + unreachable +} Index: test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -sroa -simple-loop-unswitch -disable-output +; PR11016 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.2" + +%class.MyContainer.1.3.19.29 = type { [6 x %class.MyMemVarClass.0.2.18.28*] } +%class.MyMemVarClass.0.2.18.28 = type { i32 } + +define void @_ZN11MyContainer1fEi(%class.MyContainer.1.3.19.29* %this, i32 %doit) uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %inc1 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %conv = sext i32 %inc1 to i64 + %cmp = icmp ult i64 %conv, 6 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tobool = icmp ne i32 %doit, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %idxprom = sext i32 %inc1 to i64 + %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0 + %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom + %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8 + %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null + br i1 %isnull, label %for.inc, label %delete.notnull + +delete.notnull: ; preds = %if.then + invoke void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28* %tmp4) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %delete.notnull + %0 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8* + call void @_ZdlPv(i8* %0) nounwind + br label %for.inc + +lpad: ; preds = %delete.notnull + %1 = landingpad { i8*, i32 } + cleanup + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = extractvalue { i8*, i32 } %1, 1 + %4 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8* + call void @_ZdlPv(i8* %4) nounwind + %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0 + %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1 + resume { i8*, i32 } %lpad.val7 + +for.inc: ; preds = %invoke.cont, %if.then, %for.body + %inc = add nsw i32 %inc1, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*) + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZdlPv(i8*) nounwind Index: test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s +; PR12343: -simple-loop-unswitch crash on indirect branch + +; CHECK: %0 = icmp eq i64 undef, 0 +; CHECK-NEXT: br i1 %0, label %"5", label %"4" + +; CHECK: "5": ; preds = %entry +; CHECK-NEXT: br label %"16" + +; CHECK: "16": ; preds = %"22", %"5" +; CHECK-NEXT: indirectbr i8* undef, [label %"22", label %"33"] + +; CHECK: "22": ; preds = %"16" +; CHECK-NEXT: br i1 %0, label %"16", label %"26" + +; CHECK: "26": ; preds = %"22" +; CHECK-NEXT: unreachable + +define void @foo() { +entry: + %0 = icmp eq i64 undef, 0 + br i1 %0, label %"5", label %"4" + +"4": ; preds = %entry + unreachable + +"5": ; preds = %entry + br label %"16" + +"16": ; preds = %"22", %"5" + indirectbr i8* undef, [label %"22", label %"33"] + +"22": ; preds = %"16" + br i1 %0, label %"16", label %"26" + +"26": ; preds = %"22" + unreachable + +"33": ; preds = %"16" + unreachable +} Index: test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -simple-loop-unswitch -gvn -verify +; PR12573 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 } +%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* } +%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* } +%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 } + +define void @_Z23get_reconstruction_pathv() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8 + br label %for.cond + +for.cond: ; preds = %for.end, %entry + invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %for.cond + invoke void @_ZN1C3endEv() + to label %for.cond3 unwind label %lpad + +for.cond3: ; preds = %invoke.cont6, %invoke.cont + invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c) + to label %invoke.cont4 unwind label %lpad + +invoke.cont4: ; preds = %for.cond3 + invoke void @_ZN1C3endEv() + to label %invoke.cont6 unwind label %lpad + +invoke.cont6: ; preds = %invoke.cont4 + br i1 undef, label %for.cond3, label %for.end + +lpad: ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond + %0 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +for.end: ; preds = %invoke.cont6 + invoke void @_ZN1C13_M_insert_auxER1D() + to label %for.cond unwind label %lpad +} + +define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 { +entry: + %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8 + store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8 + %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr + %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0 + %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8 + %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null + br i1 %tobool, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + call void @_Z10__assert13v() noreturn + unreachable + +cond.end: ; preds = %entry + ret void +} + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZN1C3endEv() + +define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 { +entry: + ret void +} + +define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 { +entry: + ret void +} + +define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 { +entry: + ret void +} + +define void @_ZN1BaSERS_() uwtable ssp align 2 { +entry: + unreachable +} + +declare void @_Z10__assert13v() noreturn Index: test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output +; PR12887 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = common global i32 0, align 4 +@c = common global i32 0, align 4 +@b = common global i32 0, align 4 + +define void @func() noreturn nounwind uwtable { +entry: + %0 = load i32, i32* @a, align 4 + %tobool = icmp eq i32 %0, 0 + %1 = load i32, i32* @b, align 4 + br label %while.body + +while.body: ; preds = %while.body, %entry + %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ] + %conv = sext i8 %d.0 to i32 + %cond = select i1 %tobool, i32 0, i32 %conv + %conv11 = zext i8 %d.0 to i32 + %add = add i32 %1, %conv11 + %conv2 = trunc i32 %add to i8 + br label %while.body +} Index: test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s + +; In cases where two address spaces do not have the same size pointer, the +; input for the addrspacecast should not be used as a substitute for itself +; when manipulating the pointer. + +target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16" + +define void @foo() { +; CHECK-LABEL: @foo +entry: + %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef + %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)* + br i1 undef, label %for.body.i, label %bar.exit + +for.body.i: ; preds = %for.body.i, %entry +; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it +; will try to find the base object to prove deferenceability. If we look +; through the addrspacecast, we'll fail an assertion about bitwidths matching +; CHECK-LABEL: for.body.i +; CHECK: %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2 + %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2 + %cmp1.i = icmp eq i16 %0, 0 + br i1 %cmp1.i, label %bar.exit, label %for.body.i + +bar.exit: ; preds = %for.body.i, %entry + ret void +} Index: test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simple-loop-unswitch -S 2>&1 | FileCheck %s + +; This is to test trivial loop unswitch only happens when trivial condition +; itself is an LIV loop condition (not partial LIV which could occur in and/or). + +define i32 @test(i1 %cond1, i32 %var1) { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.and = and i1 %cond1, %cond2 + +; %cond.and only has %cond1 as LIV so no unswitch should happen. +; CHECK: br i1 %cond.and, label %do_something, label %loop_exit + br i1 %cond.and, label %do_something, label %loop_exit + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +declare void @some_func() noreturn Index: test/Transforms/SimpleLoopUnswitch/basictest.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/basictest.ll @@ -0,0 +1,184 @@ +; RUN: opt -passes='loop(unswitch),verify' -S < %s | FileCheck %s + +define i32 @test(i32* %A, i1 %C) { +entry: + br label %no_exit +no_exit: ; preds = %no_exit.backedge, %entry + %i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ] ; [#uses=3] + %gep.upgrd.1 = zext i32 %i.0.0 to i64 ; [#uses=1] + %tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1 ; [#uses=4] + %tmp.13 = load i32, i32* %tmp.7 ; [#uses=2] + %tmp.14 = add i32 %tmp.13, 1 ; [#uses=1] + store i32 %tmp.14, i32* %tmp.7 + br i1 %C, label %then, label %endif +then: ; preds = %no_exit + %tmp.29 = load i32, i32* %tmp.7 ; [#uses=1] + %tmp.30 = add i32 %tmp.29, 2 ; [#uses=1] + store i32 %tmp.30, i32* %tmp.7 + %inc9 = add i32 %i.0.0, 1 ; [#uses=2] + %tmp.112 = icmp ult i32 %inc9, 100000 ; [#uses=1] + br i1 %tmp.112, label %no_exit.backedge, label %return +no_exit.backedge: ; preds = %endif, %then + %i.0.0.be = phi i32 [ %inc9, %then ], [ %inc, %endif ] ; [#uses=1] + br label %no_exit +endif: ; preds = %no_exit + %inc = add i32 %i.0.0, 1 ; [#uses=2] + %tmp.1 = icmp ult i32 %inc, 100000 ; [#uses=1] + br i1 %tmp.1, label %no_exit.backedge, label %return +return: ; preds = %endif, %then + ret i32 %tmp.13 +} + +; This simple test would normally unswitch, but should be inhibited by the presence of +; the noduplicate call. + +; CHECK-LABEL: @test2( +define i32 @test2(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32, i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32, i32* %var + + switch i32 %c, label %default [ + i32 1, label %inc + i32 2, label %dec + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin +dec: +; CHECK: call void @decf() +; CHECK-NOT: call void @decf() + call void @decf() noreturn nounwind noduplicate + br label %loop_begin +default: + br label %loop_exit +loop_exit: + ret i32 0 +; CHECK: } +} + +; This simple test would normally unswitch, but should be inhibited by the presence of +; the convergent call that is not control-dependent on the unswitch condition. + +; CHECK-LABEL: @test3( +define i32 @test3(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32, i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32, i32* %var + +; CHECK: call void @conv() +; CHECK-NOT: call void @conv() + call void @conv() convergent + + switch i32 %c, label %default [ + i32 1, label %inc + i32 2, label %dec + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin +dec: + call void @decf() noreturn nounwind + br label %loop_begin +default: + br label %loop_exit +loop_exit: + ret i32 0 +; CHECK: } +} + +; Make sure we don't unswitch, as we can not find an input value %a +; that will effectively unswitch 0 or 3 out of the loop. +; +; CHECK: define void @and_or_i2_as_switch_input(i2 +; CHECK: entry: +; This is an indication that the loop has NOT been unswitched. +; CHECK-NOT: icmp +; CHECK: br +define void @and_or_i2_as_switch_input(i2 %a) { +entry: + br label %for.body + +for.body: + %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ] + %and = and i2 %a, %i + %or = or i2 %and, %i + switch i2 %or, label %sw.default [ + i2 0, label %sw.bb + i2 3, label %sw.bb1 + ] + +sw.bb: + br label %sw.epilog + +sw.bb1: + br label %sw.epilog + +sw.default: + br label %sw.epilog + +sw.epilog: + br label %for.inc + +for.inc: + %inc = add nsw i2 %i, 1 + %cmp = icmp slt i2 %inc, 3 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Make sure we don't unswitch, as we can not find an input value %a +; that will effectively unswitch true/false out of the loop. +; +; CHECK: define void @and_or_i1_as_branch_input(i1 +; CHECK: entry: +; This is an indication that the loop has NOT been unswitched. +; CHECK-NOT: icmp +; CHECK: br +define void @and_or_i1_as_branch_input(i1 %a) { +entry: + br label %for.body + +for.body: + %i = phi i1 [ 0, %entry ], [ %inc, %for.inc ] + %and = and i1 %a, %i + %or = or i1 %and, %i + br i1 %or, label %sw.bb, label %sw.bb1 + +sw.bb: + br label %sw.epilog + +sw.bb1: + br label %sw.epilog + +sw.epilog: + br label %for.inc + +for.inc: + %inc = add nsw i1 %i, 1 + %cmp = icmp slt i1 %inc, 1 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +declare void @incf() noreturn +declare void @decf() noreturn +declare void @conv() convergent Index: test/Transforms/SimpleLoopUnswitch/cleanuppad.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/cleanuppad.ll @@ -0,0 +1,44 @@ +; RUN: opt -S -simple-loop-unswitch < %s | FileCheck %s +target triple = "x86_64-pc-win32" + +define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 { +entry: + %tobool = icmp eq i32 %doit, 0 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + br i1 %x, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br i1 %tobool, label %if.then, label %for.inc + +if.then: ; preds = %for.body + br i1 %y, label %for.inc, label %delete.notnull + +delete.notnull: ; preds = %if.then + invoke void @g() + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %delete.notnull + br label %for.inc + +lpad: ; preds = %delete.notnull + %cp = cleanuppad within none [] + cleanupret from %cp unwind to caller + +for.inc: ; preds = %invoke.cont, %if.then, %for.body + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @g() + +declare i32 @__CxxFrameHandler3(...) + +; CHECK-LABEL: define void @f( +; CHECK: cleanuppad within none [] +; CHECK-NOT: cleanuppad + +attributes #0 = { ssp uwtable } Index: test/Transforms/SimpleLoopUnswitch/copy-metadata.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/copy-metadata.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s + +; This test checks if unswitched condition preserve make.implicit metadata. +define i32 @test(i1 %cond) { +; CHECK-LABEL: @test( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit, !make.implicit !0 +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br i1 %cond, label %continue, label %loop_exit, !make.implicit !0 +; CHECK: loop_begin: +; CHECK-NEXT: br label %continue + +continue: + call void @some_func() + br label %loop_begin +; CHECK: continue: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_begin + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: ret +} + +declare void @some_func() + +!0 = !{} Index: test/Transforms/SimpleLoopUnswitch/crash.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/crash.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -simple-loop-unswitch -disable-output + +define void @test1(i32* %S2) { +entry: + br i1 false, label %list_Length.exit, label %cond_true.i +cond_true.i: ; preds = %entry + ret void +list_Length.exit: ; preds = %entry + br i1 false, label %list_Length.exit9, label %cond_true.i5 +cond_true.i5: ; preds = %list_Length.exit + ret void +list_Length.exit9: ; preds = %list_Length.exit + br i1 false, label %bb78, label %return +bb44: ; preds = %bb78, %cond_next68 + br i1 %tmp49.not, label %bb62, label %bb62.loopexit +bb62.loopexit: ; preds = %bb44 + br label %bb62 +bb62: ; preds = %bb62.loopexit, %bb44 + br i1 false, label %return.loopexit, label %cond_next68 +cond_next68: ; preds = %bb62 + br i1 false, label %return.loopexit, label %bb44 +bb78: ; preds = %list_Length.exit9 + %tmp49.not = icmp eq i32* %S2, null ; [#uses=1] + br label %bb44 +return.loopexit: ; preds = %cond_next68, %bb62 + %retval.0.ph = phi i32 [ 1, %cond_next68 ], [ 0, %bb62 ] ; [#uses=1] + br label %return +return: ; preds = %return.loopexit, %list_Length.exit9 + %retval.0 = phi i32 [ 0, %list_Length.exit9 ], [ %retval.0.ph, %return.loopexit ] ; [#uses=0] + ret void +} + +define void @test2() nounwind { +entry: + br label %bb.nph + +bb.nph: ; preds = %entry + %and.i13521 = and <4 x i1> undef, undef ; <<4 x i1>> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %or.i = select <4 x i1> %and.i13521, <4 x i32> undef, <4 x i32> undef ; <<4 x i32>> [#uses=0] + br i1 false, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; PR6879 +define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond4, %entry + br i1 false, label %for.cond4, label %for.end88 + +for.cond4: ; preds = %for.cond + %conv46 = trunc i32 0 to i8 ; [#uses=2] + %cmp60 = icmp sgt i8 %conv46, 124 ; [#uses=1] + %or.cond = and i1 undef, %cmp60 ; [#uses=1] + %cond = select i1 %or.cond, i8 %conv46, i8 undef ; [#uses=0] + br label %for.cond + +for.end88: ; preds = %for.cond + ret i32* undef +} Index: test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll @@ -0,0 +1,51 @@ +; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s + +define void @f(i32 %n, i32* %ptr) { +; CHECK-LABEL: @f( +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ] + %iv.inc = add i32 %iv, 1 + %unswitch_cond_root = icmp ne i32 %iv.inc, 42 + %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root + %us.1 = and i1 %us.0, %us.0 + %us.2 = and i1 %us.1, %us.1 + %us.3 = and i1 %us.2, %us.2 + %us.4 = and i1 %us.3, %us.3 + %us.5 = and i1 %us.4, %us.4 + %us.6 = and i1 %us.5, %us.5 + %us.7 = and i1 %us.6, %us.6 + %us.8 = and i1 %us.7, %us.7 + %us.9 = and i1 %us.8, %us.8 + %us.10 = and i1 %us.9, %us.9 + %us.11 = and i1 %us.10, %us.10 + %us.12 = and i1 %us.11, %us.11 + %us.13 = and i1 %us.12, %us.12 + %us.14 = and i1 %us.13, %us.13 + %us.15 = and i1 %us.14, %us.14 + %us.16 = and i1 %us.15, %us.15 + %us.17 = and i1 %us.16, %us.16 + %us.18 = and i1 %us.17, %us.17 + %us.19 = and i1 %us.18, %us.18 + %us.20 = and i1 %us.19, %us.19 + %us.21 = and i1 %us.20, %us.20 + %us.22 = and i1 %us.21, %us.21 + %us.23 = and i1 %us.22, %us.22 + %us.24 = and i1 %us.23, %us.23 + %us.25 = and i1 %us.24, %us.24 + %us.26 = and i1 %us.25, %us.25 + %us.27 = and i1 %us.26, %us.26 + %us.28 = and i1 %us.27, %us.27 + %us.29 = and i1 %us.28, %us.28 + br i1 %us.29, label %leave, label %be + +be: + store volatile i32 0, i32* %ptr + %becond = icmp ult i32 %iv.inc, %n + br i1 %becond, label %leave, label %loop + +leave: + ret void +} Index: test/Transforms/SimpleLoopUnswitch/infinite-loop.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/infinite-loop.ll @@ -0,0 +1,64 @@ +; REQUIRES: asserts +; RUN: opt -simple-loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s +; PR5373 + +; Loop unswitching shouldn't trivially unswitch the true case of condition %a +; in the code here because it leads to an infinite loop. While this doesn't +; contain any instructions with side effects, it's still a kind of side effect. +; It can trivially unswitch on the false cas of condition %a though. + +; STATS: 2 simple-loop-unswitch - Number of branches unswitched +; STATS: 2 simple-loop-unswitch - Number of unswitches that are trivial + +; CHECK-LABEL: @func_16( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0 + +; CHECK: entry.split: +; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1 + +; CHECK: entry.split.split: +; CHECK-NEXT: br label %for.body + +; CHECK: for.body: +; CHECK-NEXT: br label %cond.end + +; CHECK: cond.end: +; CHECK-NEXT: br label %for.body + +; CHECK: abort0: +; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]] +; CHECK-NEXT: unreachable + +; CHECK: abort1: +; CHECK-NEXT: call void @end1() [[NOR_NUW]] +; CHECK-NEXT: unreachable + +; CHECK: } + +define void @func_16(i1 %a, i1 %b) nounwind { +entry: + br label %for.body + +for.body: + br i1 %a, label %cond.end, label %abort0 + +cond.end: + br i1 %b, label %for.body, label %abort1 + +abort0: + call void @end0() noreturn nounwind + unreachable + +abort1: + call void @end1() noreturn nounwind + unreachable +} + +declare void @end0() noreturn +declare void @end1() noreturn + +; CHECK: attributes #0 = { nounwind } +; CHECK: attributes #1 = { noreturn } +; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind } Index: test/Transforms/SimpleLoopUnswitch/msan.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/msan.ll @@ -0,0 +1,141 @@ +; RUN: opt -passes='loop(unswitch),verify' -S < %s | FileCheck %s + +declare void @unknown() +declare void @unknown2() + +@y = global i64 0, align 8 + +; The following is approximately: +; void f(bool *x) { +; for (int i = 0; i < 1; ++i) { +; if (*x) { +; if (y) +; unknown(); +; else +; break; +; } +; } +; } +; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could +; be uninitialized when x == false. +; Test that the branch on "y" is inside the loop (after the first unconditional +; branch). + +define void @may_not_execute_trivial(i1* %x) sanitize_memory { +; CHECK-LABEL: @may_not_execute_trivial( +entry: + %y = load i64, i64* @y, align 8 + %y.cmp = icmp eq i64 %y, 0 + br label %for.body +; CHECK: %[[Y:.*]] = load i64, i64* @y +; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0 +; CHECK-NOT: br i1 +; CHECK: br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %x.load = load i1, i1* %x + br i1 %x.load, label %for.inc, label %if.then +; CHECK: %[[XLOAD:.*]] = load i1, i1* %x +; CHECK: br i1 %[[XLOAD]] + +if.then: + br i1 %y.cmp, label %for.end, label %if.then4 +; CHECK: br i1 %[[YCMP]] + +if.then4: + call void @unknown() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, 1 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + + +; The same as above, but "y" is a function parameter instead of a global. +; This shows that it is not enough to suppress hoisting of load instructions, +; the actual problem is in the speculative branching. + +define void @may_not_execute2_trivial(i1* %x, i1 %y) sanitize_memory { +; CHECK-LABEL: @may_not_execute2_trivial( +entry: + br label %for.body +; CHECK-NOT: br i1 +; CHECK: br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %x.load = load i1, i1* %x + br i1 %x.load, label %for.inc, label %if.then +; CHECK: %[[XLOAD:.*]] = load i1, i1* %x +; CHECK: br i1 %[[XLOAD]] + +if.then: + br i1 %y, label %for.end, label %if.then4 +; CHECK: br i1 %y + +if.then4: + call void @unknown() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, 1 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + + +; The following is approximately: +; void f() { +; for (int i = 0; i < 1; ++i) { +; if (y) +; unknown(); +; else +; break; +; } +; } +; "if (y)" is guaranteed to execute; the loop can be unswitched. + +define void @must_execute_trivial() sanitize_memory { +; CHECK-LABEL: @must_execute_trivial( +entry: + %y = load i64, i64* @y, align 8 + %y.cmp = icmp eq i64 %y, 0 + br label %for.body +; CHECK: %[[Y:.*]] = load i64, i64* @y +; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0 +; CHECK: br i1 %[[YCMP]], label %[[EXIT_SPLIT:.*]], label %[[PH:.*]] +; +; CHECK: [[PH]]: +; CHECK: br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + br i1 %y.cmp, label %for.end, label %if.then4 +; CHECK: br label %if.then4 + +if.then4: + call void @unknown() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i, 1 + %cmp = icmp slt i32 %inc, 1 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +; CHECK: for.end: +; CHECK: br label %[[EXIT_SPLIT]] +; +; CHECK: [[EXIT_SPLIT]]: +; CHECK: ret void +} Index: test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll @@ -0,0 +1,129 @@ +; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s + +; Loop unswitch should be able to unswitch these loops and +; preserve LCSSA and LoopSimplify forms. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv6-apple-darwin9" + +@delim1 = external global i32 ; [#uses=1] +@delim2 = external global i32 ; [#uses=1] + +define i32 @ineqn(i8* %s, i8* %p) nounwind readonly { +entry: + %0 = load i32, i32* @delim1, align 4 ; [#uses=1] + %1 = load i32, i32* @delim2, align 4 ; [#uses=1] + br label %bb8.outer + +bb: ; preds = %bb8 + %2 = icmp eq i8* %p_addr.0, %s ; [#uses=1] + br i1 %2, label %bb10, label %bb2 + +bb2: ; preds = %bb + %3 = getelementptr inbounds i8, i8* %p_addr.0, i32 1 ; [#uses=3] + switch i32 %ineq.0.ph, label %bb8.backedge [ + i32 0, label %bb3 + i32 1, label %bb6 + ] + +bb8.backedge: ; preds = %bb6, %bb5, %bb2 + br label %bb8 + +bb3: ; preds = %bb2 + %4 = icmp eq i32 %8, %0 ; [#uses=1] + br i1 %4, label %bb8.outer.loopexit, label %bb5 + +bb5: ; preds = %bb3 + br i1 %6, label %bb6, label %bb8.backedge + +bb6: ; preds = %bb5, %bb2 + %5 = icmp eq i32 %8, %1 ; [#uses=1] + br i1 %5, label %bb7, label %bb8.backedge + +bb7: ; preds = %bb6 + %.lcssa1 = phi i8* [ %3, %bb6 ] ; [#uses=1] + br label %bb8.outer.backedge + +bb8.outer.backedge: ; preds = %bb8.outer.loopexit, %bb7 + %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; [#uses=1] + %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; [#uses=1] + br label %bb8.outer + +bb8.outer.loopexit: ; preds = %bb3 + %.lcssa = phi i8* [ %3, %bb3 ] ; [#uses=1] + br label %bb8.outer.backedge + +bb8.outer: ; preds = %bb8.outer.backedge, %entry + %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; [#uses=3] + %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; [#uses=1] + %6 = icmp eq i32 %ineq.0.ph, 1 ; [#uses=1] + br label %bb8 + +bb8: ; preds = %bb8.outer, %bb8.backedge + %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; [#uses=3] + %7 = load i8, i8* %p_addr.0, align 1 ; [#uses=2] + %8 = sext i8 %7 to i32 ; [#uses=2] + %9 = icmp eq i8 %7, 0 ; [#uses=1] + br i1 %9, label %bb10, label %bb + +bb10: ; preds = %bb8, %bb + %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ] ; [#uses=1] + ret i32 %.0 +} + +; This is a simplified form of ineqn from above. It triggers some +; different cases in the loop-unswitch code. + +define void @simplified_ineqn() nounwind readonly { +entry: + br label %bb8.outer + +bb8.outer: ; preds = %bb6, %bb2, %entry + %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; [#uses=1] + br i1 undef, label %return, label %bb2 + +bb2: ; preds = %bb + switch i32 %x, label %bb6 [ + i32 0, label %bb8.outer + ] + +bb6: ; preds = %bb2 + br i1 undef, label %bb8.outer, label %bb2 + +return: ; preds = %bb8, %bb + ret void +} + +; This function requires special handling to preserve LCSSA form. +; PR4934 + +define void @pnp_check_irq() nounwind noredzone { +entry: + %conv56 = trunc i64 undef to i32 ; [#uses=1] + br label %while.cond.i + +while.cond.i: ; preds = %while.cond.i.backedge, %entry + %call.i25 = call i8* @pci_get_device() nounwind noredzone ; [#uses=2] + br i1 undef, label %if.then65, label %while.body.i + +while.body.i: ; preds = %while.cond.i + br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge + +while.cond.i.backedge: ; preds = %if.then31.i.i, %while.body.i + br label %while.cond.i + +if.then31.i.i: ; preds = %while.body.i + switch i32 %conv56, label %while.cond.i.backedge [ + i32 14, label %if.then42.i.i + i32 15, label %if.then42.i.i + ] + +if.then42.i.i: ; preds = %if.then31.i.i, %if.then31.i.i + %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; [#uses=0] + unreachable + +if.then65: ; preds = %while.cond.i + unreachable +} + +declare i8* @pci_get_device() noredzone Index: test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll =================================================================== --- /dev/null +++ test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll @@ -0,0 +1,233 @@ +; RUN: opt -passes='loop(unswitch),verify' -S < %s | FileCheck %s + +declare void @some_func() noreturn + +; This test contains two trivial unswitch condition in one loop. +; LoopUnswitch pass should be able to unswitch the second one +; after unswitching the first one. +define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test1( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split +; +; CHECK: entry.split: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit +; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + br i1 %cond1, label %continue, label %loop_exit ; first trivial condition +; CHECK: loop_begin: +; CHECK-NEXT: br label %continue + +continue: + %var_val = load i32, i32* %var + br i1 %cond2, label %do_something, label %loop_exit ; second trivial condition +; CHECK: continue: +; CHECK-NEXT: load +; CHECK-NEXT: br label %do_something + +do_something: + call void @some_func() noreturn nounwind + br label %loop_begin +; CHECK: do_something: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_begin + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: ret +} + +; We will not be able trivially unswitch on the SwitchInst, as its input +; is a constant. However, since its a constant we should be able to figure +; out that the switch can be folded into a unconditional branch to %continue. +; Then we unswitch on the br inst in %continue. +define i32 @test2(i32* %var, i1 %cond1) { +; CHECK-LABEL: @test2( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + switch i32 1, label %continue [ + i32 0, label %loop_exit + i32 1, label %continue + ] +; CHECK: loop_begin: +; CHECK-NEXT: switch i32 1, label %continue [ +; CHECK-NEXT: i32 0, label %loop_exit +; CHECK-NEXT: i32 1, label %continue +; CHECK-NEXT: ] + +continue: + %var_val = load i32, i32* %var + br i1 %cond1, label %do_something, label %loop_exit +; CHECK: continue: +; CHECK-NEXT: load +; CHECK-NEXT: br label %do_something + +do_something: + call void @some_func() noreturn nounwind + br label %loop_begin +; CHECK: do_something: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_begin + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: br label %loop_exit.split +; +; CHECK: loop_exit.split: +; CHECK-NEXT: ret +} + +; Test for two trivially unswitchable switches. +define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @test3( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %cond1, label %entry.split [ +; CHECK-NEXT: i32 0, label %loop_exit1 +; CHECK-NEXT: ] +; +; CHECK: entry.split: +; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [ +; CHECK-NEXT: i32 42, label %loop_exit2 +; CHECK-NEXT: i32 0, label %entry.split.split +; CHECK-NEXT: ] +; +; CHECK: entry.split.split: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + switch i32 %cond1, label %continue [ + i32 0, label %loop_exit1 + ] +; CHECK: loop_begin: +; CHECK-NEXT: br label %continue + +continue: + %var_val = load i32, i32* %var + switch i32 %cond2, label %loop_exit2 [ + i32 0, label %do_something + i32 42, label %loop_exit2 + ] +; CHECK: continue: +; CHECK-NEXT: load +; CHECK-NEXT: br label %do_something + +do_something: + call void @some_func() noreturn nounwind + br label %loop_begin +; CHECK: do_something: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_begin + +loop_exit1: + ret i32 0 +; CHECK: loop_exit1: +; CHECK-NEXT: ret + +loop_exit2: + ret i32 0 +; CHECK: loop_exit2: +; CHECK-NEXT: ret +; +; We shouldn't have any unreachable blocks here because the unswitched switches +; turn into branches instead. +; CHECK-NOT: unreachable +} + +; Test for a trivially unswitchable switch with multiple exiting cases and +; multiple looping cases. +define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @test4( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [ +; CHECK-NEXT: i32 13, label %loop_exit1 +; CHECK-NEXT: i32 42, label %loop_exit3 +; CHECK-NEXT: i32 0, label %entry.split +; CHECK-NEXT: i32 1, label %entry.split +; CHECK-NEXT: i32 2, label %entry.split +; CHECK-NEXT: ] +; +; CHECK: entry.split: +; CHECK-NEXT: br label %loop_begin + +loop_begin: + %var_val = load i32, i32* %var + switch i32 %cond2, label %loop_exit2 [ + i32 0, label %loop0 + i32 1, label %loop1 + i32 13, label %loop_exit1 + i32 2, label %loop2 + i32 42, label %loop_exit3 + ] +; CHECK: loop_begin: +; CHECK-NEXT: load +; CHECK-NEXT: switch i32 %cond2, label %[[UNREACHABLE:.*]] [ +; CHECK-NEXT: i32 0, label %loop0 +; CHECK-NEXT: i32 1, label %loop1 +; CHECK-NEXT: i32 2, label %loop2 +; CHECK-NEXT: ] + +loop0: + call void @some_func() noreturn nounwind + br label %loop_latch +; CHECK: loop0: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_latch + +loop1: + call void @some_func() noreturn nounwind + br label %loop_latch +; CHECK: loop1: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_latch + +loop2: + call void @some_func() noreturn nounwind + br label %loop_latch +; CHECK: loop2: +; CHECK-NEXT: call +; CHECK-NEXT: br label %loop_latch + +loop_latch: + br label %loop_begin +; CHECK: loop_latch: +; CHECK-NEXT: br label %loop_begin + +loop_exit1: + ret i32 0 +; CHECK: loop_exit1: +; CHECK-NEXT: ret + +loop_exit2: + ret i32 0 +; CHECK: loop_exit2: +; CHECK-NEXT: ret + +loop_exit3: + ret i32 0 +; CHECK: loop_exit3: +; CHECK-NEXT: ret +; +; CHECK: [[UNREACHABLE]]: +; CHECK-NEXT: unreachable +}