Index: llvm/trunk/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h +++ llvm/trunk/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -17,9 +17,9 @@ namespace llvm { -/// This pass transforms loops that contain branches on loop-invariant -/// conditions to have multiple loops. For example, it turns the left into the -/// right code: +/// This pass transforms loops that contain branches or switches on loop- +/// invariant conditions to have multiple loops. For example, it turns the left +/// into the right code: /// /// for (...) if (lic) /// A for (...) @@ -35,6 +35,31 @@ /// This pass expects LICM to be run before it to hoist invariant conditions out /// of the loop, to make the unswitching opportunity obvious. /// +/// There is a taxonomy of unswitching that we use to classify different forms +/// of this transformaiton: +/// +/// - Trival unswitching: this is when the condition can be unswitched without +/// cloning any code from inside the loop. A non-trivial unswitch requires +/// code duplication. +/// +/// - Full unswitching: this is when the branch or switch is completely moved +/// from inside the loop to outside the loop. Partial unswitching removes the +/// branch from the clone of the loop but must leave a (somewhat simplified) +/// branch in the original loop. While theoretically partial unswitching can +/// be done for switches, the requirements are extreme - we need the loop +/// invariant input to the switch to be sufficient to collapse to a single +/// successor in each clone. +/// +/// This pass always does trivial, full unswitching for both branches and +/// switches. For branches, it also always does trivial, partial unswitching. +/// +/// If enabled (via the constructor's `NonTrivial` parameter), this pass will +/// additionally do non-trivial, full unswitching for branches and switches, and +/// will do non-trivial, partial unswitching for branches. +/// +/// Because partial unswitching of switches is extremely unlikely to be possible +/// in practice and significantly complicates the implementation, this pass does +/// not currently implement that in any mode. class SimpleLoopUnswitchPass : public PassInfoMixin { bool NonTrivial; Index: llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -715,8 +715,12 @@ /// /// This routine handles cloning all of the necessary loop blocks and exit /// blocks including rewriting their instructions and the relevant PHI nodes. -/// It skips loop and exit blocks that are not necessary based on the provided -/// set. It also correctly creates the unconditional branch in the cloned +/// Any loop blocks or exit blocks which are dominated by a different successor +/// than the one for this clone of the loop blocks can be trivially skipped. We +/// use the `DominatingSucc` map to determine whether a block satisfies that +/// property with a simple map lookup. +/// +/// It also correctly creates the unconditional branch in the cloned /// unswitched parent block to only point at the unswitched successor. /// /// This does not handle most of the necessary updates to `LoopInfo`. Only exit @@ -730,7 +734,7 @@ Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, - const SmallPtrSetImpl &SkippedLoopAndExitBlocks, + const SmallDenseMap &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI) { @@ -751,19 +755,26 @@ return NewBB; }; + // We skip cloning blocks when they have a dominating succ that is not the + // succ we are cloning for. + auto SkipBlock = [&](BasicBlock *BB) { + auto It = DominatingSucc.find(BB); + return It != DominatingSucc.end() && It->second != UnswitchedSuccBB; + }; + // First, clone the preheader. auto *ClonedPH = CloneBlock(LoopPH); // Then clone all the loop blocks, skipping the ones that aren't necessary. for (auto *LoopBB : L.blocks()) - if (!SkippedLoopAndExitBlocks.count(LoopBB)) + if (!SkipBlock(LoopBB)) CloneBlock(LoopBB); // Split all the loop exit edges so that when we clone the exit blocks, if // any of the exit blocks are *also* a preheader for some other loop, we // don't create multiple predecessors entering the loop header. for (auto *ExitBB : ExitBlocks) { - if (SkippedLoopAndExitBlocks.count(ExitBB)) + if (SkipBlock(ExitBB)) continue; // When we are going to clone an exit, we don't need to clone all the @@ -841,7 +852,7 @@ // Update any PHI nodes in the cloned successors of the skipped blocks to not // have spurious incoming values. for (auto *LoopBB : L.blocks()) - if (SkippedLoopAndExitBlocks.count(LoopBB)) + if (SkipBlock(LoopBB)) for (auto *SuccBB : successors(LoopBB)) if (auto *ClonedSuccBB = cast_or_null(VMap.lookup(SuccBB))) for (PHINode &PN : ClonedSuccBB->phis()) @@ -1175,10 +1186,41 @@ } static void +deleteDeadClonedBlocks(Loop &L, ArrayRef ExitBlocks, + ArrayRef> VMaps, + DominatorTree &DT) { + // Find all the dead clones, and remove them from their successors. + SmallVector DeadBlocks; + for (BasicBlock *BB : llvm::concat(L.blocks(), ExitBlocks)) + for (auto &VMap : VMaps) + if (BasicBlock *ClonedBB = cast_or_null(VMap->lookup(BB))) + if (!DT.isReachableFromEntry(ClonedBB)) { + for (BasicBlock *SuccBB : successors(ClonedBB)) + SuccBB->removePredecessor(ClonedBB); + DeadBlocks.push_back(ClonedBB); + } + + // Drop any remaining references to break cycles. + for (BasicBlock *BB : DeadBlocks) + BB->dropAllReferences(); + // Erase them from the IR. + for (BasicBlock *BB : DeadBlocks) + BB->eraseFromParent(); +} + +static void deleteDeadBlocksFromLoop(Loop &L, - const SmallVectorImpl &DeadBlocks, SmallVectorImpl &ExitBlocks, DominatorTree &DT, LoopInfo &LI) { + // Find all the dead blocks, and remove them from their successors. + SmallVector DeadBlocks; + for (BasicBlock *BB : llvm::concat(L.blocks(), ExitBlocks)) + if (!DT.isReachableFromEntry(BB)) { + for (BasicBlock *SuccBB : successors(BB)) + SuccBB->removePredecessor(BB); + DeadBlocks.push_back(BB); + } + SmallPtrSet DeadBlockSet(DeadBlocks.begin(), DeadBlocks.end()); @@ -1187,11 +1229,6 @@ llvm::erase_if(ExitBlocks, [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); - // Remove these blocks from their successors. - for (auto *BB : DeadBlocks) - for (BasicBlock *SuccBB : successors(BB)) - SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true); - // Walk from this loop up through its parents removing all of the dead blocks. for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) { for (auto *BB : DeadBlocks) @@ -1582,31 +1619,24 @@ } while (!DomWorklist.empty()); } -/// Take an invariant branch that has been determined to be safe and worthwhile -/// to unswitch despite being non-trivial to do so and perform the unswitch. -/// -/// This directly updates the CFG to hoist the predicate out of the loop, and -/// clone the necessary parts of the loop to maintain behavior. -/// -/// It also updates both dominator tree and loopinfo based on the unswitching. -/// -/// Once unswitching has been performed it runs the provided callback to report -/// the new loops and no-longer valid loops to the caller. -static bool unswitchInvariantBranch( - Loop &L, BranchInst &BI, ArrayRef Invariants, DominatorTree &DT, - LoopInfo &LI, AssumptionCache &AC, +static bool unswitchNontrivialInvariants( + Loop &L, TerminatorInst &TI, ArrayRef Invariants, + DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref)> UnswitchCB) { - auto *ParentBB = BI.getParent(); - - // We can only unswitch conditional branches with an invariant condition or - // combining invariant conditions with an instruction. - assert(BI.isConditional() && "Can only unswitch a conditional branch!"); - bool FullUnswitch = BI.getCondition() == Invariants[0]; + auto *ParentBB = TI.getParent(); + BranchInst *BI = dyn_cast(&TI); + SwitchInst *SI = BI ? nullptr : cast(&TI); + + // We can only unswitch switches, conditional branches with an invariant + // condition, or combining invariant conditions with an instruction. + assert((SI || BI->isConditional()) && + "Can only unswitch switches and conditional branch!"); + bool FullUnswitch = SI || BI->getCondition() == Invariants[0]; if (FullUnswitch) assert(Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"); else - assert(isa(BI.getCondition()) && + assert(isa(BI->getCondition()) && "Partial unswitching requires an instruction as the condition!"); // Constant and BBs tracking the cloned and continuing successor. When we are @@ -1618,18 +1648,27 @@ bool Direction = true; int ClonedSucc = 0; if (!FullUnswitch) { - if (cast(BI.getCondition())->getOpcode() != Instruction::Or) { - assert(cast(BI.getCondition())->getOpcode() == Instruction::And && - "Only `or` and `and` instructions can combine invariants being unswitched."); + if (cast(BI->getCondition())->getOpcode() != Instruction::Or) { + assert(cast(BI->getCondition())->getOpcode() == + Instruction::And && + "Only `or` and `and` instructions can combine invariants being " + "unswitched."); Direction = false; ClonedSucc = 1; } } - auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc); - auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc); - assert(UnswitchedSuccBB != ContinueSuccBB && - "Should not unswitch a branch that always goes to the same place!"); + BasicBlock *RetainedSuccBB = + BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest(); + SmallSetVector UnswitchedSuccBBs; + if (BI) + UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc)); + else + for (auto Case : SI->cases()) + UnswitchedSuccBBs.insert(Case.getCaseSuccessor()); + + assert(!UnswitchedSuccBBs.count(RetainedSuccBB) && + "Should not unswitch the same successor we are retaining!"); // The branch should be in this exact loop. Any inner loop's invariant branch // should be handled by unswitching that inner loop. The caller of this @@ -1648,9 +1687,6 @@ if (isa(ExitBB->getFirstNonPHI())) return false; - SmallPtrSet ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - // Compute the parent loop now before we start hacking on things. Loop *ParentL = L.getParentLoop(); @@ -1669,30 +1705,22 @@ OuterExitL = NewOuterExitL; } - // If the edge we *aren't* cloning in the unswitch (the continuing edge) - // dominates its target, we can skip cloning the dominated region of the loop - // and its exits. We compute this as a set of nodes to be skipped. - SmallPtrSet SkippedLoopAndExitBlocks; - if (ContinueSuccBB->getUniquePredecessor() || - llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) { - return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB); - })) { - visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) { - SkippedLoopAndExitBlocks.insert(BB); - return true; - }); - } - // If we are doing full unswitching, then similarly to the above, the edge we - // *are* cloning in the unswitch (the unswitched edge) dominates its target, - // we will end up with dead nodes in the original loop and its exits that will - // need to be deleted. Here, we just retain that the property holds and will - // compute the deleted set later. - bool DeleteUnswitchedSucc = - FullUnswitch && - (UnswitchedSuccBB->getUniquePredecessor() || - llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) { - return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB); - })); + // If the edge from this terminator to a successor dominates that successor, + // store a map from each block in its dominator subtree to it. This lets us + // tell when cloning for a particular successor if a block is dominated by + // some *other* successor with a single data structure. We use this to + // significantly reduce cloning. + SmallDenseMap DominatingSucc; + for (auto *SuccBB : llvm::concat( + makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs)) + if (SuccBB->getUniquePredecessor() || + llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { + return PredBB == ParentBB || DT.dominates(SuccBB, PredBB); + })) + visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) { + DominatingSucc[BB] = SuccBB; + return true; + }); // Split the preheader, so that we know that there is a safe place to insert // the conditional branch. We will change the preheader to have a conditional @@ -1702,84 +1730,93 @@ BasicBlock *SplitBB = L.getLoopPreheader(); BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI); - // Keep a mapping for the cloned values. - ValueToValueMapTy VMap; - // Keep track of the dominator tree updates needed. SmallVector DTUpdates; - // Build the cloned blocks from the loop. - auto *ClonedPH = buildClonedLoopBlocks( - L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB, - ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI); + // Clone the loop for each unswitched successor. + SmallVector, 4> VMaps; + VMaps.reserve(UnswitchedSuccBBs.size()); + SmallDenseMap ClonedPHs; + for (auto *SuccBB : UnswitchedSuccBBs) { + VMaps.emplace_back(new ValueToValueMapTy()); + ClonedPHs[SuccBB] = buildClonedLoopBlocks( + L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB, + DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI); + } // The stitching of the branched code back together depends on whether we're // doing full unswitching or not with the exception that we always want to // nuke the initial terminator placed in the split block. SplitBB->getTerminator()->eraseFromParent(); if (FullUnswitch) { - // Remove the parent as a predecessor of the - // unswitched successor. - UnswitchedSuccBB->removePredecessor(ParentBB, - /*DontDeleteUselessPHIs*/ true); - DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB}); - - // Now splice the branch from the original loop and use it to select between - // the two loops. - SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI); - BI.setSuccessor(ClonedSucc, ClonedPH); - BI.setSuccessor(1 - ClonedSucc, LoopPH); + for (BasicBlock *SuccBB : UnswitchedSuccBBs) { + // Remove the parent as a predecessor of the unswitched successor. + SuccBB->removePredecessor(ParentBB, + /*DontDeleteUselessPHIs*/ true); + DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB}); + } + + // Now splice the terminator from the original loop and rewrite its + // successors. + SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); + if (BI) { + assert(UnswitchedSuccBBs.size() == 1 && + "Only one possible unswitched block for a branch!"); + BasicBlock *ClonedPH = ClonedPHs.begin()->second; + BI->setSuccessor(ClonedSucc, ClonedPH); + BI->setSuccessor(1 - ClonedSucc, LoopPH); + DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + } else { + assert(SI && "Must either be a branch or switch!"); + + // Walk the cases and directly update their successors. + for (auto &Case : SI->cases()) + Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + // We need to use the set to populate domtree updates as even when there + // are multiple cases pointing at the same successor we only want to + // insert one edge in the domtree. + for (BasicBlock *SuccBB : UnswitchedSuccBBs) + DTUpdates.push_back( + {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); + + SI->setDefaultDest(LoopPH); + } // Create a new unconditional branch to the continuing block (as opposed to // the one cloned). - BranchInst::Create(ContinueSuccBB, ParentBB); + BranchInst::Create(RetainedSuccBB, ParentBB); } else { + assert(BI && "Only branches have partial unswitching."); + assert(UnswitchedSuccBBs.size() == 1 && + "Only one possible unswitched block for a branch!"); + BasicBlock *ClonedPH = ClonedPHs.begin()->second; // When doing a partial unswitch, we have to do a bit more work to build up // the branch in the split block. buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH); + DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } - // Before we update the dominator tree, collect the dead blocks if we're going - // to end up deleting the unswitched successor. - SmallVector DeadBlocks; - if (DeleteUnswitchedSucc) { - DeadBlocks.push_back(UnswitchedSuccBB); - for (int i = 0; i < (int)DeadBlocks.size(); ++i) { - // If we reach an exit block, stop recursing as the unswitched loop will - // end up reaching the merge block which we make the successor of the - // exit. - if (ExitBlockSet.count(DeadBlocks[i])) - continue; - - // Insert the children that are within the loop or exit block set. Other - // children may reach out of the loop. While we don't expect these to be - // dead (as the unswitched clone should reach them) we don't try to prove - // that here. - for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) - if (L.contains(ChildN->getBlock()) || - ExitBlockSet.count(ChildN->getBlock())) - DeadBlocks.push_back(ChildN->getBlock()); - } - } - - // Add the remaining edge to our updates and apply them to get an up-to-date - // dominator tree. Note that this will cause the dead blocks above to be - // unreachable and no longer in the dominator tree. - DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + // Apply the updates accumulated above to get an up-to-date dominator tree. DT.applyUpdates(DTUpdates); + // Now that we have an accurate dominator tree, first delete the dead cloned + // blocks so that we can accurately build any cloned loops. It is important to + // not delete the blocks from the original loop yet because we still want to + // reference the original loop to understand the cloned loop's structure. + deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT); + // Build the cloned loop structure itself. This may be substantially // different from the original structure due to the simplified CFG. This also // handles inserting all the cloned blocks into the correct loops. SmallVector NonChildClonedLoops; - buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops); - - // Delete anything that was made dead in the original loop due to - // unswitching. - if (!DeadBlocks.empty()) - deleteDeadBlocksFromLoop(L, DeadBlocks, ExitBlocks, DT, LI); + for (std::unique_ptr &VMap : VMaps) + buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops); + // Now that our cloned loops have been built, we can update the original loop. + // First we delete the dead blocks from it and then we rebuild the loop + // structure taking these deletions into account. + deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI); SmallVector HoistedLoops; bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops); @@ -1790,31 +1827,37 @@ // verification steps. assert(DT.verify(DominatorTree::VerificationLevel::Fast)); - // Now we want to replace all the uses of the invariants within both the - // original and cloned blocks. We do this here so that we can use the now - // updated dominator tree to identify which side the users are on. - ConstantInt *UnswitchedReplacement = - Direction ? ConstantInt::getTrue(BI.getContext()) - : ConstantInt::getFalse(BI.getContext()); - ConstantInt *ContinueReplacement = - Direction ? ConstantInt::getFalse(BI.getContext()) - : ConstantInt::getTrue(BI.getContext()); - for (Value *Invariant : Invariants) - for (auto UI = Invariant->use_begin(), UE = Invariant->use_end(); - UI != UE;) { - // Grab the use and walk past it so we can clobber it in the use list. - Use *U = &*UI++; - Instruction *UserI = dyn_cast(U->getUser()); - if (!UserI) - continue; + if (BI) { + // If we unswitched a branch which collapses the condition to a known + // constant we want to replace all the uses of the invariants within both + // the original and cloned blocks. We do this here so that we can use the + // now updated dominator tree to identify which side the users are on. + assert(UnswitchedSuccBBs.size() == 1 && + "Only one possible unswitched block for a branch!"); + BasicBlock *ClonedPH = ClonedPHs.begin()->second; + ConstantInt *UnswitchedReplacement = + Direction ? ConstantInt::getTrue(BI->getContext()) + : ConstantInt::getFalse(BI->getContext()); + ConstantInt *ContinueReplacement = + Direction ? ConstantInt::getFalse(BI->getContext()) + : ConstantInt::getTrue(BI->getContext()); + for (Value *Invariant : Invariants) + for (auto UI = Invariant->use_begin(), UE = Invariant->use_end(); + UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast(U->getUser()); + if (!UserI) + continue; - // Replace it with the 'continue' side if in the main loop body, and the - // unswitched if in the cloned blocks. - if (DT.dominates(LoopPH, UserI->getParent())) - U->set(ContinueReplacement); - else if (DT.dominates(ClonedPH, UserI->getParent())) - U->set(UnswitchedReplacement); - } + // Replace it with the 'continue' side if in the main loop body, and the + // unswitched if in the cloned blocks. + if (DT.dominates(LoopPH, UserI->getParent())) + U->set(ContinueReplacement); + else if (DT.dominates(ClonedPH, UserI->getParent())) + U->set(UnswitchedReplacement); + } + } // We can change which blocks are exit blocks of all the cloned sibling // loops, the current loop, and any parent loops which shared exit blocks @@ -1937,8 +1980,16 @@ if (LI.getLoopFor(BB) != &L) continue; + if (auto *SI = dyn_cast(BB->getTerminator())) { + // We can only consider fully loop-invariant switch conditions as we need + // to completely eliminate the switch after unswitching. + if (!isa(SI->getCondition()) && + L.isLoopInvariant(SI->getCondition())) + UnswitchCandidates.push_back({SI, {SI->getCondition()}}); + continue; + } + auto *BI = dyn_cast(BB->getTerminator()); - // FIXME: Handle switches here! if (!BI || !BI->isConditional() || isa(BI->getCondition()) || BI->getSuccessor(0) == BI->getSuccessor(1)) continue; @@ -2091,9 +2142,9 @@ TerminatorInst &TI = *TerminatorAndInvariants.first; ArrayRef Invariants = TerminatorAndInvariants.second; BranchInst *BI = dyn_cast(&TI); - int CandidateCost = - ComputeUnswitchedCost(TI, /*FullUnswitch*/ Invariants.size() == 1 && BI && - Invariants[0] == BI->getCondition()); + int CandidateCost = ComputeUnswitchedCost( + TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 && + Invariants[0] == BI->getCondition())); LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost << " for unswitch candidate: " << TI << "\n"); if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) { @@ -2109,17 +2160,11 @@ return false; } - auto *UnswitchBI = dyn_cast(BestUnswitchTI); - if (!UnswitchBI) { - // FIXME: Add support for unswitching a switch here! - LLVM_DEBUG(dbgs() << "Cannot unswitch anything but a branch!\n"); - return false; - } - LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = " - << BestUnswitchCost << ") branch: " << *UnswitchBI << "\n"); - return unswitchInvariantBranch(L, *UnswitchBI, BestUnswitchInvariants, DT, LI, - AC, UnswitchCB); + << BestUnswitchCost << ") terminator: " << *BestUnswitchTI + << "\n"); + return unswitchNontrivialInvariants( + L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB); } /// Unswitch control flow predicated on loop invariant conditions. Index: llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll =================================================================== --- llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll +++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll @@ -387,7 +387,7 @@ loop_b: %b = load i32, i32* %b.ptr br i1 %v, label %loop_begin, label %loop_exit -; The 'loop_b' unswitched loop. +; The original loop, now non-looping due to unswitching.. ; ; CHECK: entry.split: ; CHECK-NEXT: br label %loop_begin @@ -398,14 +398,13 @@ ; CHECK-NEXT: br label %loop_exit.split ; ; CHECK: loop_exit.split: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin ] ; CHECK-NEXT: br label %loop_exit loop_exit: %ab.phi = phi i32 [ %b, %loop_b ], [ %a, %loop_begin ] ret i32 %ab.phi ; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ] +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ] ; CHECK-NEXT: ret i32 %[[AB_PHI]] } @@ -458,8 +457,7 @@ call void @sink1(i32 %a.phi) ret void ; CHECK: loop_exit1: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit1.split.us ] -; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) +; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]]) ; CHECK-NEXT: ret void loop_exit2: @@ -467,8 +465,8 @@ call void @sink2(i32 %b.phi) ret void ; CHECK: loop_exit2: -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) ; CHECK-NEXT: ret void } @@ -531,8 +529,7 @@ call void @sink2(i32 %b.phi) ret void ; CHECK: loop_exit2: -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit2.split.us ] -; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) ; CHECK-NEXT: ret void } @@ -587,8 +584,7 @@ call void @sink1(i32 %a.phi) br label %exit ; CHECK: loop_exit1: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit1.split.us ] -; CHECK-NEXT: call void @sink1(i32 %[[A_PHI]]) +; CHECK-NEXT: call void @sink1(i32 %[[A_LCSSA]]) ; CHECK-NEXT: br label %exit loop_exit2: @@ -596,8 +592,8 @@ call void @sink2(i32 %b.phi) br label %exit ; CHECK: loop_exit2: -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B]], %loop_b ] -; CHECK-NEXT: call void @sink2(i32 %[[B_PHI]]) +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ] +; CHECK-NEXT: call void @sink2(i32 %[[B_LCSSA]]) ; CHECK-NEXT: br label %exit exit: @@ -663,7 +659,7 @@ %v2 = load i1, i1* %ptr br i1 %v2, label %loop_begin, label %loop_exit ; CHECK: loop_latch: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ] +; CHECK-NEXT: %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ] ; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.loopexit1 @@ -671,15 +667,14 @@ %ab.phi = phi i32 [ %a, %inner_loop_begin ], [ %b.phi, %loop_latch ] ret i32 %ab.phi ; CHECK: loop_exit.loopexit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit.split.us ] ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit.loopexit1: -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_latch ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_latch ] ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit: -; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_PHI]], %loop_exit.loopexit ], [ %[[B_PHI]], %loop_exit.loopexit1 ] +; CHECK-NEXT: %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[B_LCSSA]], %loop_exit.loopexit1 ] ; CHECK-NEXT: ret i32 %[[AB_PHI]] } @@ -773,11 +768,10 @@ ; CHECK-NEXT: br label %latch ; ; CHECK: latch: -; CHECK-NEXT: %[[B_PHI:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_b_inner_exit ] ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split ; ; CHECK: loop_exit.split: -; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_PHI]], %latch ] +; CHECK-NEXT: %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %latch ] ; CHECK-NEXT: br label %loop_exit loop_exit: @@ -1466,7 +1460,6 @@ %v = load i1, i1* %ptr br i1 %v, label %loop_begin, label %loop_exit ; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.split.us ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit @@ -1474,7 +1467,7 @@ %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] ret i32 %a.lcssa ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ] +; CHECK-NEXT: %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] ; CHECK-NEXT: ret i32 %[[A_LCSSA]] } @@ -1555,7 +1548,7 @@ ret i32 %a.lcssa ; CHECK: loop_exit: ; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] +; CHECK-NEXT: ret i32 %[[A_PHI]] } ; Test that requires re-forming dedicated exits for the original loop. @@ -1635,7 +1628,7 @@ ret i32 %a.lcssa ; CHECK: loop_exit: ; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ] -; CHECK-NEXT: ret i32 %[[AB_PHI]] +; CHECK-NEXT: ret i32 %[[A_PHI]] } ; Check that if a cloned inner loop after unswitching doesn't loop and directly @@ -1721,7 +1714,6 @@ %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] ret i32 %a.lcssa ; CHECK: loop_exit.loopexit: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit.split.us ] ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit.loopexit1: @@ -1729,7 +1721,7 @@ ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit: -; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] +; CHECK-NEXT: %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ] ; CHECK-NEXT: ret i32 %[[A_PHI]] } @@ -1802,7 +1794,6 @@ %v3 = load i1, i1* %ptr br i1 %v3, label %loop_latch, label %loop_exit ; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.split.us ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1 @@ -1819,7 +1810,7 @@ ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit.loopexit1: -; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ] +; CHECK-NEXT: %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ] ; CHECK-NEXT: br label %loop_exit ; ; CHECK: loop_exit: @@ -1916,7 +1907,6 @@ %v4 = load i1, i1* %ptr br i1 %v4, label %loop_begin, label %loop_exit ; CHECK: inner_loop_exit.loopexit: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ] ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit.loopexit1: @@ -1924,7 +1914,7 @@ ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] +; CHECK-NEXT: %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit @@ -2010,7 +2000,6 @@ %v3 = load i1, i1* %ptr br i1 %v3, label %inner_loop_latch, label %inner_loop_exit ; CHECK: inner_inner_loop_exit: -; CHECK-NEXT: %[[A_INNER_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit.split.us ] ; CHECK-NEXT: %[[V:.*]] = load i1, i1* %ptr ; CHECK-NEXT: br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1 @@ -2028,7 +2017,7 @@ ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit.loopexit1: -; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_PHI]], %inner_inner_loop_exit ] +; CHECK-NEXT: %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit ] ; CHECK-NEXT: br label %inner_loop_exit ; ; CHECK: inner_loop_exit: @@ -2296,56 +2285,96 @@ entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop_begin +; CHECK-NEXT: switch i32 %cond2, label %[[ENTRY_SPLIT_EXIT:.*]] [ +; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]] +; CHECK-NEXT: i32 13, label %[[ENTRY_SPLIT_B:.*]] +; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_A]] +; CHECK-NEXT: i32 42, label %[[ENTRY_SPLIT_C:.*]] +; CHECK-NEXT: ] loop_begin: %var_val = load i32, i32* %var - switch i32 %cond2, label %loop_a [ - i32 0, label %loop_b - i32 1, label %loop_b - i32 13, label %loop_c - i32 2, label %loop_b - i32 42, label %loop_exit + switch i32 %cond2, label %loop_exit [ + i32 0, label %loop_a + i32 1, label %loop_a + i32 13, label %loop_b + i32 2, label %loop_a + i32 42, label %loop_c ] -; CHECK: loop_begin: -; CHECK-NEXT: %[[V:.*]] = load i32, i32* %var -; CHECK-NEXT: switch i32 %cond2, label %loop_a [ -; CHECK-NEXT: i32 0, label %loop_b -; CHECK-NEXT: i32 1, label %loop_b -; CHECK-NEXT: i32 13, label %loop_c -; CHECK-NEXT: i32 2, label %loop_b -; CHECK-NEXT: i32 42, label %loop_exit -; CHECK-NEXT: ] loop_a: call void @a() br label %loop_latch -; CHECK: loop_a: +; Unswitched 'a' loop. +; +; CHECK: [[ENTRY_SPLIT_A]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]] +; +; CHECK: [[LOOP_BEGIN_A]]: +; CHECK-NEXT: %{{.*}} = load i32, i32* %var +; CHECK-NEXT: br label %[[LOOP_A:.*]] +; +; CHECK: [[LOOP_A]]: ; CHECK-NEXT: call void @a() -; CHECK-NEXT: br label %loop_latch +; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]] +; +; CHECK: [[LOOP_LATCH_A]]: +; CHECK: br label %[[LOOP_BEGIN_A]] loop_b: call void @b() br label %loop_latch -; CHECK: loop_b: +; Unswitched 'b' loop. +; +; CHECK: [[ENTRY_SPLIT_B]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]] +; +; CHECK: [[LOOP_BEGIN_B]]: +; CHECK-NEXT: %{{.*}} = load i32, i32* %var +; CHECK-NEXT: br label %[[LOOP_B:.*]] +; +; CHECK: [[LOOP_B]]: ; CHECK-NEXT: call void @b() -; CHECK-NEXT: br label %loop_latch +; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]] +; +; CHECK: [[LOOP_LATCH_B]]: +; CHECK: br label %[[LOOP_BEGIN_B]] loop_c: call void @c() noreturn nounwind br label %loop_latch -; CHECK: loop_c: +; Unswitched 'c' loop. +; +; CHECK: [[ENTRY_SPLIT_C]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]] +; +; CHECK: [[LOOP_BEGIN_C]]: +; CHECK-NEXT: %{{.*}} = load i32, i32* %var +; CHECK-NEXT: br label %[[LOOP_C:.*]] +; +; CHECK: [[LOOP_C]]: ; CHECK-NEXT: call void @c() -; CHECK-NEXT: br label %loop_latch +; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]] +; +; CHECK: [[LOOP_LATCH_C]]: +; CHECK: br label %[[LOOP_BEGIN_C]] loop_latch: br label %loop_begin -; CHECK: loop_latch: -; CHECK-NEXT: br label %loop_begin loop_exit: %lcssa = phi i32 [ %var_val, %loop_begin ] ret i32 %lcssa +; Unswitched exit edge (no longer a loop). +; +; CHECK: [[ENTRY_SPLIT_EXIT]]: +; CHECK-NEXT: br label %loop_begin +; +; CHECK: loop_begin: +; CHECK-NEXT: %[[V:.*]] = load i32, i32* %var +; CHECK-NEXT: br label %loop_exit +; ; CHECK: loop_exit: ; CHECK-NEXT: %[[LCSSA:.*]] = phi i32 [ %[[V]], %loop_begin ] ; CHECK-NEXT: ret i32 %[[LCSSA]] @@ -2824,3 +2853,112 @@ ; CHECK: loop_exit: ; CHECK-NEXT: ret } + +; Non-trivial unswitching of a switch. +define i32 @test27(i1* %ptr, i32 %cond) { +; CHECK-LABEL: @test27( +entry: + br label %loop_begin +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 %cond, label %[[ENTRY_SPLIT_LATCH:.*]] [ +; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]] +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]] +; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_C:.*]] +; CHECK-NEXT: ] + +loop_begin: + switch i32 %cond, label %latch [ + i32 0, label %loop_a + i32 1, label %loop_b + i32 2, label %loop_c + ] + +loop_a: + call void @a() + br label %latch +; Unswitched 'a' loop. +; +; CHECK: [[ENTRY_SPLIT_A]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_A:.*]] +; +; CHECK: [[LOOP_BEGIN_A]]: +; CHECK-NEXT: br label %[[LOOP_A:.*]] +; +; CHECK: [[LOOP_A]]: +; CHECK-NEXT: call void @a() +; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]] +; +; CHECK: [[LOOP_LATCH_A]]: +; CHECK-NEXT: %[[V_A:.*]] = load i1, i1* %ptr +; CHECK: br i1 %[[V_A]], label %[[LOOP_BEGIN_A]], label %[[LOOP_EXIT_A:.*]] +; +; CHECK: [[LOOP_EXIT_A]]: +; CHECK-NEXT: br label %loop_exit + +loop_b: + call void @b() + br label %latch +; Unswitched 'b' loop. +; +; CHECK: [[ENTRY_SPLIT_B]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_B:.*]] +; +; CHECK: [[LOOP_BEGIN_B]]: +; CHECK-NEXT: br label %[[LOOP_B:.*]] +; +; CHECK: [[LOOP_B]]: +; CHECK-NEXT: call void @b() +; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]] +; +; CHECK: [[LOOP_LATCH_B]]: +; CHECK-NEXT: %[[V_B:.*]] = load i1, i1* %ptr +; CHECK: br i1 %[[V_B]], label %[[LOOP_BEGIN_B]], label %[[LOOP_EXIT_B:.*]] +; +; CHECK: [[LOOP_EXIT_B]]: +; CHECK-NEXT: br label %loop_exit + +loop_c: + call void @c() + br label %latch +; Unswitched 'c' loop. +; +; CHECK: [[ENTRY_SPLIT_C]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_C:.*]] +; +; CHECK: [[LOOP_BEGIN_C]]: +; CHECK-NEXT: br label %[[LOOP_C:.*]] +; +; CHECK: [[LOOP_C]]: +; CHECK-NEXT: call void @c() +; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]] +; +; CHECK: [[LOOP_LATCH_C]]: +; CHECK-NEXT: %[[V_C:.*]] = load i1, i1* %ptr +; CHECK: br i1 %[[V_C]], label %[[LOOP_BEGIN_C]], label %[[LOOP_EXIT_C:.*]] +; +; CHECK: [[LOOP_EXIT_C]]: +; CHECK-NEXT: br label %loop_exit + +latch: + %v = load i1, i1* %ptr + br i1 %v, label %loop_begin, label %loop_exit +; Unswitched the 'latch' only loop. +; +; CHECK: [[ENTRY_SPLIT_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_BEGIN_LATCH:.*]] +; +; CHECK: [[LOOP_BEGIN_LATCH]]: +; CHECK-NEXT: br label %[[LOOP_LATCH_LATCH:.*]] +; +; CHECK: [[LOOP_LATCH_LATCH]]: +; CHECK-NEXT: %[[V_LATCH:.*]] = load i1, i1* %ptr +; CHECK: br i1 %[[V_LATCH]], label %[[LOOP_BEGIN_LATCH]], label %[[LOOP_EXIT_LATCH:.*]] +; +; CHECK: [[LOOP_EXIT_LATCH]]: +; CHECK-NEXT: br label %loop_exit + +loop_exit: + ret i32 0 +; CHECK: loop_exit: +; CHECK-NEXT: ret i32 0 +} \ No newline at end of file