Index: llvm/include/llvm/IR/Dominators.h =================================================================== --- llvm/include/llvm/IR/Dominators.h +++ llvm/include/llvm/IR/Dominators.h @@ -290,6 +290,90 @@ void print(raw_ostream &OS, const Module *M = nullptr) const override; }; +//===------------------------------------- +/// \brief Class to defer updates to a DominatorTree. +/// +/// Definition: Applying updates to every edge insertion and deletion is +/// expensive and not necessary. When one needs the DominatorTree for analysis +/// they can request a flush() to perform a larger batch update. This has the +/// advantage of the DominatorTree inspecting the set of updates to find +/// duplicates or unnecessary subtree updates. +/// +/// The scope of DeferredDominance operates at a Function level. +/// +/// It is not necessary for the user to scrub the updates for duplicates or +/// updates that point to the same block (Delete, BB_A, BB_A). Performance +/// can be gained if the caller attempts to batch updates before submitting +/// to applyUpdates(ArrayRef) in cases where duplicate edge requests will +/// occur. +/// +/// It is required for the state of the LLVM IR to be applied *before* +/// submitting updates. The update routines must analyze the current state +/// between a pair of (From, To) basic blocks to determine if the update +/// needs to be queued. +/// Example (good): +/// TerminatorInstructionBB->removeFromParent(); +/// DDT->deleteEdge(BB, Successor); +/// Example (bad): +/// DDT->deleteEdge(BB, Successor); +/// TerminatorInstructionBB->removeFromParent(); +class DeferredDominance { +public: + DeferredDominance(DominatorTree &DT_) : DT(DT_) {} + + /// \brief Queues multiple updates and discards duplicates. + void applyUpdates(ArrayRef Updates); + + /// \brief Helper method for a single edge insertion. It's almost always + /// better to batch updates and call applyUpdates to quickly remove duplicate + /// edges. This is best used when there is only a single insertion needed to + /// update Dominators. + void insertEdge(BasicBlock *From, BasicBlock *To); + + /// \brief Helper method for a single edge deletion. It's almost always better + /// to batch updates and call applyUpdates to quickly remove duplicate edges. + /// This is best used when there is only a single deletion needed to update + /// Dominators. + void deleteEdge(BasicBlock *From, BasicBlock *To); + + /// \brief Delays the deletion of a basic block until a flush() event. + void deleteBB(BasicBlock *DelBB); + + /// \brief Returns true if DelBB is awaiting deletion at a flush() event. + bool pendingDeletedBB(BasicBlock *DelBB); + + /// \brief Flushes all pending updates and block deletions. Returns a + /// correct DominatorTree reference to be used by the caller for analysis. + DominatorTree &flush(); + + /// \brief Drops all internal state and forces a (slow) recalculation of the + /// DominatorTree based on the current state of the LLVM IR in F. This should + /// only be used in corner cases such as the Entry block of F being deleted. + void recalculate(Function &F); + + /// \brief Debug method to help view the state of pending updates. + LLVM_DUMP_METHOD void dump() const; + +private: + DominatorTree &DT; + SmallVector PendUpdates; + SmallPtrSet DeletedBBs; + + /// Apply an update (Kind, From, To) to the internal queued updates. The + /// update is only added when determined to be necessary. Checks for + /// self-domination, unnecessary updates, duplicate requests, and balanced + /// pairs of requests are all performed. Returns true if the update is + /// queued and false if it is discarded. + bool applyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From, + BasicBlock *To); + + /// Performs all pending basic block deletions. We have to defer the deletion + /// of these blocks until after the DominatorTree updates are applied. The + /// internal workings of the DominatorTree code expect every update's From + /// and To blocks to exist and to be a member of the same Function. + bool flushDelBB(); +}; + } // end namespace llvm #endif // LLVM_IR_DOMINATORS_H Index: llvm/include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -34,6 +34,7 @@ class BranchInst; class CmpInst; class Constant; +class DeferredDominance; class Function; class Instruction; class IntrinsicInst; @@ -77,6 +78,7 @@ TargetLibraryInfo *TLI; LazyValueInfo *LVI; AliasAnalysis *AA; + DeferredDominance *DDT; std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = false; @@ -107,8 +109,8 @@ // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, - AliasAnalysis *AA_, bool HasProfileData_, - std::unique_ptr BFI_, + AliasAnalysis *AA_, DeferredDominance *DDT_, + bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -27,6 +27,7 @@ class BlockFrequencyInfo; class BranchProbabilityInfo; +class DeferredDominance; class DominatorTree; class Function; class Instruction; @@ -38,7 +39,7 @@ class Value; /// Delete the specified block, which must have no predecessors. -void DeleteDeadBlock(BasicBlock *BB); +void DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT = nullptr); /// We know that BB has one predecessor. If there are any single-entry PHI nodes /// in it, fold them away. This handles the case when all entries to the PHI Index: llvm/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Local.h +++ llvm/include/llvm/Transforms/Utils/Local.h @@ -117,7 +117,8 @@ /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false, - const TargetLibraryInfo *TLI = nullptr); + const TargetLibraryInfo *TLI = nullptr, + DeferredDominance *DDT = nullptr); //===----------------------------------------------------------------------===// // Local dead code elimination. @@ -171,18 +172,21 @@ /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the 'and' to 0. -void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred); +void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DeferredDominance *DDT = nullptr); /// BB is a block with one predecessor and its predecessor is known to have one /// successor (BB!). Eliminate the edge between them, moving the instructions in /// the predecessor into BB. This deletes the predecessor block. -void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr); +void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr, + DeferredDominance *DDT = nullptr); /// BB is known to contain an unconditional branch, and contains no instructions /// other than PHI nodes, potential debug intrinsics and the branch. If /// possible, eliminate BB by rewriting all the predecessors to branch to the /// successor block and return true. If we can't transform, return false. -bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB); +bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DeferredDominance *DDT = nullptr); /// Check for and eliminate duplicate PHI nodes in this block. This doesn't try /// to be clever about PHI nodes which differ only in the order of the incoming @@ -382,7 +386,8 @@ /// Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA = false); + bool PreserveLCSSA = false, + DeferredDominance *DDT = nullptr); /// Convert the CallInst to InvokeInst with the specified unwind edge basic /// block. This also splits the basic block where CI is located, because @@ -397,12 +402,13 @@ /// /// \param BB Block whose terminator will be replaced. Its terminator must /// have an unwind successor. -void removeUnwindEdge(BasicBlock *BB); +void removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT = nullptr); /// Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr); +bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, + DeferredDominance *DDT = nullptr); /// Combine the metadata of two instructions so that K can replace J /// Index: llvm/lib/IR/Dominators.cpp =================================================================== --- llvm/lib/IR/Dominators.cpp +++ llvm/lib/IR/Dominators.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -389,3 +390,190 @@ DT.print(OS); } +//===----------------------------------------------------------------------===// +// DeferredDominance Implementation +//===----------------------------------------------------------------------===// +// +// The implementation details of the DeferredDominance class which allows +// one to queue updates to a DominatorTree. +// +//===----------------------------------------------------------------------===// + +/// \brief Queues multiple updates and discards duplicates. +void DeferredDominance::applyUpdates( + ArrayRef Updates) { + SmallVector Seen; + for (auto U : Updates) + // Avoid duplicates to applyUpdate() to save on analysis. + if (std::none_of(Seen.begin(), Seen.end(), + [U](DominatorTree::UpdateType S) { return S == U; })) { + Seen.push_back(U); + applyUpdate(U.getKind(), U.getFrom(), U.getTo()); + } +} + +/// \brief Helper method for a single edge insertion. It's almost always better +/// to batch updates and call applyUpdates to quickly remove duplicate edges. +/// This is best used when there is only a single insertion needed to update +/// Dominators. +void DeferredDominance::insertEdge(BasicBlock *From, BasicBlock *To) { + applyUpdate(DominatorTree::Insert, From, To); +} + +/// \brief Helper method for a single edge deletion. It's almost always better +/// to batch updates and call applyUpdates to quickly remove duplicate edges. +/// This is best used when there is only a single deletion needed to update +/// Dominators. +void DeferredDominance::deleteEdge(BasicBlock *From, BasicBlock *To) { + applyUpdate(DominatorTree::Delete, From, To); +} + +/// \brief Delays the deletion of a basic block until a flush() event. +void DeferredDominance::deleteBB(BasicBlock *DelBB) { + assert(DelBB && "Invalid push_back of nullptr DelBB."); + assert(pred_empty(DelBB) && "DelBB has one or more predecessors."); + // DelBB is unreachable and all its instructions are dead. + while (!DelBB->empty()) { + Instruction &I = DelBB->back(); + // Replace used instructions with an arbitrary value (undef). + if (!I.use_empty()) + I.replaceAllUsesWith(llvm::UndefValue::get(I.getType())); + DelBB->getInstList().pop_back(); + } + // Make sure DelBB has a valid terminator instruction. As long as DelBB is a + // Child of Function F it must contain valid IR. + new UnreachableInst(DelBB->getContext(), DelBB); + DeletedBBs.insert(DelBB); +} + +/// \brief Returns true if DelBB is awaiting deletion at a flush() event. +bool DeferredDominance::pendingDeletedBB(BasicBlock *DelBB) { + if (DeletedBBs.empty()) + return false; + return DeletedBBs.count(DelBB) != 0; +} + +/// \brief Flushes all pending updates and block deletions. Returns a +/// correct DominatorTree reference to be used by the caller for analysis. +DominatorTree &DeferredDominance::flush() { + // Updates to DT must happen before blocks are deleted below. Otherwise the + // DT traversal will encounter badref blocks and assert. + if (!PendUpdates.empty()) { + DT.applyUpdates(PendUpdates); + PendUpdates.clear(); + } + flushDelBB(); + return DT; +} + +/// \brief Drops all internal state and forces a (slow) recalculation of the +/// DominatorTree based on the current state of the LLVM IR in F. This should +/// only be used in corner cases such as the Entry block of F being deleted. +void DeferredDominance::recalculate(Function &F) { + // flushDelBB must be flushed before the recalculation. The state of the IR + // must be consistent before the DT traversal algorithm determines the + // actual DT. + if (flushDelBB() || !PendUpdates.empty()) { + DT.recalculate(F); + PendUpdates.clear(); + } +} + +/// \brief Debug method to help view the state of pending updates. +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DeferredDominance::dump() const { + raw_ostream &OS = llvm::dbgs(); + OS << "PendUpdates:\n"; + int I = 0; + for (auto U : PendUpdates) { + OS << " " << I << " : "; + ++I; + if (U.getKind() == DominatorTree::Insert) + OS << "Insert, "; + else + OS << "Delete, "; + BasicBlock *From = U.getFrom(); + if (From) { + auto S = From->getName(); + if (!From->hasName()) + S = "(no name)"; + OS << S << "(" << From << "), "; + } else { + OS << "(badref), "; + } + BasicBlock *To = U.getTo(); + if (To) { + auto S = To->getName(); + if (!To->hasName()) + S = "(no_name)"; + OS << S << "(" << To << ")\n"; + } else { + OS << "(badref)\n"; + } + } + OS << "DeletedBBs:\n"; + I = 0; + for (auto BB : DeletedBBs) { + OS << " " << I << " : "; + ++I; + if (BB->hasName()) + OS << BB->getName() << "("; + else + OS << "(no_name)("; + OS << BB << ")\n"; + } +} +#endif + +/// Apply an update (Kind, From, To) to the internal queued updates. The +/// update is only added when determined to be necessary. Checks for +/// self-domination, unnecessary updates, duplicate requests, and balanced +/// pairs of requests are all performed. Returns true if the update is +/// queued and false if it is discarded. +bool DeferredDominance::applyUpdate(DominatorTree::UpdateKind Kind, + BasicBlock *From, BasicBlock *To) { + if (From == To) + return false; // Cannot dominate self; discard update. + + // Discard updates by inspecting the current state of successors of From. + // Since applyUpdate() must be called *after* the Terminator of From is + // altered we can determine if the update is unnecessary. + bool HasEdge = std::any_of(succ_begin(From), succ_end(From), + [To](BasicBlock *B) { return B == To; }); + if (Kind == DominatorTree::Insert && !HasEdge) + return false; // Unnecessary Insert: edge does not exist in IR. + if (Kind == DominatorTree::Delete && HasEdge) + return false; // Unnecessary Delete: edge still exists in IR. + + // Analyze pending updates to determine if the update is unnecessary. + DominatorTree::UpdateType Update = {Kind, From, To}; + DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert + ? DominatorTree::Insert + : DominatorTree::Delete, + From, To}; + for (auto I = PendUpdates.begin(), E = PendUpdates.end(); I != E; ++I) { + if (Update == *I) + return false; // Discard duplicate updates. + if (Invert == *I) { + // Update and Invert are both valid (equivalent to a no-op). Remove + // Invert from PendUpdates and discard the Update. + PendUpdates.erase(I); + return false; + } + } + PendUpdates.push_back(Update); // Save the valid update. + return true; +} + +/// Performs all pending basic block deletions. We have to defer the deletion +/// of these blocks until after the DominatorTree updates are applied. The +/// internal workings of the DominatorTree code expect every update's From +/// and To blocks to exist and to be a member of the same Function. +bool DeferredDominance::flushDelBB() { + if (DeletedBBs.empty()) + return false; + for (auto *BB : DeletedBBs) + BB->eraseFromParent(); + DeletedBBs.clear(); + return true; +} Index: llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -77,6 +77,7 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -88,6 +89,7 @@ INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", "Value Propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation", "Value Propagation", false, false) Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -131,10 +131,11 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - if (PrintLVIAfterJumpThreading) - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addRequired(); } @@ -148,6 +149,7 @@ INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -278,8 +280,12 @@ if (skipFunction(F)) return false; auto TLI = &getAnalysis().getTLI(); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto DT = &getAnalysis().getDomTree(); auto LVI = &getAnalysis().getLVI(); auto AA = &getAnalysis().getAAResults(); + DeferredDominance DDT(*DT); std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = F.hasProfileData(); @@ -289,12 +295,11 @@ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; - LVI->printLVI(F, getAnalysis().getDomTree(), - dbgs()); + LVI->printLVI(F, *DT, dbgs()); } return Changed; } @@ -302,8 +307,12 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto &DT = AM.getResult(F); auto &LVI = AM.getResult(F); auto &AA = AM.getResult(F); + DeferredDominance DDT(DT); std::unique_ptr BFI; std::unique_ptr BPI; @@ -313,25 +322,28 @@ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); + PA.preserve(); return PA; } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, - bool HasProfileData_, + DeferredDominance *DDT_, bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; AA = AA_; + DDT = DDT_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -353,7 +365,7 @@ // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. bool EverChanged = false; - EverChanged |= removeUnreachableBlocks(F, LVI); + EverChanged |= removeUnreachableBlocks(F, LVI, DDT); FindLoopHeaders(F); @@ -368,6 +380,10 @@ ++I; + // Don't thread branches over a block that's slated for deletion. + if (DDT->pendingDeletedBB(BB)) + continue; + // If the block is trivially dead, zap it. This eliminates the successor // edges which simplifies the CFG. if (pred_empty(BB) && @@ -376,7 +392,7 @@ << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); LVI->eraseBlock(BB); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DDT); Changed = true; continue; } @@ -400,7 +416,7 @@ // awesome, but it allows us to use AssertingVH to prevent nasty // dangling pointer issues within LazyValueInfo. LVI->eraseBlock(BB); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + if (TryToSimplifyUncondBranchFromEmptyBlock(BB, DDT)) Changed = true; } } @@ -408,6 +424,7 @@ } while (Changed); LoopHeaders.clear(); + DDT->flush(); return EverChanged; } @@ -931,8 +948,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) + if (DDT->pendingDeletedBB(BB) || + (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock())) return false; // If this block has a single predecessor, and if that pred has a single @@ -948,7 +965,7 @@ LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); - MergeBasicBlockIntoOnlyPred(BB); + MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by // BB code within one basic block `BB`), we need to invalidate the LVI @@ -1031,18 +1048,23 @@ // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa(Condition)) { unsigned BestSucc = GetBestDestForJumpOnUndef(BB); + std::vector Updates; // Fold the branch/switch. TerminatorInst *BBTerm = BB->getTerminator(); + Updates.reserve(BBTerm->getNumSuccessors()); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - BBTerm->getSuccessor(i)->removePredecessor(BB, true); + BasicBlock *Succ = BBTerm->getSuccessor(i); + Succ->removePredecessor(BB, true); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } DEBUG(dbgs() << " In block '" << BB->getName() << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); + DDT->applyUpdates(Updates); return true; } @@ -1053,7 +1075,7 @@ DEBUG(dbgs() << " In block '" << BB->getName() << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; - ConstantFoldTerminator(BB, true); + ConstantFoldTerminator(BB, true, nullptr, DDT); return true; } @@ -1086,7 +1108,8 @@ if (Ret != LazyValueInfo::Unknown) { unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; - CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + ToRemoveSucc->removePredecessor(BB, true); BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); CondBr->eraseFromParent(); if (CondCmp->use_empty()) @@ -1104,6 +1127,7 @@ ConstantInt::getFalse(CondCmp->getType()); ReplaceFoldableUses(CondCmp, CI); } + DDT->deleteEdge(BB, ToRemoveSucc); return true; } @@ -1182,9 +1206,12 @@ Optional Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); if (Implication) { - BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB); - BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI); + BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1); + BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0); + RemoveSucc->removePredecessor(BB); + BranchInst::Create(KeepSucc, BI); BI->eraseFromParent(); + DDT->deleteEdge(BB, RemoveSucc); return true; } CurrentBB = CurrentPred; @@ -1591,17 +1618,22 @@ if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; + std::vector Updates; + Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); for (BasicBlock *SuccBB : successors(BB)) { - if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. - else + } else { SuccBB->removePredecessor(BB, true); // This is unreachable successor. + Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + } } // Finally update the terminator. TerminatorInst *Term = BB->getTerminator(); BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); + DDT->applyUpdates(Updates); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1964,6 +1996,10 @@ PredTerm->setSuccessor(i, NewBB); } + DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, + {DominatorTree::Insert, PredBB, NewBB}, + {DominatorTree::Delete, PredBB, BB}}); + // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. @@ -1983,20 +2019,42 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB, ArrayRef Preds, const char *Suffix) { + SmallVector NewBBs; + // Collect the frequencies of all predecessors of BB, which will be used to - // update the edge weight on BB->SuccBB. - BlockFrequency PredBBFreq(0); + // update the edge weight of the result of splitting predecessors. + DenseMap FreqMap; if (HasProfileData) for (auto Pred : Preds) - PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + FreqMap.insert(std::make_pair( + Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB))); + + // In the case when BB is a LandingPad block we create 2 new predecessors + // instead of just one. + if (BB->isLandingPad()) { + std::string NewName = std::string(Suffix) + ".split-lp"; + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs); + } else { + NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix)); + } - BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + std::vector Updates; + Updates.reserve((2 * Preds.size()) + NewBBs.size()); + for (auto NewBB : NewBBs) { + BlockFrequency NewBBFreq(0); + Updates.push_back({DominatorTree::Insert, NewBB, BB}); + for (auto Pred : predecessors(NewBB)) { + Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + if (HasProfileData) // Update frequencies between Pred -> NewBB. + NewBBFreq += FreqMap.lookup(Pred); + } + if (HasProfileData) // Apply the summed frequency to NewBB. + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } - // Set the block frequency of the newly created PredBB, which is the sum of - // frequencies of Preds. - if (HasProfileData) - BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); - return PredBB; + DDT->applyUpdates(Updates); + return NewBBs[0]; } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { @@ -2140,6 +2198,7 @@ } // And finally, do it! Start by factoring the predecessors if needed. + std::vector Updates; BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; @@ -2148,6 +2207,7 @@ << " common predecessors.\n"); PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } + Updates.push_back({DominatorTree::Delete, PredBB, BB}); // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. @@ -2160,7 +2220,11 @@ BranchInst *OldPredBranch = dyn_cast(PredBB->getTerminator()); if (!OldPredBranch || !OldPredBranch->isUnconditional()) { - PredBB = SplitEdge(PredBB, BB); + BasicBlock *OldPredBB = PredBB; + PredBB = SplitEdge(OldPredBB, BB); + Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB}); + Updates.push_back({DominatorTree::Insert, PredBB, BB}); + Updates.push_back({DominatorTree::Delete, OldPredBB, BB}); OldPredBranch = cast(PredBB->getTerminator()); } @@ -2202,6 +2266,10 @@ // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + // Update Dominance from simplified New instruction operands. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast(New->getOperand(i))) + Updates.push_back({DominatorTree::Insert, PredBB, SuccBB}); } } @@ -2257,6 +2325,7 @@ // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); + DDT->applyUpdates(Updates); ++NumDupes; return true; @@ -2329,6 +2398,8 @@ // The select is now dead. SI->eraseFromParent(); + DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, + {DominatorTree::Insert, Pred, NewBB}}); // Update any other PHI nodes in BB. for (BasicBlock::iterator BI = BB->begin(); PHINode *Phi = dyn_cast(BI); ++BI) @@ -2407,11 +2478,25 @@ // Expand the select. TerminatorInst *Term = SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + BasicBlock *SplitBB = SI->getParent(); + BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); NewPN->addIncoming(SI->getFalseValue(), BB); SI->replaceAllUsesWith(NewPN); SI->eraseFromParent(); + // NewBB and SplitBB are newly created blocks which require insertion. + std::vector Updates; + Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3); + Updates.push_back({DominatorTree::Insert, BB, SplitBB}); + Updates.push_back({DominatorTree::Insert, BB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); + // BB's successors were moved to SplitBB, update DDT accordingly. + for (auto *Succ : successors(SplitBB)) { + Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Insert, SplitBB, Succ}); + } + DDT->applyUpdates(Updates); return true; } return false; @@ -2498,8 +2583,8 @@ if (!TrueDestIsSafe && !FalseDestIsSafe) return false; - BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; - BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); @@ -2508,18 +2593,29 @@ return false; // Duplicate all instructions before the guard and the guard itself to the // branch where implication is not proved. - GuardedBlock = DuplicateInstructionsInSplitBetween( - BB, GuardedBlock, AfterGuard, GuardedMapping); + BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredGuardedBlock, AfterGuard, GuardedMapping); assert(GuardedBlock && "Could not create the guarded block?"); // Duplicate all instructions before the guard in the unguarded branch. // Since we have successfully duplicated the guarded block and this block // has fewer instructions, we expect it to succeed. - UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock, - Guard, UnguardedMapping); + BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredUnguardedBlock, Guard, UnguardedMapping); assert(UnguardedBlock && "Could not create the unguarded block?"); DEBUG(dbgs() << "Moved guard " << *Guard << " to block " << GuardedBlock->getName() << "\n"); - + // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between + // PredBB and BB. We need to perform two inserts and one delete for each of + // the above calls to update Dominators. + DDT->applyUpdates( + {// Guarded block split. + {DominatorTree::Delete, PredGuardedBlock, BB}, + {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, + {DominatorTree::Insert, GuardedBlock, BB}, + // Unguarded block split. + {DominatorTree::Delete, PredUnguardedBlock, BB}, + {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, + {DominatorTree::Insert, UnguardedBlock, BB}}); // Some instructions before the guard may still have uses. For them, we need // to create Phi nodes merging their copies in both guarded and unguarded // branches. Those instructions that have no uses can be just removed. Index: llvm/lib/Transforms/Utils/BasicBlockUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -45,16 +45,22 @@ using namespace llvm; -void llvm::DeleteDeadBlock(BasicBlock *BB) { +void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) { assert((pred_begin(BB) == pred_end(BB) || // Can delete self loop. BB->getSinglePredecessor() == BB) && "Block is not dead!"); TerminatorInst *BBTerm = BB->getTerminator(); + std::vector Updates; // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (BasicBlock *Succ : BBTerm->successors()) + if (DDT) + Updates.reserve(BBTerm->getNumSuccessors()); + for (BasicBlock *Succ : BBTerm->successors()) { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } // Zap all the instructions in the block. while (!BB->empty()) { @@ -69,8 +75,12 @@ BB->getInstList().pop_back(); } - // Zap the block! - BB->eraseFromParent(); + if (DDT) { + DDT->applyUpdates(Updates); + DDT->deleteBB(BB); // Deferred deletion of BB. + } else { + BB->eraseFromParent(); // Zap the block! + } } void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -100,7 +100,8 @@ /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + DeferredDominance *DDT) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -123,6 +124,8 @@ // Replace the conditional branch with an unconditional one. Builder.CreateBr(Destination); BI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, OldDest); return true; } @@ -193,9 +196,12 @@ createBranchWeights(Weights)); } // Remove this entry. - DefaultDest->removePredecessor(SI->getParent()); + BasicBlock *ParentBB = SI->getParent(); + DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); + if (DDT) + DDT->deleteEdge(ParentBB, DefaultDest); continue; } @@ -221,14 +227,20 @@ // Insert the new branch. Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); + std::vector Updates; + if (DDT) + Updates.reserve(SI->getNumSuccessors() - 1); // Remove entries from PHI nodes which we no longer branch to... for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - if (Succ == TheOnlyDest) + if (Succ == TheOnlyDest) { TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest - else + } else { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } } // Delete the old switch. @@ -236,6 +248,8 @@ SI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + if (DDT) + DDT->applyUpdates(Updates); return true; } @@ -281,14 +295,23 @@ if (auto *BA = dyn_cast(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); + std::vector Updates; + if (DDT) + Updates.reserve(IBI->getNumDestinations() - 1); + // Insert the new branch. Builder.CreateBr(TheOnlyDest); for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - if (IBI->getDestination(i) == TheOnlyDest) + if (IBI->getDestination(i) == TheOnlyDest) { TheOnlyDest = nullptr; - else - IBI->getDestination(i)->removePredecessor(IBI->getParent()); + } else { + BasicBlock *ParentBB = IBI->getParent(); + BasicBlock *DestBB = IBI->getDestination(i); + DestBB->removePredecessor(ParentBB); + if (DDT) + Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); + } } Value *Address = IBI->getAddress(); IBI->eraseFromParent(); @@ -303,6 +326,8 @@ new UnreachableInst(BB->getContext(), BB); } + if (DDT) + DDT->applyUpdates(Updates); return true; } } @@ -579,7 +604,8 @@ /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DeferredDominance *DDT) { // This only adjusts blocks with PHI nodes. if (!isa(BB->begin())) return; @@ -602,13 +628,18 @@ // of the block. if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } + if (DDT) + DDT->deleteEdge(Pred, BB); } /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT, + DeferredDominance *DDT) { + assert(!(DT && DDT) && "Cannot call with both DT and DDT."); + // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -621,6 +652,23 @@ BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); + bool ReplaceEntryBB = false; + if (PredBB == &DestBB->getParent()->getEntryBlock()) + ReplaceEntryBB = true; + + // Deferred DT update: Collect all the edges that enter PredBB. These + // dominator edges will be redirected to DestBB. + std::vector Updates; + if (DDT && !ReplaceEntryBB) { + Updates.reserve((2 * std::distance(pred_begin(PredBB), pred_end(PredBB))) + + 1); + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); + for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, PredBB}); + Updates.push_back({DominatorTree::Insert, *I, DestBB}); + } + } + // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -641,7 +689,7 @@ // If the PredBB is the entry block of the function, move DestBB up to // become the entry block after we erase PredBB. - if (PredBB == &DestBB->getParent()->getEntryBlock()) + if (ReplaceEntryBB) DestBB->moveAfter(PredBB); if (DT) { @@ -653,8 +701,19 @@ DT->eraseNode(PredBB); } } - // Nuke BB. - PredBB->eraseFromParent(); + + if (DDT) { + DDT->deleteBB(PredBB); // Deferred deletion of BB. + if (ReplaceEntryBB) + // The entry block was removed and there is no external interface for the + // dominator tree to be notified of this change. In this corner-case we + // recalculate the entire tree. + DDT->recalculate(*(DestBB->getParent())); + else + DDT->applyUpdates(Updates); + } else { + PredBB->eraseFromParent(); // Nuke BB. + } } /// CanMergeValues - Return true if we can choose one of these values to use @@ -861,7 +920,8 @@ /// potential side-effect free intrinsics and the branch. If possible, /// eliminate BB by rewriting all the predecessors to branch to the successor /// block and return true. If we can't transform, return false. -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DeferredDominance *DDT) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -902,6 +962,17 @@ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + std::vector Updates; + if (DDT) { + Updates.reserve((2 * std::distance(pred_begin(BB), pred_end(BB))) + 1); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + // All predecessors of BB will be moved to Succ. + for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, BB}); + Updates.push_back({DominatorTree::Insert, *I, Succ}); + } + } + if (isa(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in // the successor, then we need to add incoming edges for the PHI nodes @@ -946,7 +1017,13 @@ // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. + + if (DDT) { + DDT->deleteBB(BB); // Deferred deletion of the old basic block. + DDT->applyUpdates(Updates); + } else { + BB->eraseFromParent(); // Delete the old basic block. + } return true; } @@ -1448,13 +1525,19 @@ } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA) { + bool PreserveLCSSA, DeferredDominance *DDT) { BasicBlock *BB = I->getParent(); + std::vector Updates; + // Loop over all of the successors, removing BB's entry from any PHI // nodes. - for (BasicBlock *Successor : successors(BB)) + if (DDT) + Updates.reserve(BB->getTerminator()->getNumSuccessors()); + for (BasicBlock *Successor : successors(BB)) { Successor->removePredecessor(BB, PreserveLCSSA); - + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. if (UseLLVMTrap) { @@ -1474,11 +1557,13 @@ BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } + if (DDT) + DDT->applyUpdates(Updates); return NumInstrsRemoved; } /// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { +static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) { SmallVector Args(II->arg_begin(), II->arg_end()); SmallVector OpBundles; II->getOperandBundlesAsDefs(OpBundles); @@ -1491,11 +1576,16 @@ II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); + BasicBlock *NormalDestBB = II->getNormalDest(); + BranchInst::Create(NormalDestBB, II); // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *BB = II->getParent(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -1536,7 +1626,8 @@ } static bool markAliveBlocks(Function &F, - SmallPtrSetImpl &Reachable) { + SmallPtrSetImpl &Reachable, + DeferredDominance *DDT = nullptr) { SmallVector Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1556,7 +1647,7 @@ if (II->getIntrinsicID() == Intrinsic::assume) { if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(II, false); + changeToUnreachable(II, false, false, DDT); Changed = true; break; } @@ -1573,7 +1664,8 @@ // still be useful for widening. if (match(II->getArgOperand(0), m_Zero())) if (!isa(II->getNextNode())) { - changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); + changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/false, + false, DDT); Changed = true; break; } @@ -1583,7 +1675,7 @@ if (auto *CI = dyn_cast(&I)) { Value *Callee = CI->getCalledValue(); if (isa(Callee) || isa(Callee)) { - changeToUnreachable(CI, /*UseLLVMTrap=*/false); + changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT); Changed = true; break; } @@ -1593,7 +1685,7 @@ // though. if (!isa(CI->getNextNode())) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI->getNextNode(), false); + changeToUnreachable(CI->getNextNode(), false, false, DDT); Changed = true; } break; @@ -1612,7 +1704,7 @@ if (isa(Ptr) || (isa(Ptr) && SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); + changeToUnreachable(SI, true, false, DDT); Changed = true; break; } @@ -1624,16 +1716,20 @@ // Turn invokes that call 'nounwind' functions into ordinary calls. Value *Callee = II->getCalledValue(); if (isa(Callee) || isa(Callee)) { - changeToUnreachable(II, true); + changeToUnreachable(II, true, false, DDT); Changed = true; } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. - BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *NormalDestBB = II->getNormalDest(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + BranchInst::Create(NormalDestBB, II); + UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } else - changeToCall(II); + changeToCall(II, DDT); Changed = true; } } else if (auto *CatchSwitch = dyn_cast(Terminator)) { @@ -1679,7 +1775,7 @@ } } - Changed |= ConstantFoldTerminator(BB, true); + Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT); for (BasicBlock *Successor : successors(BB)) if (Reachable.insert(Successor).second) Worklist.push_back(Successor); @@ -1687,11 +1783,11 @@ return Changed; } -void llvm::removeUnwindEdge(BasicBlock *BB) { +void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) { TerminatorInst *TI = BB->getTerminator(); if (auto *II = dyn_cast(TI)) { - changeToCall(II); + changeToCall(II, DDT); return; } @@ -1719,15 +1815,18 @@ UnwindDest->removePredecessor(BB); TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDest); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. If `LVI` is passed, this function preserves LazyValueInfo /// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, + DeferredDominance *DDT) { SmallPtrSet Reachable; - bool Changed = markAliveBlocks(F, Reachable); + bool Changed = markAliveBlocks(F, Reachable, DDT); // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) @@ -1737,25 +1836,39 @@ NumRemoved += F.size()-Reachable.size(); // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... - for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(&*BB)) + // their internal references. Update DDT and LVI if available. + std::vector Updates; + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { + auto *BB = &*I; + if (Reachable.count(BB)) continue; - - for (BasicBlock *Successor : successors(&*BB)) + for (BasicBlock *Successor : successors(BB)) { if (Reachable.count(Successor)) - Successor->removePredecessor(&*BB); + Successor->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } if (LVI) - LVI->eraseBlock(&*BB); + LVI->eraseBlock(BB); BB->dropAllReferences(); } - for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(&*I)) - I = F.getBasicBlockList().erase(I); - else + for (Function::iterator I = ++F.begin(); I != F.end();) { + auto *BB = &*I; + if (Reachable.count(BB)) { + ++I; + continue; + } + if (DDT) { + DDT->deleteBB(BB); // deferred deletion of BB. ++I; + } else { + I = F.getBasicBlockList().erase(I); + } + } + if (DDT) + DDT->applyUpdates(Updates); return true; } Index: llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll =================================================================== --- llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ llvm/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -19,10 +19,13 @@ ; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined ; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined ; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400> +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400> ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] ; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401> +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401> ; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 ; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1> ; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 ; CHECK-NOT: loop loop: Index: llvm/test/Transforms/JumpThreading/ddt-crash.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/JumpThreading/ddt-crash.ll @@ -0,0 +1,265 @@ +; RUN: opt < %s -jump-threading -disable-output + +%struct.ham = type { i8, i8, i16, i32 } +%struct.zot = type { i32 (...)** } +%struct.quux.0 = type { %struct.wombat } +%struct.wombat = type { %struct.zot } + +@global = external global %struct.ham*, align 8 +@global.1 = external constant i8* + +declare i32 @wombat.2() + +define void @blam() { +bb: + %tmp = load i32, i32* undef + %tmp1 = icmp eq i32 %tmp, 0 + br i1 %tmp1, label %bb11, label %bb2 + +bb2: + %tmp3 = tail call i32 @wombat.2() + switch i32 %tmp3, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb7 + i32 2, label %bb7 + i32 3, label %bb11 + ] + +bb4: + br label %bb7 + +bb5: + %tmp6 = tail call i32 @wombat.2() + br label %bb7 + +bb7: + %tmp8 = phi i32 [ 0, %bb5 ], [ 1, %bb4 ], [ 2, %bb2 ], [ 2, %bb2 ] + %tmp9 = icmp eq i32 %tmp8, 0 + br i1 %tmp9, label %bb11, label %bb10 + +bb10: + ret void + +bb11: + ret void +} + +define void @spam(%struct.ham* %arg) { +bb: + %tmp = load i8, i8* undef, align 8 + switch i8 %tmp, label %bb11 [ + i8 1, label %bb11 + i8 2, label %bb11 + i8 3, label %bb1 + i8 4, label %bb1 + ] + +bb1: + br label %bb2 + +bb2: + %tmp3 = phi i32 [ 0, %bb1 ], [ %tmp3, %bb8 ] + br label %bb4 + +bb4: + %tmp5 = load i8, i8* undef, align 8 + switch i8 %tmp5, label %bb11 [ + i8 0, label %bb11 + i8 1, label %bb10 + i8 2, label %bb10 + i8 3, label %bb6 + i8 4, label %bb6 + ] + +bb6: + br label %bb7 + +bb7: + br i1 undef, label %bb8, label %bb10 + +bb8: + %tmp9 = icmp eq %struct.ham* undef, %arg + br i1 %tmp9, label %bb10, label %bb2 + +bb10: + switch i32 %tmp3, label %bb4 [ + i32 0, label %bb14 + i32 1, label %bb11 + i32 2, label %bb12 + ] + +bb11: + unreachable + +bb12: + %tmp13 = load %struct.ham*, %struct.ham** undef + br label %bb14 + +bb14: + %tmp15 = phi %struct.ham* [ %tmp13, %bb12 ], [ null, %bb10 ] + br label %bb16 + +bb16: + %tmp17 = load i8, i8* undef, align 8 + switch i8 %tmp17, label %bb11 [ + i8 0, label %bb11 + i8 11, label %bb18 + i8 12, label %bb18 + ] + +bb18: + br label %bb19 + +bb19: + br label %bb20 + +bb20: + %tmp21 = load %struct.ham*, %struct.ham** undef + switch i8 undef, label %bb22 [ + i8 0, label %bb4 + i8 11, label %bb10 + i8 12, label %bb10 + ] + +bb22: + br label %bb23 + +bb23: + %tmp24 = icmp eq %struct.ham* %tmp21, null + br i1 %tmp24, label %bb35, label %bb25 + +bb25: + %tmp26 = icmp eq %struct.ham* %tmp15, null + br i1 %tmp26, label %bb34, label %bb27 + +bb27: + %tmp28 = load %struct.ham*, %struct.ham** undef + %tmp29 = icmp eq %struct.ham* %tmp28, %tmp21 + br i1 %tmp29, label %bb35, label %bb30 + +bb30: + br label %bb31 + +bb31: + %tmp32 = load i8, i8* undef, align 8 + %tmp33 = icmp eq i8 %tmp32, 0 + br i1 %tmp33, label %bb31, label %bb34 + +bb34: + br label %bb35 + +bb35: + %tmp36 = phi i1 [ true, %bb34 ], [ false, %bb23 ], [ true, %bb27 ] + br label %bb37 + +bb37: + %tmp38 = icmp eq %struct.ham* %tmp15, null + br i1 %tmp38, label %bb39, label %bb41 + +bb39: + %tmp40 = load %struct.ham*, %struct.ham** @global + br label %bb41 + +bb41: + %tmp42 = select i1 %tmp36, %struct.ham* undef, %struct.ham* undef + ret void +} + +declare i32 @foo(...) + +define void @zot() align 2 personality i8* bitcast (i32 (...)* @foo to i8*) { +bb: + invoke void @bar() + to label %bb1 unwind label %bb3 + +bb1: + invoke void @bar() + to label %bb2 unwind label %bb4 + +bb2: + invoke void @bar() + to label %bb6 unwind label %bb17 + +bb3: + %tmp = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb4: + %tmp5 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb6: + invoke void @bar() + to label %bb7 unwind label %bb19 + +bb7: + invoke void @bar() + to label %bb10 unwind label %bb8 + +bb8: + %tmp9 = landingpad { i8*, i32 } + cleanup + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb10: + %tmp11 = load i32 (%struct.zot*)*, i32 (%struct.zot*)** undef, align 8 + %tmp12 = invoke i32 %tmp11(%struct.zot* nonnull undef) + to label %bb13 unwind label %bb21 + +bb13: + invoke void @bar() + to label %bb14 unwind label %bb23 + +bb14: + %tmp15 = load i32 (%struct.zot*)*, i32 (%struct.zot*)** undef, align 8 + %tmp16 = invoke i32 %tmp15(%struct.zot* nonnull undef) + to label %bb26 unwind label %bb23 + +bb17: + %tmp18 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb19: + %tmp20 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb21: + %tmp22 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + unreachable + +bb23: + %tmp24 = phi %struct.quux.0* [ null, %bb26 ], [ null, %bb14 ], [ undef, %bb13 ] + %tmp25 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @global.1 to i8*) + catch i8* null + br label %bb30 + +bb26: + %tmp27 = load i32 (%struct.zot*)*, i32 (%struct.zot*)** undef, align 8 + %tmp28 = invoke i32 %tmp27(%struct.zot* nonnull undef) + to label %bb29 unwind label %bb23 + +bb29: + unreachable + +bb30: + %tmp31 = icmp eq %struct.quux.0* %tmp24, null + br i1 %tmp31, label %bb32, label %bb29 + +bb32: + unreachable +} + +declare void @bar() Index: llvm/test/Transforms/JumpThreading/lvi-tristate.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/JumpThreading/lvi-tristate.ll @@ -0,0 +1,50 @@ +; RUN: opt -jump-threading -simplifycfg -S < %s | FileCheck %s +; CHECK-NOT: bb6: +; CHECK-NOT: bb7: +; CHECK-NOT: bb8: +; CHECK-NOT: bb11: +; CHECK-NOT: bb12: +; CHECK: bb: +; CHECK: bb2: +; CHECK: bb4: +; CHECK: bb10: +; CHECK: bb13: +declare void @ham() + +define void @hoge() { +bb: + %tmp = and i32 undef, 1073741823 + %tmp1 = icmp eq i32 %tmp, 2 + br i1 %tmp1, label %bb12, label %bb2 + +bb2: + %tmp3 = icmp eq i32 %tmp, 3 + br i1 %tmp3, label %bb13, label %bb4 + +bb4: + %tmp5 = icmp eq i32 %tmp, 5 + br i1 %tmp5, label %bb6, label %bb7 + +bb6: + tail call void @ham() + br label %bb7 + +bb7: + br i1 %tmp3, label %bb13, label %bb8 + +bb8: + %tmp9 = icmp eq i32 %tmp, 4 + br i1 %tmp9, label %bb13, label %bb10 + +bb10: + br i1 %tmp9, label %bb11, label %bb13 + +bb11: + br label %bb13 + +bb12: + br label %bb2 + +bb13: + ret void +}