Index: include/llvm/Analysis/PostDominators.h =================================================================== --- include/llvm/Analysis/PostDominators.h +++ include/llvm/Analysis/PostDominators.h @@ -15,7 +15,7 @@ #define LLVM_ANALYSIS_POSTDOMINATORS_H #include "llvm/IR/Dominators.h" - +#include "llvm/Analysis/DominanceFrontier.h" namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to @@ -112,6 +112,130 @@ } }; +template +class ForwardPostDominanceFrontierBase : public DominanceFrontierBase { +private: + typedef GraphTraits BlockTraits; + +public: + typedef DominatorTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef typename DominanceFrontierBase::DomSetType DomSetType; + + ForwardPostDominanceFrontierBase() : DominanceFrontierBase(true) {} + + void analyze(PostDominatorTree &DT) { + this->Roots = DT.getRoots(); + if (const DomTreeNode *Root = DT.getRootNode()) + calculate(DT, Root); + } + + const DomSetType &calculate(const PostDominatorTree &DT, + const DomTreeNodeT *Node); +}; + +template +const typename ForwardPostDominanceFrontierBase::DomSetType & +ForwardPostDominanceFrontierBase::calculate(const PostDominatorTree &DT, + const DomTreeNodeT *Node) { + // Loop over CFG successors to calculate DFlocal[Node] + BasicBlock *BB = Node->getBlock(); + DomSetType &S = this->Frontiers[BB]; // The new set to fill in... + if (this->getRoots().empty()) + return S; + + if (BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) { + BasicBlock *P = *SI; + // Does Node immediately dominate this predecessor? + DomTreeNode *SINode = DT[P]; + if (SINode && SINode->getIDom() != Node) + S.insert(P); + } + + // At this point, S is DFlocal. Now we union in DFup's of our children... + // Loop through and visit the nodes that Node immediately dominates (Node's + // children in the IDomTree) + // + for (DomTreeNode::const_iterator NI = Node->begin(), NE = Node->end(); + NI != NE; ++NI) { + DomTreeNode *IDominee = *NI; + const DomSetType &ChildDF = calculate(DT, IDominee); + + typename DomSetType::const_iterator CDFI = ChildDF.begin(), + CDFE = ChildDF.end(); + for (; CDFI != CDFE; ++CDFI) { + if (!DT.properlyDominates(Node, DT[*CDFI])) + S.insert(*CDFI); + } + } + + return S; +} + +class PostDominanceFrontier : public FunctionPass { + ForwardPostDominanceFrontierBase Base; + +public: + typedef DominatorTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef typename DominanceFrontierBase::DomSetType DomSetType; + typedef DominanceFrontierBase::iterator iterator; + typedef DominanceFrontierBase::const_iterator const_iterator; + + static char ID; // Pass ID, replacement for typeid + PostDominanceFrontier(); + ForwardPostDominanceFrontierBase &getBase() { return Base; } + + inline const std::vector &getRoots() const { + return Base.getRoots(); + } + + BasicBlock *getRoot() const { return Base.getRoot(); } + + bool isPostDominator() const { return Base.isPostDominator(); } + iterator begin() { return Base.begin(); } + const_iterator begin() const { return Base.begin(); } + + iterator end() { return Base.end(); } + + const_iterator end() const { return Base.end(); } + + iterator find(BasicBlock *B) { return Base.find(B); } + + const_iterator find(BasicBlock *B) const { return Base.find(B); } + + iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) { + return Base.addBasicBlock(BB, frontier); + } + + void removeBlock(BasicBlock *BB) { return Base.removeBlock(BB); } + + void addToFrontier(iterator I, BasicBlock *Node) { + return Base.addToFrontier(I, Node); + } + + void removeFromFrontier(iterator I, BasicBlock *Node) { + return Base.removeFromFrontier(I, Node); + } + + bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const { + return Base.compareDomSet(DS1, DS2); + } + + bool compare(DominanceFrontierBase &Other) const { + return Base.compare(Other); + } + + void releaseMemory() override; + + bool runOnFunction(Function &) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +FunctionPass *createPostDomFrontier(); +EXTERN_TEMPLATE_INSTANTIATION(class DominanceFrontierBase); +EXTERN_TEMPLATE_INSTANTIATION(class ForwardDominanceFrontierBase); } // End llvm namespace #endif Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -226,6 +226,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); +void initializePostDominanceFrontierPass(PassRegistry &); void initializePostDominatorTreePass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -150,6 +150,7 @@ (void) llvm::createMemCpyOptPass(); (void) llvm::createLoopDeletionPass(); (void) llvm::createPostDomTree(); + (void)llvm::createPostDomFrontier(); (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); (void) llvm::createFunctionAttrsPass(); Index: lib/Analysis/Analysis.cpp =================================================================== --- lib/Analysis/Analysis.cpp +++ lib/Analysis/Analysis.cpp @@ -59,6 +59,7 @@ initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); + initializePostDominanceFrontierPass(Registry); initializeRegionInfoPassPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); Index: lib/Analysis/PostDominators.cpp =================================================================== --- lib/Analysis/PostDominators.cpp +++ lib/Analysis/PostDominators.cpp @@ -27,6 +27,7 @@ //===----------------------------------------------------------------------===// char PostDominatorTree::ID = 0; +char PostDominanceFrontier::ID = 0; INITIALIZE_PASS(PostDominatorTree, "postdomtree", "Post-Dominator Tree Construction", true, true) @@ -48,3 +49,33 @@ return new PostDominatorTree(); } +//===----------------------------------------------------------------------===// +// PostDominanceFrontier Implementation +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true) + +FunctionPass *llvm::createPostDomFrontier() { + return new PostDominanceFrontier(); +} + +PostDominanceFrontier::PostDominanceFrontier() : FunctionPass(ID), Base() { + initializeDominanceFrontierPass(*PassRegistry::getPassRegistry()); +} + +void PostDominanceFrontier::releaseMemory() { Base.releaseMemory(); } + +bool PostDominanceFrontier::runOnFunction(Function &) { + releaseMemory(); + Base.analyze(getAnalysis()); + return false; +} + +void PostDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -36,6 +36,15 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/CFG.h" +#include + using namespace llvm; #define DEBUG_TYPE "dse" @@ -48,10 +57,16 @@ AliasAnalysis *AA; MemoryDependenceAnalysis *MD; DominatorTree *DT; + PostDominanceFrontier *PDF; const TargetLibraryInfo *TLI; - + SmallVector, 32> + BackEdges; + std::map, bool> + BackEdgesMap; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { + DSE() + : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr), + PDF(nullptr) { initializeDSEPass(*PassRegistry::getPassRegistry()); } @@ -63,8 +78,13 @@ MD = &getAnalysis(); DT = &getAnalysis().getDomTree(); TLI = AA->getTargetLibraryInfo(); + PDF = &getAnalysis(); bool Changed = false; + FindFunctionBackedges(F, BackEdges); + for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) + BackEdgesMap[BackEdges[i]] = true; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. @@ -82,14 +102,23 @@ SmallSetVector &DeadStackObjects, const DataLayout &DL); + void handleNonLocalStoreDeletion(StoreInst *SI); + StoreInst *getStoreCandidates(BasicBlock *B, StoreInst *SI); + bool dfsCandidateBlock(BasicBlock *SrcBlock, BasicBlock *SinkBlock, + StoreInst *SI); + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } }; } @@ -98,6 +127,8 @@ INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(PostDominanceFrontier) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) @@ -476,7 +507,6 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { bool MadeChange = false; - // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; @@ -493,10 +523,14 @@ MemDepResult InstDep = MD->getDependency(Inst); - // Ignore any store where we can't find a local dependence. - // FIXME: cross-block DSE would be fun. :) - if (!InstDep.isDef() && !InstDep.isClobber()) + if (!InstDep.isDef() && !InstDep.isClobber() && !InstDep.isNonLocal()) continue; + if (InstDep.isNonLocal()) { + if (StoreInst *SI = dyn_cast(Inst)) { + handleNonLocalStoreDeletion(SI); + } + continue; + } // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. @@ -618,8 +652,9 @@ // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. - if (BB.getTerminator()->getNumSuccessors() == 0) - MadeChange |= handleEndBlock(BB); + if (TerminatorInst *TI = BB.getTerminator()) + if (TI->getNumSuccessors() == 0) + MadeChange |= handleEndBlock(BB); return MadeChange; } @@ -869,3 +904,117 @@ return !AA->isNoAlias(StackLoc, LoadedLoc); }); } + +// Check all paths from the SrcBlock till SinkBlock to see if Store SI is +// safe to be remove. +// Returns true if the candidate store SI is safe to delete else returns false +bool DSE::dfsCandidateBlock(BasicBlock *SrcBlock, BasicBlock *SinkBlock, + StoreInst *SI) { + std::stack Stck; + std::map Visited; + Value *Pointer = SI->getPointerOperand(); + uint64_t PtrSize = AA->getTypeStoreSize(Pointer->getType()); + for (succ_iterator I = succ_begin(SrcBlock), E = succ_end(SrcBlock); I != E; + ++I) { + if (!Visited[*I]) { + Stck.push(*I); + Visited[*I] = true; + } + } + while (!Stck.empty()) { + BasicBlock *B = Stck.top(); + Stck.pop(); + auto BI = B->begin(); + auto BE = B->end(); + for (; BI != BE; ++BI) { + Instruction *I = BI; + AliasAnalysis::ModRefResult Res = AA->getModRefInfo(I, Pointer, PtrSize); + if (Res != AliasAnalysis::NoModRef) { + if (StoreInst *CSI = dyn_cast(I)) { + Value *MemPtr = CSI->getPointerOperand(); + uint64_t MemSize = AA->getTypeStoreSize(MemPtr->getType()); + AliasResult R = AA->alias(Pointer, PtrSize, MemPtr, MemSize); + if (R == MustAlias) + break; + else + continue; + } + return false; + } + } + if (B == SinkBlock || BI != BE) + continue; + for (succ_iterator I = succ_begin(B), E = succ_end(B); I != E; ++I) { + if (!Visited[*I]) { + Stck.push(*I); + Visited[*I] = true; + } + } + } + return true; +} + +// Given a Store instruction SI find it Block B has potential candidate store +// for deletion. +StoreInst *DSE::getStoreCandidates(BasicBlock *B, StoreInst *SI) { + Value *Pointer = SI->getPointerOperand(); + uint64_t PtrSize = AA->getTypeStoreSize(Pointer->getType()); + for (auto BI = B->end(), BE = B->begin(); BI != BE;) { + Instruction *I = --BI; + AliasAnalysis::ModRefResult Res = AA->getModRefInfo(I, Pointer, PtrSize); + if (Res != AliasAnalysis::NoModRef) { + if (StoreInst *SI = dyn_cast(I)) { + Value *MemPtr = SI->getPointerOperand(); + uint64_t MemSize = AA->getTypeStoreSize(MemPtr->getType()); + AliasResult R = AA->alias(Pointer, PtrSize, MemPtr, MemSize); + if (R == MustAlias) { + return SI; + } else if (R == NoAlias) { + continue; + } + } + return nullptr; + } + } + return nullptr; +} + +// Function to handle non-local dead stores. +// This routine checks all predecessor blocks till post dom frontier to find +// potential candidate for dead store elimination. +void DSE::handleNonLocalStoreDeletion(StoreInst *SI) { + + BasicBlock *BB = SI->getParent(); + DominanceFrontier::const_iterator it = PDF->find(BB); + if (it == PDF->end()) + return; + + const DominanceFrontier::DomSetType &S = it->second; + std::map Visited; + std::stack BBStack; + BBStack.push(BB); + Visited[BB] = true; + while (!BBStack.empty()) { + BasicBlock *CurrBlock = BBStack.top(); + BBStack.pop(); + for (pred_iterator I = pred_begin(CurrBlock), E = pred_end(CurrBlock); + I != E; ++I) { + BasicBlock *B = *I; + if (Visited[B]) + continue; + if (S.find(B) != S.end()) + continue; + if (BackEdgesMap[std::make_pair(B, CurrBlock)]) + continue; + StoreInst *CandidateSI = getStoreCandidates(B, SI); + if (CandidateSI) { + bool IsSafe = dfsCandidateBlock(B, BB, SI); + if (!IsSafe) + continue; + DeleteDeadInstruction(CandidateSI, *MD, TLI); + } + BBStack.push(B); + Visited[B] = true; + } + } +} Index: test/Transforms/DeadStoreElimination/cross_block_dse.ll =================================================================== --- test/Transforms/DeadStoreElimination/cross_block_dse.ll +++ test/Transforms/DeadStoreElimination/cross_block_dse.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -basicaa -dse -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@x = common global i32 0 +@y = common global i32 0 + +define void @test_01(i32 %N) { + %1 = alloca i32 + store i32 %N, i32* %1 + store i32 10, i32* @x + %2 = load i32, i32* %1 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +;