Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -103,7 +103,8 @@
 void initializeDAEPass(PassRegistry&);
 void initializeDAHPass(PassRegistry&);
 void initializeDCELegacyPassPass(PassRegistry&);
-void initializeDSELegacyPassPass(PassRegistry&);
+void initializeDSELegacyMemDepPass(PassRegistry&);
+void initializeDSELegacyMSSAPass(PassRegistry&);
 void initializeDataFlowSanitizerPass(PassRegistry&);
 void initializeDeadInstEliminationPass(PassRegistry&);
 void initializeDeadMachineInstructionElimPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar/DeadStoreElimination.h
===================================================================
--- include/llvm/Transforms/Scalar/DeadStoreElimination.h
+++ include/llvm/Transforms/Scalar/DeadStoreElimination.h
@@ -25,7 +25,7 @@
 
 /// This class implements a trivial dead store elimination. We consider
 /// only the redundant stores that are local to a single Basic Block.
-class DSEPass : public PassInfoMixin<DSEPass> {
+template <bool UseMSSA> class DSEPass : public PassInfoMixin<DSEPass<UseMSSA>> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
 };
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -364,7 +364,8 @@
   // redo DCE, etc.
   FPM.addPass(JumpThreadingPass());
   FPM.addPass(CorrelatedValuePropagationPass());
-  FPM.addPass(DSEPass());
+  FPM.addPass(DSEPass<false>());
+  FPM.addPass(DSEPass<true>());
   FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
 
   // Finally, do an expensive DCE pass to catch all the dead code exposed by
@@ -679,7 +680,8 @@
   MainFPM.addPass(MemCpyOptPass());
 
   // Nuke dead stores.
-  MainFPM.addPass(DSEPass());
+  MainFPM.addPass(DSEPass<false>());
+  MainFPM.addPass(DSEPass<true>());
 
   // FIXME: at this point, we run a bunch of loop passes:
   // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -141,7 +141,8 @@
 FUNCTION_PASS("consthoist", ConstantHoistingPass())
 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
 FUNCTION_PASS("dce", DCEPass())
-FUNCTION_PASS("dse", DSEPass())
+FUNCTION_PASS("dse", DSEPass<false>())
+FUNCTION_PASS("dsem", DSEPass<true>())
 FUNCTION_PASS("dot-cfg", CFGPrinterPass())
 FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -24,6 +25,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
@@ -40,6 +42,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
 #include <map>
 using namespace llvm;
@@ -54,6 +57,9 @@
     "enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden,
     cl::desc("Enable partial-overwrite tracking in DSE"));
 
+static cl::opt<bool> EnableMSSA("use-dse-mssa", cl::init(false), cl::Hidden,
+                                cl::desc("Use the new MemorySSA-backed DSE."));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1185,10 +1191,425 @@
   return MadeChange;
 }
 
+struct WalkResult {
+  enum {
+    NextDef,
+    NextPhi,
+    KilledByUse,
+    SplitPoint,
+    ReachedEnd,
+  } State;
+  MemoryAccess *MA;
+};
+
+class DSEWalker {
+  Function *F;
+
+  unsigned InstNum;
+  DenseMap<const Value *, unsigned> InstNums;
+  // ^ Post-order numbering of MemoryPhis and of instructions that a) may
+  // throw, or b) are DSE candidates. Used to detect intervening may-throws
+  // or loop latches.
+  SmallVector<unsigned, 8> MayThrows;
+  SmallPtrSet<const Value *, 8> Returns;
+  // ^ Values returned by F.
+  DenseSet<const Value *> NonEscapes;
+  // ^ Args and insts that don't escape on unwind of F.
+  InstOverlapIntervalsTy IOL;
+  // ^ isOverwrite needs this.
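+  //
+  // Illustrative sketch of the numbering (not from the original patch): for
+  // a loop `entry -> header -> body -> header, body -> exit`, post_order
+  // visits exit, body, header, entry, and instructions are numbered
+  // bottom-up within each block, so later-executing instructions receive
+  // smaller numbers. MayThrows is filled during that same walk and is
+  // therefore sorted ascending, which is what throwInRange's binary
+  // searches rely on.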
+
+public:
+  SmallVector<MemoryDef *, 16> Stores;
+  // ^ Stores to visit, post-ordered.
+
+private:
+  AliasAnalysis *AA;
+  MemorySSA *MSSA;
+  const PostDominatorTree *PDT;
+  const TargetLibraryInfo *TLI;
+
+  unsigned addPO(Instruction &I) {
+    InstNums[&I] = InstNum++;
+    DEBUG(dbgs() << "PO: " << I << ", " << InstNums[&I] << "\n");
+    return InstNums[&I];
+  }
+
+  unsigned addPO(MemoryPhi &P) {
+    InstNums[&P] = InstNum++;
+    DEBUG(dbgs() << "PO: " << P << ", " << InstNums[&P] << "\n");
+    return InstNums[&P];
+  }
+
+  bool nonEscapingOnUnwind(Instruction &I) {
+    return isa<AllocaInst>(&I) ||
+           (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, false, true));
+  }
+
+  MemoryLocation getWriteLoc(Instruction *I) const {
+    MemoryLocation Loc = getLocForWrite(I, *AA);
+    if (!Loc.Ptr) {
+      if (auto CS = CallSite(I)) {
+        if (Function *F = CS.getCalledFunction()) {
+          StringRef FnName = F->getName();
+          if ((TLI->has(LibFunc_strcpy) &&
+               FnName == TLI->getName(LibFunc_strcpy)) ||
+              (TLI->has(LibFunc_strncpy) &&
+               FnName == TLI->getName(LibFunc_strncpy)) ||
+              (TLI->has(LibFunc_strcat) &&
+               FnName == TLI->getName(LibFunc_strcat)) ||
+              (TLI->has(LibFunc_strncat) &&
+               FnName == TLI->getName(LibFunc_strncat)))
+            return MemoryLocation::getForArgument(ImmutableCallSite(I), 0,
+                                                  *TLI);
+        }
+      }
+    }
+    return Loc;
+  }
+
+  // For free and lifetime_end, the size of the MemoryLocation doesn't matter.
+  Value *getLifetimeEndish(Instruction &I) {
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    if (isFreeCall(&I, TLI))
+      return GetUnderlyingObject(cast<CallInst>(&I)->getArgOperand(0), DL);
+    if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+        // TODO: this isn't entirely correct, since lifetime_end carries a
+        // size parameter.
+        return GetUnderlyingObject(II->getArgOperand(1), DL);
+
+    return nullptr;
+  }
+
+public:
+  // Represents a DSE candidate and its cached escape info.
+  struct Candidate {
+    MemoryDef *D;
+    MemoryLocation Loc;
+    const Value *Und;
+    bool Returned;
+    // ^ Is this candidate's memory location returned by the function?
+    bool Escapes;
+    // ^ If execution in the function unwinds after this candidate writes,
+    // will its memory location escape?
+  };
+
+  void deleteDead(MemoryDef &D) {
+    Instruction &I = *D.getMemoryInst();
+    DEBUG(dbgs() << "DSE-ing:\n\t" << D << "\n\t" << I << "\n");
+
+    SmallVector<Value *, 8> DeadPool(I.value_op_begin(), I.value_op_end());
+    DenseSet<Instruction *> Deleted; // Prevents double deletion.
+    MSSA->removeMemoryAccess(&D);
+    IOL.erase(&I);
+    I.eraseFromParent();
+
+    while (!DeadPool.empty()) {
+      auto *Cand = dyn_cast<Instruction>(DeadPool.pop_back_val());
+      if (Cand && !Deleted.count(Cand) &&
+          // Check if safe to delete, accounting for possible volatile or
+          // atomic.
+          isInstructionTriviallyDead(Cand, TLI)) {
+        DeadPool.insert(DeadPool.end(), Cand->value_op_begin(),
+                        Cand->value_op_end());
+        if (MemoryAccess *MA = MSSA->getMemoryAccess(Cand))
+          MSSA->removeMemoryAccess(MA);
+        IOL.erase(Cand);
+        Cand->eraseFromParent();
+        Deleted.insert(Cand);
+        ++NumFastOther;
+      }
+    }
+  }
+
+  // TODO: this fails to catch the following case:
+  //
+  //   p = load x;
+  //   while (...) { store p, x; store _, x; ... }
+  //
+  // memoryIsNotModifiedBetween is able to detect this.
+  bool isNoop(const MemoryDef &D) {
+    if (auto *SI = dyn_cast<StoreInst>(D.getMemoryInst())) {
+      MemoryAccess *Clob = MSSA->getWalker()->getClobberingMemoryAccess(SI);
+      if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand())) {
+        if (auto *U = dyn_cast<MemoryUse>(MSSA->getMemoryAccess(LI)))
+          // If LI is a MemoryUse, then MSSA guarantees that its defining
+          // access is accurate.
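+          // The must-alias check below additionally ensures that the load
+          // reads exactly the bytes the store rewrites; with a mere overlap
+          // (e.g. accesses of different widths at the same address),
+          // re-storing the loaded value would not be a no-op.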
+          return U->getDefiningAccess() == Clob &&
+                 AA->isMustAlias(MemoryLocation::get(LI),
+                                 MemoryLocation::get(SI));
+        // Otherwise, LI is an abnormal load, i.e., atomic and/or volatile.
+        return Clob == MSSA->getMemoryAccess(LI) &&
+               AA->isMustAlias(MemoryLocation::get(LI),
+                               MemoryLocation::get(SI));
+      } else if (auto *MUD = dyn_cast<MemoryDef>(Clob)) {
+        // Storing zero to calloc-ed memory counts as a no-op.
+        if (!MSSA->isLiveOnEntryDef(MUD)) {
+          Constant *C;
+          return (C = dyn_cast<Constant>(SI->getValueOperand())) &&
+                 C->isNullValue() && isCallocLikeFn(MUD->getMemoryInst(), TLI);
+        }
+      }
+    }
+    // TODO: Can also handle memmove(a <- a) and memcpy(a <- a), though the
+    // latter is technically UB.
+    return false;
+  }
+
+  unsigned po(Instruction &I) {
+    assert(InstNums.find(&I) != InstNums.end() && "Unindexed instruction.");
+    return InstNums.find(&I)->second;
+  }
+
+  unsigned po(MemoryPhi &P) {
+    assert(InstNums.find(&P) != InstNums.end() && "Unindexed MemoryPhi.");
+    return InstNums.find(&P)->second;
+  }
+
+  bool throwInRange(unsigned Earlier, unsigned Later) const {
+    DEBUG(dbgs() << "checking for throws between " << Earlier << " and "
+                 << Later << "\n");
+    assert(Earlier >= Later &&
+           "Larger number == later in post-order == earlier in RPO.");
+    return std::upper_bound(MayThrows.begin(), MayThrows.end(), Earlier) !=
+           std::lower_bound(MayThrows.begin(), MayThrows.end(), Later);
+  }
+
+  // Could Def prevent the DSE of a candidate store to Loc (which necessarily
+  // isn't volatile or atomic)? Example reasons for a "yes":
+  //   - Def's atomicity is stronger than monotonic;
+  //   - Def itself uses Loc, e.g., memcpy(... <- Loc).
+  bool isDSEBarrier(MemoryDef &D, const Candidate &Cand) {
+    Instruction *I = D.getMemoryInst();
+
+    if (getLifetimeEndish(*I))
+      return false;
+
+    if (I->isAtomic()) {
+      auto F = [](AtomicOrdering A) {
+        return A == AtomicOrdering::Monotonic || A == AtomicOrdering::Unordered;
+      };
+      if (auto *LI = dyn_cast<LoadInst>(I))
+        // Compensate for AA's skittishness around atomics.
+        return !(F(LI->getOrdering()) &&
+                 AA->isNoAlias(MemoryLocation::get(LI), Cand.Loc));
+      else if (auto *SI = dyn_cast<StoreInst>(I))
+        return !F(SI->getOrdering());
+      else if (isa<FenceInst>(I) && !Cand.Escapes && !Cand.Returned)
+        // From the old DSE: "Fences only constrain the ordering of already
+        // visible stores; they do not make a store visible to other
+        // threads. So, skipping over a fence does not change a store from
+        // being dead."
+        return false;
+    }
+
+    return AA->getModRefInfo(I, Cand.Loc) & MRI_Ref;
+  }
+
+  // Given the current walk position DefOrPhi, attempt to move downwards to
+  // the next MemoryDef that could DSE Cand.
+  WalkResult walkNext(MemoryAccess *DefOrPhi, const Candidate &Cand) {
+    DEBUG(dbgs() << "descending past " << *DefOrPhi << "\n");
+    WalkResult Res = {WalkResult::ReachedEnd, nullptr};
+
+    // Ensure that the use list 1) doesn't MRI_Ref Cand.Loc, and 2) contains
+    // at most one MemoryDef or MemoryPhi.
+    for (Use &U : DefOrPhi->uses()) {
+      if (auto *Phi = dyn_cast<MemoryPhi>(U.getUser())) {
+        unsigned EarlierNum;
+        if (auto *D = dyn_cast<MemoryDef>(DefOrPhi))
+          EarlierNum = InstNums.find(D->getMemoryInst())->second;
+        else
+          EarlierNum = InstNums.find(DefOrPhi)->second;
+
+        DEBUG(dbgs() << "found phi: " << *Phi << ", "
+                     << InstNums.find(Phi)->second << ", " << EarlierNum
+                     << "\n");
+
+        if (Res.MA || InstNums.find(Phi)->second > EarlierNum)
+          // More than one MemoryDef or MemoryPhi in the use list implies a
+          // split point in the MSSA graph. A Phi with a higher PO number
+          // (i.e., a lower RPO number) means that we've encountered a loop
+          // latch.
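+          // Illustrative example: in `while (c) { store v, x; }`, the
+          // body's MemoryDef feeds the loop header's MemoryPhi along the
+          // back edge. The header's Phi is numbered after the body's
+          // instructions in post-order, so its PO number is higher and the
+          // walk stops here rather than chasing the cycle.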
+          return {WalkResult::SplitPoint, nullptr};
+        Res = {WalkResult::NextPhi, Phi};
+      } else if (auto *Load = dyn_cast<MemoryUse>(U.getUser())) {
+        if (AA->getModRefInfo(Load->getMemoryInst(), Cand.Loc) & MRI_Ref) {
+          // For a pair of stores to DSE, there can't be any intervening
+          // uses of the stored-to memory.
+          DEBUG(dbgs() << "used by " << *Load << "\n");
+          return {WalkResult::KilledByUse, Load};
+        }
+      } else if (auto *D = dyn_cast<MemoryDef>(U.getUser())) {
+        if (isDSEBarrier(*D, Cand)) {
+          DEBUG(dbgs() << "used by " << *D << "\n");
+          return {WalkResult::KilledByUse, D};
+        } else if (Res.MA)
+          return {WalkResult::SplitPoint, nullptr};
+        Res = {WalkResult::NextDef, D};
+      } else
+        llvm_unreachable("Unexpected MemorySSA node type.");
+    }
+
+    if (Res.State == WalkResult::ReachedEnd)
+      Res.MA = DefOrPhi;
+    return Res;
+  }
+
+  Candidate makeCand(MemoryDef &D) const {
+    assert(hasMemoryWrite(D.getMemoryInst(), *TLI) &&
+           "DSE candidates must write to an analyzable memory location.");
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    MemoryLocation Loc = getWriteLoc(D.getMemoryInst());
+    assert(Loc.Ptr && "Expected a Loc!");
+    const Value *Und = GetUnderlyingObject(Loc.Ptr, DL);
+    bool Returned = Returns.count(Und);
+    DEBUG(dbgs() << "is " << D << " returned? " << Returned << "\n");
+    bool Escapes =
+        !(NonEscapes.count(Und) || ([&]() {
+            SmallVector<Value *, 4> Unds;
+            GetUnderlyingObjects(const_cast<Value *>(Und), Unds, DL);
+            return all_of(Unds, [&](Value *V) { return NonEscapes.count(V); });
+          })());
+    DEBUG(dbgs() << "does " << D << " escape? " << Escapes << "\n");
+    return {&D, Loc, Und, Returned, Escapes};
+  }
+
+  bool canDSE(const Candidate &Earlier, const MemoryDef &Later,
+              bool NonLocal = false) {
+    DEBUG(dbgs() << "can dse " << Later << "?\n");
+    Instruction &EarlierI = *Earlier.D->getMemoryInst();
+    Instruction &LaterI = *Later.getMemoryInst();
+
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    if (Value *Ptr = getLifetimeEndish(LaterI)) {
+      // TODO: Possibly avoid repeated calls to GetUnderlyingObject through
+      // getLifetimeEndish.
+      DEBUG(dbgs() << "checking alias with free:\n\t" << *Earlier.Und
+                   << "\n\t" << *Ptr << "\n");
+      return AA->isMustAlias(Earlier.Und, Ptr);
+    }
+
+    MemoryLocation LLoc = getWriteLoc(&LaterI);
+    if (!LLoc.Ptr ||
+        (NonLocal &&
+         !PDT->dominates(Later.getBlock(), Earlier.D->getBlock())) ||
+        (Earlier.Escapes && throwInRange(po(EarlierI), po(LaterI))))
+      return false;
+
+    int64_t EOff, LOff;
+    OverwriteResult O =
+        isOverwrite(LLoc, Earlier.Loc, DL, *TLI, EOff, LOff, &EarlierI, IOL);
+    DEBUG(dbgs() << "Overwrite: " << O << "\n");
+    return O == OverwriteComplete;
+  }
+
+  DSEWalker(Function &F_, AliasAnalysis &AA_, MemorySSA &MSSA_,
+            const PostDominatorTree &PDT_, const TargetLibraryInfo &TLI_)
+      : F(&F_), InstNum(0), AA(&AA_), MSSA(&MSSA_), PDT(&PDT_), TLI(&TLI_) {
+    // Record non-escaping args.
+    for (Argument &Arg : F->args())
+      if (Arg.hasByValOrInAllocaAttr())
+        NonEscapes.insert(&Arg);
+
+    // Number instructions of interest in post-order.
+    for (BasicBlock *BB : post_order(F)) {
+      for (Instruction &I : reverse(*BB)) {
+        if (I.mayThrow()) {
+          MayThrows.push_back(addPO(I));
+        } else if (auto *Def = dyn_cast_or_null<MemoryDef>(
+                       MSSA->getMemoryAccess(&I))) {
+          addPO(I);
+          // Only consider true DSE candidates. Volatile/atomic stores don't
+          // qualify, for instance.
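+          // (hasMemoryWrite accepts instructions whose written location is
+          // analyzable -- stores, memintrinsics, and certain libcalls --
+          // while isRemovable rejects those that must be kept, e.g.
+          // volatile stores.)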
+          if (hasMemoryWrite(&I, *TLI) && isRemovable(&I)) {
+            DEBUG(dbgs() << "candidate: " << I << "\n");
+            Stores.push_back(Def);
+          }
+        }
+
+        if (nonEscapingOnUnwind(I)) {
+          DEBUG(dbgs() << "found non-escaping mem: " << I << "\n");
+          NonEscapes.insert(&I);
+        }
+      }
+
+      if (MemoryPhi *Phi = MSSA->getMemoryAccess(BB))
+        // Phi numbers are used to recognize loop latches.
+        addPO(*Phi);
+
+      if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+        const DataLayout &DL = F->getParent()->getDataLayout();
+        // TODO: Possibly ignore unreachable blocks, which would require DT.
+        if (Value *RetVal = RI->getReturnValue())
+          Returns.insert(GetUnderlyingObject(RetVal, DL));
+      }
+    }
+  }
+};
+
+static bool eliminateDeadStoresMSSA(Function &F, AliasAnalysis &AA,
+                                    MemorySSA &MSSA,
+                                    const PostDominatorTree &PDT,
+                                    const TargetLibraryInfo &TLI) {
+  DEBUG(MSSA.print(dbgs()));
+  DSEWalker Walker(F, AA, MSSA, PDT, TLI);
+
+  bool Changed = false;
+  for (MemoryDef *D : Walker.Stores) {
+    DEBUG(dbgs() << "inspecting " << *D->getMemoryInst() << "\n");
+
+    if (Walker.isNoop(*D)) {
+      DEBUG(dbgs() << "deleting no-op store " << *D << "\n");
+      Walker.deleteDead(*D);
+      Changed = true;
+      continue;
+    }
+
+    DSEWalker::Candidate Cand = Walker.makeCand(*D);
+    WalkResult Res;
+    // Attempt to DSE within the same block first, because the post-dom
+    // checks are limited to BasicBlock granularity.
+    bool Next = false;
+    for (Res = Walker.walkNext(D, Cand);
+         Res.State == WalkResult::NextDef &&
+         Res.MA->getBlock() == D->getBlock();
+         Res = Walker.walkNext(Res.MA, Cand)) {
+      if (Walker.canDSE(Cand, *cast<MemoryDef>(Res.MA),
+                        /* NonLocal */ false)) {
+        Walker.deleteDead(*D);
+        Changed = true;
+        Next = true;
+        break;
+      }
+    }
+
+    if (Next)
+      continue;
+
+    // Non-local search.
+    for (; Res.State <= WalkResult::NextPhi;
+         Res = Walker.walkNext(Res.MA, Cand)) {
+      if (auto *Def = dyn_cast<MemoryDef>(Res.MA)) {
+        if (Walker.canDSE(Cand, *Def, /* NonLocal */ true)) {
+          Walker.deleteDead(*D);
+          Changed = true;
+          break;
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "Finished walking. " << Res.State << "\n");
+    if (Res.State == WalkResult::ReachedEnd && !Cand.Escapes &&
+        !Cand.Returned) {
+      DEBUG(dbgs() << "Caught unused write to non-escaping memory.\n");
+      Walker.deleteDead(*D);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 //===----------------------------------------------------------------------===//
 // DSE Pass
 //===----------------------------------------------------------------------===//
-PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
+template <>
+PreservedAnalyses DSEPass<false>::run(Function &F,
+                                      FunctionAnalysisManager &AM) {
   AliasAnalysis *AA = &AM.getResult<AAManager>(F);
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
@@ -1204,54 +1625,100 @@
   return PA;
 }
 
+template <>
+PreservedAnalyses DSEPass<true>::run(Function &F,
+                                     FunctionAnalysisManager &AM) {
+  if (!eliminateDeadStoresMSSA(F, AM.getResult<AAManager>(F),
+                               AM.getResult<MemorySSAAnalysis>(F).getMSSA(),
+                               AM.getResult<PostDominatorTreeAnalysis>(F),
+                               AM.getResult<TargetLibraryAnalysis>(F)))
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<MemorySSAAnalysis>();
+  PA.preserve<PostDominatorTreeAnalysis>();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
 namespace {
 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
-class DSELegacyPass : public FunctionPass {
+template <bool UseMSSA> class DSELegacyPass : public FunctionPass {
 public:
   DSELegacyPass() : FunctionPass(ID) {
-    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+    if (UseMSSA)
+      initializeDSELegacyMSSAPass(*PassRegistry::getPassRegistry());
+    else
+      initializeDSELegacyMemDepPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
       return false;
 
-    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    MemoryDependenceResults *MD =
-        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     const TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
-    return eliminateDeadStores(F, AA, MD, DT, TLI);
+    if (UseMSSA) {
+      PostDominatorTree &PDT =
+          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+      return eliminateDeadStoresMSSA(F, *AA, MSSA, PDT, *TLI);
+    } else {
+      DominatorTree *DT =
+          &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+      MemoryDependenceResults *MD =
+          &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+      return eliminateDeadStores(F, AA, MD, DT, TLI);
+    }
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addPreserved<MemoryDependenceWrapperPass>();
+
+    if (UseMSSA) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addRequired<PostDominatorTreeWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+    } else {
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addRequired<MemoryDependenceWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<MemoryDependenceWrapperPass>();
+    }
   }
 
   static char ID; // Pass identification, replacement for typeid
 };
 } // end anonymous namespace
 
-char DSELegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
-                      false)
+using DSELegacyMemDep = DSELegacyPass<false>;
+template <> char DSELegacyPass<false>::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyMemDep, "dsemd", "Dead Store Elimination",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
-                    false)
+INITIALIZE_PASS_END(DSELegacyMemDep, "dsemd", "Dead Store Elimination", false,
+                    false)
+
+using DSELegacyMSSA = DSELegacyPass<true>;
+template <> char DSELegacyPass<true>::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyMSSA, "dse",
+                      "Dead Store Elimination (Memory SSA)", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(DSELegacyMSSA, "dse",
+                    "Dead Store Elimination (Memory SSA)", false, false)
+
 FunctionPass *llvm::createDeadStoreEliminationPass() {
-  return new DSELegacyPass();
+  if (EnableMSSA)
+    return new DSELegacyPass<true>();
+  return new DSELegacyPass<false>();
 }
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -40,7 +40,8 @@
   initializeDCELegacyPassPass(Registry);
   initializeDeadInstEliminationPass(Registry);
   initializeScalarizerPass(Registry);
-  initializeDSELegacyPassPass(Registry);
+  initializeDSELegacyMemDepPass(Registry);
+  initializeDSELegacyMSSAPass(Registry);
   initializeGuardWideningLegacyPassPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeNewGVNPass(Registry);
Index: test/Transforms/DeadStoreElimination/global-dse.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/global-dse.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+define void @simple0(i8* %a) {
+; CHECK-LABEL: @simple0(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  store i8 undef, i8* %a
+  ret void
+}
+
+define void @simple1_postdom(i8* %a) {
+; CHECK-LABEL: @simple1_postdom(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  br i1 undef, label %bb1, label %bb2
+bb1:
+  store i8 undef, i8* %a
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i8 undef, i8* %a
+  ret void
+}
+
+define void @simple2_killed_by_load(i8* %a, i8* %b) {
+; CHECK-LABEL: @simple2_killed_by_load(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[B:%.*]]
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  %tmp = load i8, i8* %b
+  store i8 undef, i8* %a
+  ret void
+}
+
+declare void @might_throw()
+
+define void @escaping_dse_blocked_by_maythrow(i8* %a) {
+; CHECK-LABEL: @escaping_dse_blocked_by_maythrow(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  ret void
+}
+
+define i8 @but_nonescaping_can_dse() {
+; CHECK-LABEL: @but_nonescaping_can_dse(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[A:%.*]] = alloca i8
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    [[RV:%.*]] = load i8, i8* [[A]]
+; CHECK-NEXT:    ret i8 [[RV]]
+;
+bb0:
+  %a = alloca i8
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  %rv = load i8, i8* %a
+  ret i8 %rv
+}
+
+define void @dse_nonescaping_nonreturned() {
+; CHECK-LABEL: @dse_nonescaping_nonreturned(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    ret void
+;
+bb0:
+  %a = alloca i8
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  ret void
+}
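+
+; Flag summary, per the registrations in this patch: under the legacy pass
+; manager, -dse now runs the MemorySSA-backed pass and -dsemd the older
+; MemDep-based one, while -use-dse-mssa selects which variant
+; createDeadStoreEliminationPass builds into default pipelines. Under the
+; new pass manager, "dse" remains the MemDep-based pass and "dsem" names
+; the MemorySSA version.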