Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -105,7 +105,7 @@ void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCELegacyPassPass(PassRegistry&); -void initializeDSEPass(PassRegistry&); +void initializeDSELegacyPassPass(PassRegistry&); void initializeDeadInstEliminationPass(PassRegistry&); void initializeDeadMachineInstructionElimPass(PassRegistry&); void initializeDelinearizationPass(PassRegistry &); Index: include/llvm/Transforms/Scalar/DeadStoreElimination.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/DeadStoreElimination.h @@ -0,0 +1,34 @@ +//===- DeadStoreElimination.h - Fast Dead Store Elimination -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a trivial dead store elimination that only considers +// basic-block local redundant stores. +// +// FIXME: This should eventually be extended to be a post-dominator tree +// traversal. Doing so would be pretty trivial. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_DSE_H +#define LLVM_TRANSFORMS_SCALAR_DSE_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// This class implements a trivial dead store elimination. We consider +/// only the redundant stores that are local to a single Basic Block. 
+class DSEPass : public PassInfoMixin<DSEPass> {
+public:
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &FAM);
+};
+}
+
+#endif // LLVM_TRANSFORMS_SCALAR_DSE_H
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/PGOInstrumentation.h"
 #include "llvm/Transforms/Scalar/ADCE.h"
 #include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -111,6 +111,7 @@
 FUNCTION_PASS("aa-eval", AAEvaluator())
 FUNCTION_PASS("adce", ADCEPass())
 FUNCTION_PASS("dce", DCEPass())
+FUNCTION_PASS("dse", DSEPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass())
Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,7 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -36,6 +36,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -45,80 +46,15 @@
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
-namespace { - struct DSE : public FunctionPass { - AliasAnalysis *AA; - MemoryDependenceResults *MD; - DominatorTree *DT; - const TargetLibraryInfo *TLI; - - static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { - initializeDSEPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - - AA = &getAnalysis().getAAResults(); - MD = &getAnalysis().getMemDep(); - DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(); - - bool Changed = false; - for (BasicBlock &I : F) - // Only check non-dead blocks. Dead blocks may have strange pointer - // cycles that will confuse alias analysis. - if (DT->isReachableFromEntry(&I)) - Changed |= runOnBasicBlock(I); - - AA = nullptr; MD = nullptr; DT = nullptr; - return Changed; - } - - bool runOnBasicBlock(BasicBlock &BB); - bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); - bool HandleFree(CallInst *F); - bool handleEndBlock(BasicBlock &BB); - void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, - SmallSetVector &DeadStackObjects, - const DataLayout &DL); - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - } - }; -} - -char DSE::ID = 0; -INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) - -FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } 
//===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through -/// and zero out all the operands of this instruction. If any of them become -/// dead, delete them and the computation tree that feeds them. -/// +/// Delete this instruction. Before we do, go through and zero out all the +/// operands of this instruction. If any of them become dead, delete them and +/// the computation tree that feeds them. /// If ValueSet is non-null, remove any deleted instructions from it as well. -/// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceResults &MD, const TargetLibraryInfo &TLI, @@ -156,9 +92,8 @@ } while (!NowDeadInsts.empty()); } - -/// hasMemoryWrite - Does this instruction write some memory? This only returns -/// true for things that we can analyze with other helpers below. +/// Does this instruction write some memory? This only returns true for things +/// that we can analyze with other helpers below. static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { if (isa(I)) return true; @@ -197,9 +132,9 @@ return false; } -/// getLocForWrite - Return a Location stored to by the specified instruction. -/// If isRemovable returns true, this function and getLocForRead completely -/// describe the memory operations for this instruction. +/// Return a Location stored to by the specified instruction. If isRemovable +/// returns true, this function and getLocForRead completely describe the memory +/// operations for this instruction. static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast(Inst)) return MemoryLocation::get(SI); @@ -228,8 +163,8 @@ } } -/// getLocForRead - Return the location read by the specified "hasMemoryWrite" -/// instruction if any. 
+/// Return the location read by the specified "hasMemoryWrite" instruction if +/// any. static MemoryLocation getLocForRead(Instruction *Inst, const TargetLibraryInfo &TLI) { assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case"); @@ -241,9 +176,8 @@ return MemoryLocation(); } - -/// isRemovable - If the value of this instruction and the memory it writes to -/// is unused, may we delete this instruction? +/// If the value of this instruction and the memory it writes to is unused, may +/// we delete this instruction? static bool isRemovable(Instruction *I) { // Don't remove volatile/atomic stores. if (StoreInst *SI = dyn_cast(I)) @@ -307,7 +241,7 @@ return II && II->getIntrinsicID() == Intrinsic::memset; } -/// getStoredPointerOperand - Return the pointer that is being written to. +/// Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); @@ -458,7 +392,7 @@ return OverwriteUnknown; } -/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a +/// If 'Inst' might be a self read (i.e. a noop copy of a /// memory region into an identical pointer) then it doesn't actually make its /// input dead in the traditional sense. Consider this case: /// @@ -503,212 +437,13 @@ } -//===----------------------------------------------------------------------===// -// DSE Pass -//===----------------------------------------------------------------------===// - -bool DSE::runOnBasicBlock(BasicBlock &BB) { - const DataLayout &DL = BB.getModule()->getDataLayout(); - bool MadeChange = false; - - // Do a top-down walk on the BB. - for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { - Instruction *Inst = &*BBI++; - - // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst, TLI)) { - MadeChange |= HandleFree(F); - continue; - } - - // If we find something that writes memory, get its memory dependence. 
- if (!hasMemoryWrite(Inst, *TLI)) - continue; - - // If we're storing the same value back to a pointer that we just - // loaded from, then the store can be removed. - if (StoreInst *SI = dyn_cast(Inst)) { - - auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) { - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(&*BBI); - - DeleteDeadInstruction(DeadInst, *MD, *TLI); - - if (!NextInst) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. - --BBI; - ++NumRedundantStores; - MadeChange = true; - }; - - if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - isRemovable(SI) && - MemoryIsNotModifiedBetween(DepLoad, SI)) { - - DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " - << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); - - RemoveDeadInstAndUpdateBBI(SI); - continue; - } - } - - // Remove null stores into the calloc'ed objects - Constant *StoredConstant = dyn_cast(SI->getValueOperand()); - - if (StoredConstant && StoredConstant->isNullValue() && - isRemovable(SI)) { - Instruction *UnderlyingPointer = dyn_cast( - GetUnderlyingObject(SI->getPointerOperand(), DL)); - - if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && - MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) { - DEBUG(dbgs() - << "DSE: Remove null store to the calloc'ed object:\n DEAD: " - << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); - - RemoveDeadInstAndUpdateBBI(SI); - continue; - } - } - } - - MemDepResult InstDep = MD->getDependency(Inst); - - // Ignore any store where we can't find a local dependence. - // FIXME: cross-block DSE would be fun. :) - if (!InstDep.isDef() && !InstDep.isClobber()) - continue; - - // Figure out what location is being stored to. 
- MemoryLocation Loc = getLocForWrite(Inst, *AA); - - // If we didn't get a useful location, fail. - if (!Loc.Ptr) - continue; - - while (InstDep.isDef() || InstDep.isClobber()) { - // Get the memory clobbered by the instruction we depend on. MemDep will - // skip any instructions that 'Loc' clearly doesn't interact with. If we - // end up depending on a may- or must-aliased load, then we can't optimize - // away the store and we bail out. However, if we depend on on something - // that overwrites the memory location we *can* potentially optimize it. - // - // Find out what memory location the dependent instruction stores. - Instruction *DepWrite = InstDep.getInst(); - MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); - // If we didn't get a useful location, or if it isn't a size, bail out. - if (!DepLoc.Ptr) - break; - - // If we find a write that is a) removable (i.e., non-volatile), b) is - // completely obliterated by the store to 'Loc', and c) which we know that - // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = - isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD, *TLI); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. 
- BBI = Inst->getIterator(); - if (BBI != BB.begin()) - --BBI; - break; - } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) || - ((OR == OverwriteBegin && - isShortenableAtTheBeginning(DepWrite)))) { - // TODO: base this on the target vector size so that if the earlier - // store was too small to get vector writes anyway then its likely - // a good idea to shorten it - // Power of 2 vector writes are probably always a bad idea to optimize - // as any store/memset/memcpy is likely using vector instructions so - // shortening it to not vector size is likely to be slower - MemIntrinsic *DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); - bool IsOverwriteEnd = (OR == OverwriteEnd); - if (!IsOverwriteEnd) - InstWriteOffset = int64_t(InstWriteOffset + Loc.Size); - - if ((llvm::isPowerOf2_64(InstWriteOffset) && - DepWriteAlign <= InstWriteOffset) || - ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " - << (IsOverwriteEnd ? "END" : "BEGIN") << ": " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " << DepLoc.Size << ")" - << *Inst << '\n'); - - int64_t NewLength = - IsOverwriteEnd - ? 
InstWriteOffset - DepWriteOffset - : DepLoc.Size - (InstWriteOffset - DepWriteOffset); - - Value *DepWriteLength = DepIntrinsic->getLength(); - Value *TrimmedLength = - ConstantInt::get(DepWriteLength->getType(), NewLength); - DepIntrinsic->setLength(TrimmedLength); - - if (!IsOverwriteEnd) { - int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset); - Value *Indices[1] = { - ConstantInt::get(DepWriteLength->getType(), OffsetMoved)}; - GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( - DepIntrinsic->getRawDest(), Indices, "", DepWrite); - DepIntrinsic->setDest(NewDestGEP); - } - MadeChange = true; - } - } - } - - // If this is a may-aliased store that is clobbering the store value, we - // can keep searching past it for another must-aliased pointer that stores - // to the same location. For example, in: - // store -> P - // store -> Q - // store -> P - // we can remove the first store to P even though we don't know if P and Q - // alias. - if (DepWrite == &BB.front()) break; - - // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) - break; - - InstDep = MD->getPointerDependencyFrom(Loc, false, - DepWrite->getIterator(), &BB); - } - } - - // If this block ends in a return, unwind, or unreachable, all allocas are - // dead at its end, which means stores to them are also dead. - if (BB.getTerminator()->getNumSuccessors() == 0) - MadeChange |= handleEndBlock(BB); - - return MadeChange; -} - /// Returns true if the memory which is accessed by the second instruction is not /// modified between the first and the second instruction. /// Precondition: Second instruction must be dominated by the first /// instruction. 
-bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
-                                     Instruction *SecondI) {
+static bool MemoryIsNotModifiedBetween(Instruction *FirstI,
+                                       Instruction *SecondI,
+                                       AliasAnalysis *AA) {
   SmallVector<BasicBlock *, 16> WorkList;
   SmallPtrSet<BasicBlock *, 8> Visited;
   BasicBlock::iterator FirstBBI(FirstI);
@@ -777,9 +512,11 @@
   }
 }
 
-/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// Handle frees of entire structures whose dependency is a store
 /// to a field of that structure.
-bool DSE::HandleFree(CallInst *F) {
+static bool HandleFree(CallInst *F, AliasAnalysis *AA,
+                       MemoryDependenceResults *MD, DominatorTree *DT,
+                       const TargetLibraryInfo *TLI) {
   bool MadeChange = false;
 
   MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -828,13 +565,43 @@
   return MadeChange;
 }
 
-/// handleEndBlock - Remove dead stores to stack-allocated locations in the
-/// function end block. Ex:
+/// Check to see if the specified location may alias any of the stack objects in
+/// the DeadStackObjects set. If so, they become live because the location is
+/// being loaded.
+static void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
+                                  SmallSetVector<Value *, 16> &DeadStackObjects,
+                                  const DataLayout &DL, AliasAnalysis *AA,
+                                  const TargetLibraryInfo *TLI) {
+  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
+
+  // A constant can't be in the dead pointer set.
+  if (isa<Constant>(UnderlyingPointer))
+    return;
+
+  // If the kill pointer can be easily reduced to an alloca, don't bother doing
+  // extraneous AA queries.
+  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+    DeadStackObjects.remove(const_cast<Value *>(UnderlyingPointer));
+    return;
+  }
+
+  // Remove objects that could alias LoadedLoc.
+  DeadStackObjects.remove_if([&](Value *I) {
+    // See if the loaded location could alias the stack location.
+ MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); + return !AA->isNoAlias(StackLoc, LoadedLoc); + }); +} + +/// Remove dead stores to stack-allocated locations in the function end block. +/// Ex: /// %A = alloca i32 /// ... /// store i32 1, i32* %A /// ret void -bool DSE::handleEndBlock(BasicBlock &BB) { +static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, + MemoryDependenceResults *MD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; // Keep track of all of the stack objects that are dead at the end of the @@ -967,7 +734,7 @@ // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. - RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL); + RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. @@ -978,29 +745,275 @@ return MadeChange; } -/// RemoveAccessedObjects - Check to see if the specified location may alias any -/// of the stack objects in the DeadStackObjects set. If so, they become live -/// because the location is being loaded. -void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc, - SmallSetVector &DeadStackObjects, - const DataLayout &DL) { - const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); +static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, + MemoryDependenceResults *MD, DominatorTree *DT, + const TargetLibraryInfo *TLI) { + const DataLayout &DL = BB.getModule()->getDataLayout(); + bool MadeChange = false; - // A constant can't be in the dead pointer set. - if (isa(UnderlyingPointer)) - return; + // Do a top-down walk on the BB. + for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { + Instruction *Inst = &*BBI++; - // If the kill pointer can be easily reduced to an alloca, don't bother doing - // extraneous AA queries. 
- if (isa(UnderlyingPointer) || isa(UnderlyingPointer)) { - DeadStackObjects.remove(const_cast(UnderlyingPointer)); - return; + // Handle 'free' calls specially. + if (CallInst *F = isFreeCall(Inst, TLI)) { + MadeChange |= HandleFree(F, AA, MD, DT, TLI); + continue; + } + + // If we find something that writes memory, get its memory dependence. + if (!hasMemoryWrite(Inst, *TLI)) + continue; + + // If we're storing the same value back to a pointer that we just + // loaded from, then the store can be removed. + if (StoreInst *SI = dyn_cast(Inst)) { + + auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(&*BBI); + + DeleteDeadInstruction(DeadInst, *MD, *TLI); + + if (!NextInst) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. + --BBI; + ++NumRedundantStores; + MadeChange = true; + }; + + if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + isRemovable(SI) && + MemoryIsNotModifiedBetween(DepLoad, SI, AA)) { + + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + + // Remove null stores into the calloc'ed objects + Constant *StoredConstant = dyn_cast(SI->getValueOperand()); + + if (StoredConstant && StoredConstant->isNullValue() && + isRemovable(SI)) { + Instruction *UnderlyingPointer = dyn_cast( + GetUnderlyingObject(SI->getPointerOperand(), DL)); + + if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && + MemoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) { + DEBUG(dbgs() + << "DSE: Remove null store to the calloc'ed object:\n DEAD: " + << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + } + + 
MemDepResult InstDep = MD->getDependency(Inst); + + // Ignore any store where we can't find a local dependence. + // FIXME: cross-block DSE would be fun. :) + if (!InstDep.isDef() && !InstDep.isClobber()) + continue; + + // Figure out what location is being stored to. + MemoryLocation Loc = getLocForWrite(Inst, *AA); + + // If we didn't get a useful location, fail. + if (!Loc.Ptr) + continue; + + while (InstDep.isDef() || InstDep.isClobber()) { + // Get the memory clobbered by the instruction we depend on. MemDep will + // skip any instructions that 'Loc' clearly doesn't interact with. If we + // end up depending on a may- or must-aliased load, then we can't optimize + // away the store and we bail out. However, if we depend on on something + // that overwrites the memory location we *can* potentially optimize it. + // + // Find out what memory location the dependent instruction stores. + Instruction *DepWrite = InstDep.getInst(); + MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); + // If we didn't get a useful location, or if it isn't a size, bail out. + if (!DepLoc.Ptr) + break; + + // If we find a write that is a) removable (i.e., non-volatile), b) is + // completely obliterated by the store to 'Loc', and c) which we know that + // 'Inst' doesn't load from, then we can remove it. + if (isRemovable(DepWrite) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD, *TLI); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. 
+ BBI = Inst->getIterator(); + if (BBI != BB.begin()) + --BBI; + break; + } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) || + ((OR == OverwriteBegin && + isShortenableAtTheBeginning(DepWrite)))) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + MemIntrinsic *DepIntrinsic = cast(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + bool IsOverwriteEnd = (OR == OverwriteEnd); + if (!IsOverwriteEnd) + InstWriteOffset = int64_t(InstWriteOffset + Loc.Size); + + if ((llvm::isPowerOf2_64(InstWriteOffset) && + DepWriteAlign <= InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " + << (IsOverwriteEnd ? "END" : "BEGIN") << ": " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " << DepLoc.Size << ")" + << *Inst << '\n'); + + int64_t NewLength = + IsOverwriteEnd + ? 
InstWriteOffset - DepWriteOffset + : DepLoc.Size - (InstWriteOffset - DepWriteOffset); + + Value *DepWriteLength = DepIntrinsic->getLength(); + Value *TrimmedLength = + ConstantInt::get(DepWriteLength->getType(), NewLength); + DepIntrinsic->setLength(TrimmedLength); + + if (!IsOverwriteEnd) { + int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset); + Value *Indices[1] = { + ConstantInt::get(DepWriteLength->getType(), OffsetMoved)}; + GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( + DepIntrinsic->getRawDest(), Indices, "", DepWrite); + DepIntrinsic->setDest(NewDestGEP); + } + MadeChange = true; + } + } + } + + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that stores + // to the same location. For example, in: + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and Q + // alias. + if (DepWrite == &BB.front()) break; + + // Can't look past this instruction if it might read 'Loc'. + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) + break; + + InstDep = MD->getPointerDependencyFrom(Loc, false, + DepWrite->getIterator(), &BB); + } } - // Remove objects that could alias LoadedLoc. - DeadStackObjects.remove_if([&](Value *I) { - // See if the loaded location could alias the stack location. - MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); - return !AA->isNoAlias(StackLoc, LoadedLoc); - }); + // If this block ends in a return, unwind, or unreachable, all allocas are + // dead at its end, which means stores to them are also dead. + if (BB.getTerminator()->getNumSuccessors() == 0) + MadeChange |= handleEndBlock(BB, AA, MD, TLI); + + return MadeChange; +} + +static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, + MemoryDependenceResults *MD, DominatorTree *DT, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + for (BasicBlock &BB : F) + // Only check non-dead blocks. 
Dead blocks may have strange pointer
+    // cycles that will confuse alias analysis.
+    if (DT->isReachableFromEntry(&BB))
+      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
+  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
+  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
+  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+
+  return eliminateDeadStores(F, AA, MD, DT, TLI) ? PreservedAnalyses::none()
+                                                 : PreservedAnalyses::all();
+}
+
+/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
+class DSELegacyPass : public FunctionPass {
+public:
+  DSELegacyPass() : FunctionPass(ID) {
+    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    MemoryDependenceResults *MD =
+        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+    return eliminateDeadStores(F, AA, MD, DT, TLI);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemoryDependenceWrapperPass>();
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+};
+
+char DSELegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, + false) + +FunctionPass *llvm::createDeadStoreEliminationPass() { + return new DSELegacyPass(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -40,7 +40,7 @@ initializeDCELegacyPassPass(Registry); initializeDeadInstEliminationPass(Registry); initializeScalarizerPass(Registry); - initializeDSEPass(Registry); + initializeDSELegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); Index: test/Transforms/DeadStoreElimination/simple.ll =================================================================== --- test/Transforms/DeadStoreElimination/simple.ll +++ test/Transforms/DeadStoreElimination/simple.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -basicaa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind