Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/MemorySSA.h" #include using namespace llvm; @@ -55,6 +56,9 @@ cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE")); +static cl::opt + UseMemorySSA("use-memoryssa-dse", cl::Hidden, cl::init(false), + cl::desc("Use MemorySSA for DeadStoreElimination")); //===----------------------------------------------------------------------===// // Helper functions @@ -670,9 +674,10 @@ /// ... /// store i32 1, i32* %A /// ret void -static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, - MemoryDependenceResults *MD, - const TargetLibraryInfo *TLI) { +static bool +handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, MemoryDependenceResults *MD, + const TargetLibraryInfo *TLI, + SmallPtrSetImpl *AccessesToDelete = nullptr) { bool MadeChange = false; // Keep track of all of the stack objects that are dead at the end of the @@ -731,7 +736,11 @@ dbgs() << '\n'); // DCE instructions only used to calculate that store. - deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects); + if (UseMemorySSA) { + AccessesToDelete->insert(Dead); + DeadStackObjects.remove(Dead); + } else + deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -742,7 +751,11 @@ if (isInstructionTriviallyDead(&*BBI, TLI)) { DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " << *&*BBI << '\n'); - deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects); + if (UseMemorySSA) { + AccessesToDelete->insert(&*BBI); + DeadStackObjects.remove(&*BBI); + } else + deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -1038,20 +1051,372 @@ } //===----------------------------------------------------------------------===// +// MemorySSA implementation below. +// +// TODO: Delete instructions without invalidating the iterator. +// TODO: Optimize dead stores in dominating blocks in handleFreeMSSA. +//===----------------------------------------------------------------------===// + +static MemoryLocation getLocForReadMSSA(Instruction *Inst) { + if (auto *LI = dyn_cast(Inst)) + return MemoryLocation::get(LI); + if (auto *MTI = dyn_cast(Inst)) + return MemoryLocation::getForSource(MTI); + + // FIXME: Handle more instructions (e.g., vaarg, atomics). + return MemoryLocation(); +} + +/// Delete this instruction. Before we do, go through and zero out all the +/// operands of this instruction. If any of them become dead, delete them and +/// the computation tree that feeds them. +static void deleteDeadInstructionMSSA(Instruction *I, MemorySSA *MSSA, + const TargetLibraryInfo *TLI) { + SmallVector NowDeadInsts; + + NowDeadInsts.push_back(I); + --NumFastOther; + + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // Before we touch this instruction, remove it from MSSA! + if (MemoryAccess *MA = MSSA->getMemoryAccess(DeadInst)) + MSSA->removeMemoryAccess(MA); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, nullptr); + + // If this operand just became dead, add it to the NowDeadInsts list. + if (!Op->use_empty()) + continue; + + if (Instruction *OpI = dyn_cast(Op)) + if (isInstructionTriviallyDead(OpI, TLI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + } while (!NowDeadInsts.empty()); +} + +static bool eliminateNoopStoreMSSA(Instruction *Inst, AliasAnalysis *AA, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + // Must be a store instruction. + StoreInst *SI = dyn_cast(Inst); + if (!SI) + return false; + + // If we're storing the same value back to a pointer that we just loaded from, + // then the store can be removed. + if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) { + + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " + << *DepLoad << "\n STORE: " << *SI << '\n'); + + ++NumRedundantStores; + return true; + } + } + + // Remove null stores into the calloc'ed objects + Constant *StoredConstant = dyn_cast(SI->getValueOperand()); + if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { + Instruction *UnderlyingPointer = + dyn_cast(GetUnderlyingObject(SI->getPointerOperand(), DL)); + + if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && + memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) { + DEBUG( + dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " + << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); + + ++NumRedundantStores; + return true; + } + } + return false; +} + +/// Handle frees of entire structures whose dependency is a store +/// to a field of that structure. +static bool handleFreeMSSA(CallInst *F, AliasAnalysis *AA, MemorySSA *MSSA, + const TargetLibraryInfo *TLI, + SmallPtrSetImpl *AccessesToDelete) { + bool MadeChange = false; + const DataLayout &DL = F->getModule()->getDataLayout(); + + MemoryAccess *FreeMA = MSSA->getMemoryAccess(F); + + // Iterate over all local memory accesses. + auto *BlockAccesses = MSSA->getBlockAccesses(F->getParent()); + assert(BlockAccesses && "Expected memory accesses in block."); + + MemorySSA::AccessList::const_reverse_iterator I(FreeMA->getIterator()); + for (auto E = BlockAccesses->rend(); I != E; I++) { + // If we have a use that may-aliases we must give up. + if (const MemoryUse *MU = dyn_cast(&*I)) { + Instruction *DepRead = MU->getMemoryInst(); + assert(DepRead && "Expected an associated inst with memory use."); + + MemoryLocation ReadLoc = getLocForReadMSSA(DepRead); + if (!ReadLoc.Ptr || + !AA->isNoAlias(MemoryLocation(F->getArgOperand(0)), ReadLoc)) + break; + continue; + } + + // Check to see if this def clobbers FirstWrite. + if (const MemoryDef *MD2 = dyn_cast(&*I)) { + Instruction *DepWrite = MD2->getMemoryInst(); + if (!DepWrite) + break; + + if (!hasMemoryWrite(DepWrite, *TLI) || !isRemovable(DepWrite)) + break; + + Value *DepPointer = + GetUnderlyingObject(getStoredPointerOperand(DepWrite), DL); + + if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) + break; + + DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " + << *DepWrite << '\n'); + + AccessesToDelete->insert(DepWrite); + ++NumFastStores; + MadeChange = true; + + // Inst's old Dependency is now deleted. Compute the next dependency, + // which may also be dead, as in + // s[0] = 0; + // s[1] = 0; // This has just been deleted. + // free(s); + continue; + } + } + // FIXME: Look at unconditional predecessors. + + return MadeChange; +} + +static bool eliminateDeadStores(BasicBlock *BB, AliasAnalysis *AA, + MemorySSA *MSSA, DominatorTree *DT, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + + // A map of interval maps representing partially-overwritten value parts. + InstOverlapIntervalsTy IOL; + + const MemorySSA::AccessList *BlockAccesses = MSSA->getBlockAccesses(BB); + if (!BlockAccesses) + return false; + + SmallPtrSet AccessesToDelete; + const DataLayout &DL = BB->getModule()->getDataLayout(); + + // Walk accesses in the block. + MemorySSA::AccessList::const_iterator I = BlockAccesses->begin(), + E = BlockAccesses->end(); + for (; I != E; ++I) { + const MemoryDef *MD = dyn_cast(&*I); + if (!MD) + continue; + + // Handle 'free' calls specially. + if (Instruction *FreeInst = MD->getMemoryInst()) { + if (CallInst *F = isFreeCall(FreeInst, TLI)) { + MadeChange |= handleFreeMSSA(F, AA, MSSA, TLI, &AccessesToDelete); + continue; + } + } + + // Check to see if Inst writes to memory. If not, continue. + Instruction *Inst = MD->getMemoryInst(); + if (!Inst || !hasMemoryWrite(Inst, *TLI)) + continue; + + if (eliminateNoopStoreMSSA(Inst, AA, DL, TLI)) { + MadeChange = true; + AccessesToDelete.insert(Inst); + continue; + } + + // Figure out what location is being stored to. If we don't get a useful + // location, fail. + MemoryLocation Loc = getLocForWrite(Inst, *AA); + if (!Loc.Ptr) + continue; + + // FIXME: Non-local DSE would be fun. :) + MemorySSA::AccessList::const_reverse_iterator RI(I->getIterator()); + for (auto RE = BlockAccesses->rend(); RI != RE; ++RI) { + // Get the memory clobbered by the instruction we depend on. If we end up + // depending on a may- or must-aliased load, then we can't optimize away + // the store and we bail out. However, if we depend on something that + // overwrites the memory location we *can* potentially optimize it. + // Quit when we hit the block's phi node. + if (isa(*RI)) + break; + + // If we have a use that may-aliases with Inst we must give up. + if (const MemoryUse *MU = dyn_cast(&*RI)) { + Instruction *DepRead = MU->getMemoryInst(); + assert(DepRead && "Expected an associated inst with memory use."); + + MemoryLocation ReadLoc = getLocForReadMSSA(DepRead); + if (!ReadLoc.Ptr || !AA->isNoAlias(Loc, ReadLoc)) + break; + + continue; + } + + // Find out what memory location the dependent instruction stores. + const MemoryDef *InstDep = cast(&*RI); + Instruction *DepWrite = InstDep->getMemoryInst(); + if (!DepWrite) + break; + + // If we find a write that is a) removable (i.e., non-volatile), b) is + // completely obliterated by the store to 'Loc', and c) which we know that + // 'Inst' doesn't load from, then we can remove it. + MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); + if (DepLoc.Ptr && isRemovable(DepWrite) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, + InstWriteOffset, DepWrite, IOL); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite + << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + AccessesToDelete.insert(DepWrite); + ++NumFastStores; + MadeChange = true; + + // We erased DepWrite; start over. + continue; + } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) || + ((OR == OverwriteBegin && + isShortenableAtTheBeginning(DepWrite)))) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + MemIntrinsic *DepIntrinsic = cast(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + bool IsOverwriteEnd = (OR == OverwriteEnd); + if (!IsOverwriteEnd) + InstWriteOffset = int64_t(InstWriteOffset + Loc.Size); + + if ((llvm::isPowerOf2_64(InstWriteOffset) && + DepWriteAlign <= InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW " + << (IsOverwriteEnd ? "END" : "BEGIN") << ": " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " << DepLoc.Size << ")" + << *Inst << '\n'); + + int64_t NewLength = + IsOverwriteEnd + ? InstWriteOffset - DepWriteOffset + : DepLoc.Size - (InstWriteOffset - DepWriteOffset); + + Value *DepWriteLength = DepIntrinsic->getLength(); + Value *TrimmedLength = + ConstantInt::get(DepWriteLength->getType(), NewLength); + DepIntrinsic->setLength(TrimmedLength); + + if (!IsOverwriteEnd) { + int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset); + Value *Indices[1] = { + ConstantInt::get(DepWriteLength->getType(), OffsetMoved)}; + GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds( + DepIntrinsic->getRawDest(), Indices, "", DepWrite); + DepIntrinsic->setDest(NewDestGEP); + } + MadeChange = true; + } + } + } + + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that stores + // to the same location. For example, in: + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and Q + // alias. + + // Can't look past this instruction if it might read 'Loc'. + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) + break; + } + } + + // If this block ends in a return, unwind, or unreachable, all allocas are + // dead at its end, which means stores to them are also dead. + if (BB->getTerminator()->getNumSuccessors() == 0) + MadeChange |= handleEndBlock(*BB, AA, nullptr, TLI, &AccessesToDelete); + + for (auto DeadInst : AccessesToDelete) + deleteDeadInstructionMSSA(DeadInst, MSSA, TLI); + + return MadeChange; +} + +static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, MemorySSA *MSSA, + DominatorTree *DT, + const TargetLibraryInfo *TLI) { + bool MadeChange = false; + for (BasicBlock &BB : F) + // Only check non-dead blocks. Dead blocks may have strange pointer + // cycles that will confuse alias analysis. + if (DT->isReachableFromEntry(&BB)) + MadeChange |= eliminateDeadStores(&BB, AA, MSSA, DT, TLI); + + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// End of MemorySSA implementation. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// // DSE Pass //===----------------------------------------------------------------------===// PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { AliasAnalysis *AA = &AM.getResult(F); DominatorTree *DT = &AM.getResult(F); - MemoryDependenceResults *MD = &AM.getResult(F); const TargetLibraryInfo *TLI = &AM.getResult(F); - if (!eliminateDeadStores(F, AA, MD, DT, TLI)) - return PreservedAnalyses::all(); + if (UseMemorySSA) { + MemorySSA *MSSA = &AM.getResult(F); + if (!eliminateDeadStores(F, AA, MSSA, DT, TLI)) + return PreservedAnalyses::all(); + } else { + MemoryDependenceResults *MD = &AM.getResult(F); + if (!eliminateDeadStores(F, AA, MD, DT, TLI)) + return PreservedAnalyses::all(); + } + PreservedAnalyses PA; PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } @@ -1069,23 +1434,34 @@ DominatorTree *DT = &getAnalysis().getDomTree(); AliasAnalysis *AA = &getAnalysis().getAAResults(); - MemoryDependenceResults *MD = - &getAnalysis().getMemDep(); const TargetLibraryInfo *TLI = &getAnalysis().getTLI(); - return eliminateDeadStores(F, AA, MD, DT, TLI); + if (UseMemorySSA) { + MemorySSA *MSSA = &getAnalysis().getMSSA(); + MSSA->verifyMemorySSA(); + return eliminateDeadStores(F, AA, MSSA, DT, TLI); + } else { + MemoryDependenceResults *MD = + &getAnalysis().getMemDep(); + return eliminateDeadStores(F, AA, MD, DT, TLI); + } } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + if (UseMemorySSA) { + AU.addRequired(); + AU.addPreserved(); + } else { + AU.addRequired(); + AU.addPreserved(); + } } static char ID; // Pass identification, replacement for typeid @@ -1098,6 +1474,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, Index: test/Transforms/DeadStoreElimination/simple.ll =================================================================== --- test/Transforms/DeadStoreElimination/simple.ll +++ test/Transforms/DeadStoreElimination/simple.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -basicaa -dse -S | FileCheck %s +; RUN: opt < %s -basicaa -memoryssa -dse -use-memoryssa-dse -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"