Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -29,6 +30,9 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -85,6 +89,15 @@
     cl::init(true), cl::Hidden,
     cl::desc("Enable partial store merging in DSE"));
 
+static cl::opt<bool>
+    EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
+                    cl::desc("Use the new MemorySSA-backed DSE."));
+
+static cl::opt<unsigned> MemorySSAMemoryAccessScanLimit(
+    "dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
+    cl::desc("The number of memory instructions to scan for dead store "
+             "elimination (default = 100)"));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1322,22 +1335,607 @@
   return MadeChange;
 }
 
+//=============================================================================
+// MemorySSA form of Dead Store Elimination.
+//
+// This uses MemorySSA to perform dead store elimination. MemorySSA will give
+// us a graph of MemoryAccesses, each of which will be a Def (store), a Use
+// (load) or a Phi. A Use will not be noalias of the Def it is a use of
+// (unless we hit the MemorySSA optimisation limit). Unfortunately we cannot
+// say the same thing of stores, so we walk forward from one store to the
+// next until we hit a store (or collection of stores) that causes the
+// original to be dead.
+//
+// Because we are using MemorySSA we can also look across blocks. Anywhere
+// there are multiple possible successors we treat as a break. Otherwise, so
+// long as the later store post-dominates the earlier one, the earlier one is
+// dead and can be removed.
+//
+// There are numerous other things we need to handle along the way.
+// - Stores that hit the end of the function can be removed if they are not
+//   used (i.e. if they are to allocas or alloca-like objects that don't
+//   escape).
+// - Likewise for stores that hit a free or lifetime_end.
+// - Noop stores are those that load and then store the same value back to
+//   the same address.
+// - Multiple stores together may overwrite an earlier store. These are
+//   handled with InstOverlapIntervals inside isOverwrite and
+//   removePartiallyOverlappedStores.
+// - Constant stores may be merged into their predecessor if they modify a
+//   sub-portion of the earlier store. This is called
+//   PartialEarlierWithFullLater and works in reverse to normal DSE - the
+//   later store is removed, with its constant value merged into the earlier
+//   store. A few extra checks are needed to ensure this remains valid.
+// - Any instruction that may throw usually acts as a DSE barrier (unless
+//   the memory is a non-escaping stack object).
+// - We have to be careful about stores that may also be self-reads.
+// - Atomics can be optimised (especially unordered ones), but are often not
+//   worth it. We treat fences like maythrows so that they block DSE in the
+//   same manner.
+//
+// The above MemDep based methods will eventually be removed (if they are
+// unused by those below).
+
+struct NextResult {
+  enum WalkType { Bad, Next, End };
+  WalkType type;
+  MemoryAccess *next;
+};
+
+// Find the next MemorySSA use from Current, weeding out things that don't
+// alias.
+NextResult getNextMemoryAccess(MemoryAccess *Current, MemoryLocation SILoc,
+                               AliasAnalysis &AA, bool IsStart) {
+  NextResult Next = {NextResult::End, nullptr};
+
+  // Scan the uses to see if we have a single interesting next MemoryAccess.
+  for (Use &U : Current->uses()) {
+    MemoryAccess *Use = cast<MemoryAccess>(U.getUser());
+    if (auto MA = dyn_cast<MemoryUse>(Use)) {
+      // Any MemoryUse that may alias is a break. From the first node
+      // (SI == Current) this will always be true.
+      if (IsStart || isRefSet(AA.getModRefInfo(MA->getMemoryInst(), SILoc)))
+        return {NextResult::Bad, nullptr};
+    } else if (isa<MemoryDef>(Use) || isa<MemoryPhi>(Use)) {
+      // If we see two different nodes, we are at a split point.
+      if (Next.next && Next.next != Use)
+        return {NextResult::Bad, nullptr};
+      Next = {NextResult::Next, Use};
+    } else
+      llvm_unreachable("Unexpected MemoryAccess type");
+  }
+
+  return Next;
+}
+
+// Delete dead memory defs.
+void deleteDeadInstruction(Instruction *SI, InstOverlapIntervalsTy &IOL,
+                           MemorySSA &MSSA, const TargetLibraryInfo &TLI) {
+  MemorySSAUpdater updater(&MSSA);
+  SmallVector<Instruction *, 32> NowDeadInsts;
+  NowDeadInsts.push_back(SI);
+  --NumFastOther;
+
+  while (!NowDeadInsts.empty()) {
+    Instruction *DeadInst = NowDeadInsts.pop_back_val();
+    ++NumFastOther;
+
+    // Remove the Instruction from MSSA and IOL.
+    if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst))
+      updater.removeMemoryAccess(MA);
+    IOL.erase(DeadInst);
+
+    // Remove its operands.
+    for (Use &O : DeadInst->operands())
+      if (Instruction *OpI = dyn_cast<Instruction>(O)) {
+        O = nullptr;
+        if (isInstructionTriviallyDead(OpI, &TLI))
+          NowDeadInsts.push_back(OpI);
+      }
+
+    DeadInst->eraseFromParent();
+  }
+}
+
+// Merge constant stores into predecessors.
+void mergePartialMemoryDefs(StoreInst *Earlier, StoreInst *Later,
+                            InstOverlapIntervalsTy &IOL, const DataLayout &DL,
+                            MemorySSA &MSSA, const TargetLibraryInfo &TLI) {
+  // If the store we find is:
+  //   a) partially overwritten by the store to 'Loc',
+  //   b) the later store is fully contained in the earlier one and
+  //   c) they both have a constant value,
+  // merge the two stores, replacing the earlier store's value with a
+  // merge of both values.
+  // TODO: Deal with other constant types (vectors, etc), and probably
+  // some mem intrinsics (if needed)
+
+  APInt EarlierValue =
+      cast<ConstantInt>(Earlier->getValueOperand())->getValue();
+  APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
+  unsigned LaterBits = LaterValue.getBitWidth();
+  assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
+  LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+
+  const Value *P1 = Earlier->getPointerOperand()->stripPointerCasts();
+  const Value *P2 = Later->getPointerOperand()->stripPointerCasts();
+  int64_t InstWriteOffset, DepWriteOffset;
+  GetPointerBaseWithConstantOffset(P1, DepWriteOffset, DL);
+  GetPointerBaseWithConstantOffset(P2, InstWriteOffset, DL);
+
+  // Offset of the smaller store inside the larger store.
+  unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
+  unsigned LShiftAmount =
+      DL.isBigEndian() ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
+                       : BitOffsetDiff;
+  APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
+                                 LShiftAmount + LaterBits);
+  // Clear the bits we'll be replacing, then OR with the smaller
+  // store, shifted appropriately.
+  APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
+  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *Earlier
+                    << "\n  Later: " << *Later
+                    << "\n  Merged Value: " << Merged << '\n');
+
+  Earlier->setOperand(
+      0, ConstantInt::get(Earlier->getValueOperand()->getType(), Merged));
+  deleteDeadInstruction(Later, IOL, MSSA, TLI);
+  IOL.erase(Earlier);
+  ++NumModifiedStores;
+}
+
+// FIXME: Make this use MSSA.
+bool eliminateNoopStore(MemoryDef &MD, InstOverlapIntervalsTy &IOL,
+                        const DataLayout &DL, AliasAnalysis &AA,
+                        MemorySSA &MSSA, const TargetLibraryInfo &TLI) {
+  // Must be a store instruction.
+  StoreInst *SI = dyn_cast<StoreInst>(MD.getMemoryInst());
+  if (!SI)
+    return false;
+
+  // If we're storing the same value back to a pointer that we just loaded
+  // from, then the store can be removed.
+  if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
+    if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, &AA)) {
+
+      LLVM_DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n"
+                        << "  LOAD: " << *DepLoad << "\n  STORE: " << *SI
+                        << '\n');
+
+      deleteDeadInstruction(SI, IOL, MSSA, TLI);
+      ++NumRedundantStores;
+      return true;
+    }
+  }
+
+  // Remove null stores into the calloc'ed objects.
+  Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
+  if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
+    Instruction *UnderlyingPointer =
+        dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+    if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, &TLI) &&
+        memoryIsNotModifiedBetween(UnderlyingPointer, SI, &AA)) {
+      LLVM_DEBUG(
+          dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
+                 << *SI << "\n  OBJECT: " << *UnderlyingPointer << '\n');
+
+      deleteDeadInstruction(SI, IOL, MSSA, TLI);
+      ++NumRedundantStores;
+      return true;
+    }
+  }
+  return false;
+}
+
+// We treat fences as if they are mayThrow instructions, to get the same
+// behaviour.
+bool isThrowLikeInstruction(Instruction *I) {
+  return isa<FenceInst>(I) || I->mayThrow();
+}
+
+// Check for any extra throws between SI and NI that block DSE. This only
+// checks extra maythrows (those that aren't MemoryDefs). MemoryDefs that may
+// throw are handled during the walk from one def to the next.
+bool mayThrowBetween(Instruction *SI, Instruction *NI, const Value *SILocUnd,
+                     SmallSetVector<const Value *, 16> &NonEscapingStackObjects,
+                     DenseMap<Value *, unsigned> &InstructionPostOrders,
+                     SmallVector<std::pair<unsigned, Instruction *>, 4>
+                         &ExtraMayThrowInstructions,
+                     DominatorTree &DT) {
+  // First see if we can ignore it by using the fact that SI is to an
+  // alloca/alloca-like object that doesn't escape.
+  if (NonEscapingStackObjects.count(SILocUnd))
+    return false;
+
+  // We know that SI dominates NI and that NI post-dominates SI. For one of
+  // the potential maythrows to remain interesting, it needs to have a PO
+  // between NIPO and SIPO and it needs to be dominated by SI.
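+  // For example (POs count up from the end of the function, so lower
+  // numbers are later):
+  //   store i32 1, i32* %P      ; SI, PO = 2
+  //   call void @g() readnone   ; no MemoryDef, but may unwind; PO = 1
+  //   store i32 0, i32* %P      ; NI, PO = 0
+  // The call's PO lies between NIPO and SIPO and SI dominates it, so it
+  // blocks the removal of SI.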
+  assert(InstructionPostOrders.count(SI));
+  assert(InstructionPostOrders.count(NI));
+  unsigned SIPO = InstructionPostOrders[SI];
+  unsigned NIPO = InstructionPostOrders[NI];
+  assert(std::is_sorted(ExtraMayThrowInstructions.begin(),
+                        ExtraMayThrowInstructions.end()));
+  assert(SIPO > NIPO);
+  auto lit = std::upper_bound(
+      ExtraMayThrowInstructions.begin(), ExtraMayThrowInstructions.end(),
+      std::pair<unsigned, Instruction *>(NIPO + 1, nullptr));
+  for (auto it = lit;
+       it != ExtraMayThrowInstructions.end() && it->first < SIPO; it++)
+    if (DT.dominates(SI->getParent(), it->second->getParent()))
+      return true;
+  return false;
+}
+
+// Look for frees or lifetime_ends that mark the end of the lifetime of the
+// object.
+static bool
+handleFree(Instruction *SI, const Value *SILocUnd, Instruction *NI,
+           MemoryLocation &NILoc,
+           SmallSetVector<const Value *, 16> &NonEscapingStackObjects,
+           DenseMap<Value *, unsigned> &InstructionPostOrders,
+           SmallVector<std::pair<unsigned, Instruction *>, 4>
+               &ExtraMayThrowInstructions,
+           InstOverlapIntervalsTy &IOL, AliasAnalysis &AA, MemorySSA &MSSA,
+           PostDominatorTree &PDT, DominatorTree &DT, const DataLayout &DL,
+           const TargetLibraryInfo &TLI) {
+  const Value *NIUndPtr = nullptr;
+  if (isFreeCall(NI, &TLI)) {
+    NIUndPtr = GetUnderlyingObject(cast<CallInst>(NI)->getArgOperand(0), DL);
+  } else if (auto *In = dyn_cast<IntrinsicInst>(NI)) {
+    if (In->getIntrinsicID() == Intrinsic::lifetime_end)
+      NIUndPtr = GetUnderlyingObject(NILoc.Ptr, DL);
+  }
+
+  if (!NIUndPtr)
+    return false;
+
+  // If we have hit the lifetime end, we can delete the store if NI
+  // post-dominates it and there are no throws in between.
+  if (!AA.isMustAlias(SILocUnd, NIUndPtr) ||
+      !PDT.dominates(NI->getParent(), SI->getParent()) ||
+      mayThrowBetween(SI, NI, SILocUnd, NonEscapingStackObjects,
+                      InstructionPostOrders, ExtraMayThrowInstructions, DT))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n  DEAD: "
+                    << *SI << '\n');
+  deleteDeadInstruction(SI, IOL, MSSA, TLI);
+  ++NumFastStores;
+  return true;
+}
+
+// Remove dead stores to stack-allocated locations that reach the end of the
+// function. Ex:
+//   %A = alloca i32
+//   ...
+//   store i32 1, i32* %A
+//   ret void
+static bool
+handleEnd(Instruction *SI,
+          SmallSetVector<const Value *, 16> &NonEscapingStackObjects,
+          InstOverlapIntervalsTy &IOL, MemorySSA &MSSA, const DataLayout &DL,
+          const TargetLibraryInfo &TLI) {
+  // See through pointer-to-pointer bitcasts.
+  SmallVector<Value *, 4> Pointers;
+  GetUnderlyingObjects(getStoredPointerOperand(SI), Pointers, DL);
+
+  // Stores to stack values are valid candidates for removal (so long as they
+  // are not returned or otherwise captured).
+  for (Value *Pointer : Pointers)
+    if (!NonEscapingStackObjects.count(Pointer) ||
+        (isAllocLikeFn(Pointer, &TLI) &&
+         PointerMayBeCaptured(Pointer, true, true)))
+      return false;
+
+  LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: " << *SI
+                    << "\n  Objects: ";
+             for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+                                                     E = Pointers.end();
+                  I != E; ++I) {
+               dbgs() << **I;
+               if (std::next(I) != E)
+                 dbgs() << ", ";
+             } dbgs() << '\n');
+  deleteDeadInstruction(SI, IOL, MSSA, TLI);
+  ++NumFastStores;
+  return true;
+}
+
+// Check for anything that acts as a DSE barrier: atomics stronger than
+// monotonic, anything that looks like a self-read, and NI instructions that
+// may throw.
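+// For example, the release store to %Q below acts as a barrier; the first
+// store to %P cannot be removed even though it is overwritten afterwards:
+//   store i32 1, i32* %P
+//   store atomic i32 0, i32* %Q release, align 4
+//   store i32 2, i32* %P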
+static bool
+isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, const Value *SILocUnd,
+             Instruction *NI, MemoryLocation &NILoc, ModRefInfo &MaxModRefInfo,
+             std::vector<Instruction *> &MayModStores,
+             SmallSetVector<const Value *, 16> &NonEscapingStackObjects,
+             AliasAnalysis &AA, const TargetLibraryInfo &TLI) {
+  // If NI may throw it acts as a barrier, unless we are storing to an
+  // alloca/alloca-like object that does not escape.
+  if (isThrowLikeInstruction(NI) && !NonEscapingStackObjects.count(SILocUnd))
+    return true;
+
+  // Here we mostly only care about whether NI refs SILoc, not whether it
+  // mods it, so as a quick optimisation we treat anything that cannot read
+  // from memory as NoModRef. The exception where we do care about mods is
+  // PartialEarlierWithFullLater, which needs to know if a store modified the
+  // loc between the two. So we save stores and defer the mod check to later
+  // if needed.
+  ModRefInfo NIModRefInfo = ModRefInfo::NoModRef;
+  if (NI->mayReadFromMemory())
+    NIModRefInfo = AA.getModRefInfo(NI, SILoc);
+  else
+    MayModStores.push_back(NI);
+
+  if (NI->isAtomic()) {
+    if (auto *NSI = dyn_cast<StoreInst>(NI)) {
+      if (isStrongerThanMonotonic(NSI->getOrdering()))
+        return true;
+    } else if (auto *NLI = dyn_cast<LoadInst>(NI)) {
+      if (isStrongerThanMonotonic(NLI->getOrdering()) ||
+          !AA.isNoAlias(MemoryLocation::get(NLI), SILoc))
+        return true;
+    } else if (isa<FenceInst>(NI))
+      // We can remove the dead stores, irrespective of the fence and its
+      // ordering (release/acquire/seq_cst). Fences only constrain the
+      // ordering of already visible stores; a fence does not make a store
+      // visible to other threads. So, skipping over a fence does not change
+      // a store from being dead. We do treat fences as maythrows, to be
+      // conservative, so that they block optimisations in the same way.
+      return false;
+    else
+      return true;
+  } else if (isRefSet(NIModRefInfo))
+    return true;
+
+  if (hasAnalyzableMemoryWrite(NI, TLI) &&
+      isPossibleSelfRead(NI, NILoc, SI, TLI, AA))
+    return true;
+
+  MaxModRefInfo = unionModRef(MaxModRefInfo, NIModRefInfo);
+  return false;
+}
+
+// TODO: Combine this with the getLocForWrite version above.
+static MemoryLocation getLocForWriteEx(Instruction *SI, AliasAnalysis &AA,
+                                       const TargetLibraryInfo &TLI) {
+  MemoryLocation Loc = getLocForWrite(SI);
+  if (Loc.Ptr)
+    return Loc;
+
+  if (hasAnalyzableMemoryWrite(SI, TLI)) {
+    CallSite CS(SI);
+    // All the supported functions so far happen to have dest as their first
+    // argument.
+    return MemoryLocation(CS.getArgument(0));
+  }
+  return MemoryLocation();
+}
+
+static bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
+                                         MemorySSA &MSSA, DominatorTree &DT,
+                                         PostDominatorTree &PDT,
+                                         const TargetLibraryInfo &TLI) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  bool MadeChange = false;
+
+  // All MemoryDef stores.
+  SmallVector<MemoryDef *, 64> Stores;
+  // Any that should be skipped as they are already deleted.
+  SmallPtrSet<MemoryDef *, 8> SkipStores;
+  // A map of interval maps representing partially-overwritten value parts.
+  InstOverlapIntervalsTy IOL;
+  // Post-orders for each interesting instruction, basic block and throw.
+  // These count up in post order, so lower numbers are later in the
+  // function. Used to detect loop latches and throws between MemoryDefs.
+  DenseMap<Value *, unsigned> InstructionPostOrders;
+  // Extra maythrow instructions are any that are not MemoryDefs (those we
+  // will hit during the walk naturally). We keep the PO and the instruction
+  // to check if there are throws in a given range.
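+  // (A typical example is a call marked readnone that can still unwind: it
+  // creates no MemoryDef, but stores before it become visible if it throws.)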
+  SmallVector<std::pair<unsigned, Instruction *>, 4> ExtraMayThrowInstructions;
+  // Keep track of all of the stack and alloca-like objects that do not
+  // escape. Note that alloc-like objects in this set may still be returned;
+  // handleEnd re-checks for that.
+  SmallSetVector<const Value *, 16> NonEscapingStackObjects;
+
+  // Get all memory writes and post orders of anything interesting.
+  unsigned PO = 0;
+  for (BasicBlock *BB : post_order(&F)) {
+    for (Instruction &I : reverse(*BB)) {
+      bool MayThrow = isThrowLikeInstruction(&I);
+      auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
+
+      if (MayThrow || MD) {
+        unsigned IPO = PO++;
+        InstructionPostOrders[&I] = IPO;
+
+        if (!MD)
+          ExtraMayThrowInstructions.push_back({IPO, &I});
+        if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
+          Stores.push_back(MD);
+      }
+
+      // Track alloca and alloca-like objects.
+      if (isa<AllocaInst>(&I))
+        NonEscapingStackObjects.insert(&I);
+      // Okay, so these are dead heap objects, but if the pointer never
+      // escapes then it's leaked by this function anyways.
+      else if (isAllocLikeFn(&I, &TLI) &&
+               !PointerMayBeCaptured(&I, false, true))
+        NonEscapingStackObjects.insert(&I);
+    }
+
+    InstructionPostOrders[BB] = PO++;
+  }
+  // Treat byval or inalloca arguments the same as allocas; stores to them
+  // are dead at the end of the function.
+  for (Argument &AI : F.args())
+    if (AI.hasByValOrInAllocaAttr())
+      NonEscapingStackObjects.insert(&AI);
+
+  // For each store:
+  for (MemoryDef *SIMD : Stores) {
+    if (SkipStores.count(SIMD))
+      continue;
+    Instruction *SI = SIMD->getMemoryInst();
+    MemoryLocation SILoc = getLocForWriteEx(SI, AA, TLI);
+    assert(SILoc.Ptr && "SILoc should not be null");
+    const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
+
+    // Check for noop stores that store what was read from the same address.
+    if (eliminateNoopStore(*SIMD, IOL, DL, AA, MSSA, TLI)) {
+      MadeChange = true;
+      continue;
+    }
+
+    // Limit on the number of memory instructions we can look at.
+    int InstructionLimit = MemorySSAMemoryAccessScanLimit;
+    // Used by PartialEarlierWithFullLater to check that the walk up to this
+    // point was free of anything that could mod or ref SILoc (excluding the
+    // current instruction).
+    ModRefInfo MaxModRefInfo = ModRefInfo::NoModRef;
+    // Stores seen during the walk whose mod check against SILoc has been
+    // deferred.
+    std::vector<Instruction *> MayModStores;
+    // Prev node, in case we hit a loop latch.
+    MemoryAccess *Prev = SIMD;
+
+    // Walk forward looking for stores that kill this one.
+    for (NextResult Next = getNextMemoryAccess(SIMD, SILoc, AA, true);
+         Next.type != NextResult::Bad && --InstructionLimit != 0;
+         Prev = Next.next,
+         Next = getNextMemoryAccess(Next.next, SILoc, AA, false)) {
+
+      // We have hit the end of the walk. If the store is non-escaping, it is
+      // dead and we can kill it.
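+      // (NextResult::End means there were no further Defs or Phis, and no
+      // aliasing reads, so no instruction in this function reads the stored
+      // location after SI.)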
+      if (Next.type == NextResult::End) {
+        if (handleEnd(SI, NonEscapingStackObjects, IOL, MSSA, DL, TLI))
+          MadeChange = true;
+        break;
+      }
+
+      if (auto ND = dyn_cast<MemoryDef>(Next.next)) {
+        Instruction *NI = ND->getMemoryInst();
+        MemoryLocation NILoc = getLocForWriteEx(NI, AA, TLI);
+
+        // If we hit a free or lifetime end, we can kill the store.
+        if (handleFree(SI, SILocUnd, NI, NILoc, NonEscapingStackObjects,
+                       InstructionPostOrders, ExtraMayThrowInstructions, IOL,
+                       AA, MSSA, PDT, DT, DL, TLI)) {
+          MadeChange = true;
+          break;
+        }
+
+        // Check for anything that looks like it will be a barrier to further
+        // removal.
+        ModRefInfo MaxModRefInfoPrev = MaxModRefInfo;
+        if (isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc, MaxModRefInfo,
+                         MayModStores, NonEscapingStackObjects, AA, TLI))
+          break;
+
+        // NI must post-dominate SI for us to safely remove SI.
+        if (!PDT.dominates(ND->getBlock(), SIMD->getBlock()))
+          continue;
+
+        // Before we try to remove anything, check for any extra throwing
+        // instructions that block us from DSEing.
+        if (mayThrowBetween(SI, NI, SILocUnd, NonEscapingStackObjects,
+                            InstructionPostOrders, ExtraMayThrowInstructions,
+                            DT))
+          break;
+
+        // Get what type of overwrite this might be.
+        int64_t InstWriteOffset, DepWriteOffset;
+        OverwriteResult OR = isOverwrite(NILoc, SILoc, DL, TLI, DepWriteOffset,
+                                         InstWriteOffset, SI, IOL, AA, &F);
+
+        if (OR == OW_Complete) {
+          LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *SI
+                            << "\n  KILLER: " << *NI << '\n');
+          deleteDeadInstruction(SI, IOL, MSSA, TLI);
+          ++NumFastStores;
+          MadeChange = true;
+          break;
+        } else if (OR == OW_PartialEarlierWithFullLater) {
+          // We currently only merge values between stores of constants.
+          auto *Earlier = dyn_cast<StoreInst>(SI);
+          auto *Later = dyn_cast<StoreInst>(NI);
+          if (!Earlier || !isa<ConstantInt>(Earlier->getValueOperand()) ||
+              !Later || !isa<ConstantInt>(Later->getValueOperand()))
+            continue;
+
+          // There are a few extra checks if it is a
+          // PartialEarlierWithFullLater, as the removal happens in the
+          // opposite order to normal DSE (NI is removed, not SI).
+          if (!isRemovable(NI) || ND->getBlock() != SIMD->getBlock() ||
+              isModOrRefSet(MaxModRefInfoPrev))
+            continue;
+
+          // And one last check of the stores between the two instructions
+          // whose mod check we deferred, to ensure they do not mod SILoc.
+          if (std::any_of(MayModStores.begin(), MayModStores.end(),
+                          [&](Instruction *I) {
+                            return I != NI &&
+                                   isModSet(AA.getModRefInfo(I, SILoc));
+                          }))
+            continue;
+
+          // Merge the two stores.
+          mergePartialMemoryDefs(Earlier, Later, IOL, DL, MSSA, TLI);
+          // This removes ND, so we start over from the last entry, ensuring
+          // we won't look at ND again. We don't reset the InstructionLimit.
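+          // (Prev is the access we visited immediately before ND; MemorySSA
+          // has already been updated, so re-walking Prev's uses will not
+          // revisit the removed def.)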
+          Next = {NextResult::Next, Prev};
+          SkipStores.insert(ND);
+          // Remove the effects of NI from MaxModRefInfo/MayModStores.
+          MaxModRefInfo = MaxModRefInfoPrev;
+          assert(!MayModStores.empty() && MayModStores.back() == NI);
+          MayModStores.pop_back();
+          MadeChange = true;
+        }
+      } else if (auto NP = dyn_cast<MemoryPhi>(Next.next)) {
+        // Detect loops: if the phi's block has a higher (earlier) PO number
+        // than the access we came from, we have walked a back edge.
+        Value *PrevValue = nullptr;
+        if (auto SMD = dyn_cast<MemoryDef>(Prev))
+          PrevValue = SMD->getMemoryInst();
+        else if (auto SMP = dyn_cast<MemoryPhi>(Prev))
+          PrevValue = SMP->getBlock();
+        assert(InstructionPostOrders.count(NP->getBlock()));
+        assert(InstructionPostOrders.count(PrevValue));
+        if (InstructionPostOrders[NP->getBlock()] >
+            InstructionPostOrders[PrevValue])
+          break;
+      }
+    }
+  }
+
+  if (EnablePartialOverwriteTracking)
+    MadeChange |= removePartiallyOverlappedStores(&AA, DL, IOL);
+
+  return MadeChange;
+}
+
 //===----------------------------------------------------------------------===//
 // DSE Pass
 //===----------------------------------------------------------------------===//
 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
-  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
-  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
-  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
-  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+  if (EnableMemorySSA) {
+    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+    PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
 
-  if (!eliminateDeadStores(F, AA, MD, DT, TLI))
-    return PreservedAnalyses::all();
+    if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI))
+      return PreservedAnalyses::all();
+  } else {
+    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+
+    if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI))
+      return PreservedAnalyses::all();
+  }
 
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
-  PA.preserve<MemoryDependenceAnalysis>();
+  if (EnableMemorySSA)
+    PA.preserve<MemorySSAAnalysis>();
+  else
+    PA.preserve<MemoryDependenceAnalysis>();
   return PA;
 }
 
@@ -1356,25 +1954,42 @@
     if (skipFunction(F))
       return false;
 
-    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    MemoryDependenceResults *MD =
-        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
-    const TargetLibraryInfo *TLI =
-        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    const TargetLibraryInfo &TLI =
+        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+    if (EnableMemorySSA) {
+      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+      PostDominatorTree &PDT =
+          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
 
-    return eliminateDeadStores(F, AA, MD, DT, TLI);
+      return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+    } else {
+      MemoryDependenceResults &MD =
+          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+
+      return eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
+    }
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addPreserved<MemoryDependenceWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+
+    if (EnableMemorySSA) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addRequired<PostDominatorTreeWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+    } else {
+      AU.addRequired<MemoryDependenceWrapperPass>();
+      AU.addPreserved<MemoryDependenceWrapperPass>();
+    }
   }
 };
 
@@ -1385,8 +2000,10 @@
 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, Index: llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll +++ llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll @@ -40,7 +40,6 @@ %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above Index: llvm/test/Transforms/DeadStoreElimination/merge-stores.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/merge-stores.ll +++ llvm/test/Transforms/DeadStoreElimination/merge-stores.ll @@ -39,7 +39,6 @@ %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above Index: llvm/test/Transforms/DeadStoreElimination/multiblock.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/DeadStoreElimination/multiblock.ll @@ -0,0 +1,775 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +declare void @unknown_func() +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare noalias i8* @malloc(i32) +declare void @free(i8* nocapture) + + +define void @test1(i32* noalias %P) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + %DEAD = load i32, i32* %P + %DEAD2 = add i32 %DEAD, 1 + store i32 %DEAD2, i32* %P + br label %bb1 +bb1: + store i32 0, i32* %P + ret void +} + + +define void @test2(i32* noalias %P) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test3(i32* noalias %P) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 0, 
i32* %P + br label %bb3 +bb3: + ret void +} + + +define void @test4(i32* noalias %P) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %x = load i32, i32* %P + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test5(i32* noalias %P) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 1, i32* %P + br label %bb3 +bb2: + store i32 1, i32* %P + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test6(i32* noalias %P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test7(i32* noalias %P, i32* noalias %Q) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[Q:%.*]] +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 1, i32* %Q + br i1 true, label %bb1, label %bb2 +bb1: + load i32, i32* %P + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %P + ret void +} + + +define void @test8(i32* noalias %P) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 1, i32* %P + br label %bb3 +bb2: + store i32 1, i32* %P + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test9(i32* noalias %P) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ret void +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test10(i32* noalias %P) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; 
CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i8 1, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i8 1, i8* %P2 + ret void +} + + +define void @test11(i32* noalias %P) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i8 1, i8* %P2 + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test12(i32* noalias %P) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: store i8 0, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ret void +bb3: + store i8 0, i8* %P2 + ret void +} + + +define void @test13(i32* noalias %P) { +; CHECK-LABEL: @test13( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br label %for +for: + store i32 0, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test14(i32* noalias %P) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + store i32 1, i32* %P + br label %for +for: + store i32 0, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test15(i32* noalias %P) { +; CHECK-LABEL: @test15( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i8 2, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + br i1 true, label %bb1, label %bb3 +bb1: + store i32 1, i32* %P + br label %bb3 +bb3: + store i8 2, i8* %P2 + ret void +} + + +define void @test16(i32* noalias %P) { +; CHECK-LABEL: @test16( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + ret void +} + + +define void @test17(i32* noalias %P) { +; CHECK-LABEL: @test17( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void 
@free(i8* [[P2]]) +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + call void @unknown_func() + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + ret void +} + + +define void @test18(i32* noalias %P) { +; CHECK-LABEL: @test18( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i8 1, i8* [[P2]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: store i8 2, i8* [[P2]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %P2 = bitcast i32* %P to i8* + store i32 0, i32* %P + br label %for +for: + store i8 1, i8* %P2 + %x = load i32, i32* %P + store i8 2, i8* %P2 + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test19(i32* noalias %P) { +; CHECK-LABEL: @test19( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + br label %bb3 +bb3: + ret void +} + + +define void @test20(i32* noalias %P) { +; CHECK-LABEL: @test20( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + ret void +} + + +define void @test21(i32* noalias %P) { +; CHECK-LABEL: @test21( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: br label [[FOR:%.*]] +; 
CHECK: for: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br label %for +for: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + br i1 false, label %for, label %end +end: + ret void +} + + +define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test22( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]] +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} + + +define void @test23(i32* noalias %P) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test24(i32* noalias %P) { +; CHECK-LABEL: @test24( +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb2, label %bb1 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test25(i32* noalias %P) { +; CHECK-LABEL: @test25( +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + %P2 = bitcast i32* %P to i8* + br i1 true, label %bb2, label %bb1 +bb1: + br label %bb3 +bb2: + call void @free(i8* %P2) + ret void +bb3: + ret void +} + + +define i8* @test26() { +; CHECK-LABEL: @test26( +; CHECK-NEXT: bb1: +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] +; CHECK-NEXT: ret i8* [[R]] +; +bb1: + br i1 true, label %bb2, label %bb3 +bb2: + %m = call noalias i8* @malloc(i32 10) + store i8 1, i8* %m + br label %bb3 +bb3: + %r = phi i8* [ null, %bb1 ], [ %m, %bb2 ] + ret i8* %r +} + + +define void @test27() { +; CHECK-LABEL: @test27( +; CHECK-NEXT: bb1: +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] 
+; CHECK-NEXT: ret void +; +bb1: + br i1 true, label %bb2, label %bb3 +bb2: + %m = call noalias i8* @malloc(i32 10) + store i8 1, i8* %m + br label %bb3 +bb3: + %r = phi i8* [ null, %bb1 ], [ %m, %bb2 ] + ret void +} + + +define i8* @test28() { +; CHECK-LABEL: @test28( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: [[MC0:%.*]] = bitcast i8* [[M]] to i8* +; CHECK-NEXT: [[MC1:%.*]] = bitcast i8* [[MC0]] to i8* +; CHECK-NEXT: [[MC2:%.*]] = bitcast i8* [[MC1]] to i8* +; CHECK-NEXT: [[MC3:%.*]] = bitcast i8* [[MC2]] to i8* +; CHECK-NEXT: [[MC4:%.*]] = bitcast i8* [[MC3]] to i8* +; CHECK-NEXT: [[MC5:%.*]] = bitcast i8* [[MC4]] to i8* +; CHECK-NEXT: [[MC6:%.*]] = bitcast i8* [[MC5]] to i8* +; CHECK-NEXT: [[M0:%.*]] = bitcast i8* [[MC6]] to i8* +; CHECK-NEXT: store i8 2, i8* [[M]] +; CHECK-NEXT: ret i8* [[M0]] +; +bb0: + %m = call noalias i8* @malloc(i32 10) + %mc0 = bitcast i8* %m to i8* + %mc1 = bitcast i8* %mc0 to i8* + %mc2 = bitcast i8* %mc1 to i8* + %mc3 = bitcast i8* %mc2 to i8* + %mc4 = bitcast i8* %mc3 to i8* + %mc5 = bitcast i8* %mc4 to i8* + %mc6 = bitcast i8* %mc5 to i8* + %m0 = bitcast i8* %mc6 to i8* + store i8 2, i8* %m + ret i8* %m0 +} + + +define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %x, i32* noalias nocapture %b) local_unnamed_addr { +; CHECK-LABEL: @test_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY4_LR_PH_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body4.lr.ph.preheader: +; CHECK-NEXT: br label [[FOR_BODY4_LR_PH:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body4.lr.ph: +; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] +; CHECK-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: store i32 [[ADD9:%.*]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC11]] = add nuw nsw i32 [[I_028]], 1 +; CHECK-NEXT: [[EXITCOND29:%.*]] = icmp eq i32 [[INC11]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND29]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_LR_PH]] +; CHECK: for.body4: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[ADD9]], [[FOR_BODY4]] ] +; CHECK-NEXT: [[J_026:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_026]], [[MUL]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[J_026]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[ADD9]] = add nsw i32 [[MUL7]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_026]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]] +; +entry: + %cmp27 = icmp sgt i32 %N, 0 + br i1 %cmp27, label %for.body4.lr.ph.preheader, label %for.cond.cleanup + +for.body4.lr.ph.preheader: ; preds = %entry + br label %for.body4.lr.ph + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void + +for.body4.lr.ph: ; 
preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
+  %i.028 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.028
+  store i32 0, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %i.028, %N
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  store i32 %add9, i32* %arrayidx, align 4
+  %inc11 = add nuw nsw i32 %i.028, 1
+  %exitcond29 = icmp eq i32 %inc11, %N
+  br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
+
+for.body4:                                        ; preds = %for.body4, %for.body4.lr.ph
+  %0 = phi i32 [ 0, %for.body4.lr.ph ], [ %add9, %for.body4 ]
+  %j.026 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+  %add = add nsw i32 %j.026, %mul
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add
+  %1 = load i32, i32* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, i32* %x, i32 %j.026
+  %2 = load i32, i32* %arrayidx6, align 4
+  %mul7 = mul nsw i32 %2, %1
+  %add9 = add nsw i32 %mul7, %0
+  %inc = add nuw nsw i32 %j.026, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
+}
+
Index: llvm/test/Transforms/DeadStoreElimination/simple.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -895,3 +895,111 @@
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
+define void @test40(i32** noalias %Pp, i32* noalias %Q) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[AC:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[AC]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** [[PP:%.*]] to i8**
+; CHECK-NEXT:    [[PC:%.*]] = load i8*, i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[QC:%.*]] = bitcast i32* [[Q:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 [[AC]], i8* align 4 [[QC]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[PC]], i8* nonnull align 4 [[AC]], i64 4, i1 true)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[AC]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca i32, align 4
+  %Ac = bitcast i32* %A to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %Ac)
+  %0 = bitcast i32** %Pp to i8**
+  %Pc = load i8*, i8** %0, align 8
+  %Qc = bitcast i32* %Q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %Ac, i8* align 4 %Qc, i64 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %Pc, i8* nonnull align 4 %Ac, i64 4, i1 true)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %Ac)
+  ret void
+}
+
+; I think this case is currently handled incorrectly by memdep's DSE:
+; because @unknown_func may throw, the "store i32 1" should be left in
+; place; it should not be removed based on the later free.
+declare void @free(i8* nocapture) +define void @test41(i32* noalias %P) { +; NOCHECK-LABEL: @test41( +; NOCHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; NOCHECK-NEXT: store i32 1, i32* [[P]] +; NOCHECK-NEXT: call void @unknown_func() +; NOCHECK-NEXT: call void @free(i8* [[P2]]) +; NOCHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + call void @unknown_func() + store i32 2, i32* %P + call void @free(i8* %P2) + ret void +} + +define void @test42(i32* %P, i32* %Q) { +; NOCHECK-LABEL: @test42( +; NOCHECK-NEXT: store i32 1, i32* [[P:%.*]] +; NOCHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; NOCHECK-NEXT: store i32 2, i32* [[Q:%.*]] +; NOCHECK-NEXT: store i8 3, i8* [[P2]] +; NOCHECK-NEXT: ret void +; + store i32 1, i32* %P + %P2 = bitcast i32* %P to i8* + store i32 2, i32* %Q + store i8 3, i8* %P2 + ret void +} + +define void @test42a(i32* %P, i32* %Q) { +; NOCHECK-LABEL: @test42a( +; NOCHECK-NEXT: store atomic i32 1, i32* [[P:%.*]] unordered, align 4 +; NOCHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* +; NOCHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4 +; NOCHECK-NEXT: store atomic i8 3, i8* [[P2]] unordered, align 4 +; NOCHECK-NEXT: ret void +; + store atomic i32 1, i32* %P unordered, align 4 + %P2 = bitcast i32* %P to i8* + store atomic i32 2, i32* %Q unordered, align 4 + store atomic i8 3, i8* %P2 unordered, align 4 + ret void +} + +define void @test43(i32* %P, i32* noalias %Q) { +; CHECK-LABEL: @test43( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 50331649, i32* [[P:%.*]] +; CHECK-NEXT: store i32 2, i32* [[Q:%.*]] +; CHECK-NEXT: ret void +; +entry: + store i32 1, i32* %P + %P2 = bitcast i32* %P to i8* + store i32 2, i32* %Q + store i8 3, i8* %P2 + ret void +} + +define void @test43a(i32* %P, i32* noalias %Q) { +; CHECK-LABEL: @test43a( +; CHECK-NEXT: entry: +; CHECK-NEXT: store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4 +; CHECK-NEXT: ret void +; +entry: + store atomic i32 1, i32* %P unordered, align 4 + %P2 = bitcast i32* %P to i8* + store atomic i32 2, i32* %Q unordered, align 4 + store atomic i8 3, i8* %P2 unordered, align 4 + ret void +}