Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -6,11 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a trivial dead store elimination that only considers
-// basic-block local redundant stores.
-//
-// FIXME: This should eventually be extended to be a post-dominator tree
-// traversal. Doing so would be pretty trivial.
+// This file implements dead store elimination, which eliminates stores that
+// are overwritten on all paths to a possible observation.
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,7 +25,6 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
@@ -105,10 +101,6 @@
                               cl::init(true), cl::Hidden,
                               cl::desc("Enable partial store merging in DSE"));
 
-static cl::opt<bool>
-    EnableMemorySSA("enable-dse-memoryssa", cl::init(true), cl::Hidden,
-                    cl::desc("Use the new MemorySSA-backed DSE."));
-
 static cl::opt<unsigned>
     MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
                        cl::desc("The number of memory instructions to scan for "
@@ -153,69 +145,6 @@
 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
 
-/// Delete this instruction. Before we do, go through and zero out all the
-/// operands of this instruction. If any of them become dead, delete them and
-/// the computation tree that feeds them.
-/// If ValueSet is non-null, remove any deleted instructions from it as well.
-static void
-deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
-                      MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
-                      InstOverlapIntervalsTy &IOL,
-                      MapVector<Instruction *, bool> &ThrowableInst,
-                      SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
-  SmallVector<Instruction *, 32> NowDeadInsts;
-
-  NowDeadInsts.push_back(I);
-  --NumFastOther;
-
-  // Keeping the iterator straight is a pain, so we let this routine tell the
-  // caller what the next instruction is after we're done mucking about.
-  BasicBlock::iterator NewIter = *BBI;
-
-  // Before we touch this instruction, remove it from memdep!
-  do {
-    Instruction *DeadInst = NowDeadInsts.pop_back_val();
-    // Mark the DeadInst as dead in the list of throwable instructions.
-    auto It = ThrowableInst.find(DeadInst);
-    if (It != ThrowableInst.end())
-      ThrowableInst[It->first] = false;
-    ++NumFastOther;
-
-    // Try to preserve debug information attached to the dead instruction.
-    salvageDebugInfo(*DeadInst);
-    salvageKnowledge(DeadInst);
-
-    // This instruction is dead, zap it, in stages. Start by removing it from
-    // MemDep, which needs to know the operands and needs it to be in the
-    // function.
-    MD.removeInstruction(DeadInst);
-
-    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
-      Value *Op = DeadInst->getOperand(op);
-      DeadInst->setOperand(op, nullptr);
-
-      // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue; - - if (Instruction *OpI = dyn_cast(Op)) - if (isInstructionTriviallyDead(OpI, &TLI)) - NowDeadInsts.push_back(OpI); - } - - if (ValueSet) ValueSet->remove(DeadInst); - IOL.erase(DeadInst); - - if (NewIter == DeadInst->getIterator()) - NewIter = DeadInst->eraseFromParent(); - else - DeadInst->eraseFromParent(); - } while (!NowDeadInsts.empty()); - *BBI = NewIter; - // Pop dead entries from back of ThrowableInst till we find an alive entry. - while (!ThrowableInst.empty() && !ThrowableInst.back().second) - ThrowableInst.pop_back(); -} - /// Does this instruction write some memory? This only returns true for things /// that we can analyze with other helpers below. static bool hasAnalyzableMemoryWrite(Instruction *I, @@ -289,19 +218,6 @@ return MemoryLocation(); } -/// Return the location read by the specified "hasAnalyzableMemoryWrite" -/// instruction if any. -static MemoryLocation getLocForRead(Instruction *Inst, - const TargetLibraryInfo &TLI) { - assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); - - // The only instructions that both read and write are the mem transfer - // instructions (memcpy/memmove). - if (auto *MTI = dyn_cast(Inst)) - return MemoryLocation::getForSource(MTI); - return MemoryLocation(); -} - /// If the value of this instruction and the memory it writes to is unused, may /// we delete this instruction? static bool isRemovable(Instruction *I) { @@ -373,17 +289,6 @@ return isa(I); } -/// Return the pointer that is being written to. -static Value *getStoredPointerOperand(Instruction *I, - const TargetLibraryInfo &TLI) { - //TODO: factor this to reuse getLocForWrite - MemoryLocation Loc = getLocForWrite(I, TLI); - assert(Loc.Ptr && - "unable to find pointer written for analyzable instruction?"); - // TODO: most APIs don't expect const Value * - return const_cast(Loc.Ptr); -} - static uint64_t getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F) { @@ -442,12 +347,11 @@ /// \p Earlier, but they both write to the same underlying object. In that /// case, use isPartialOverwrite to check if \p Later partially overwrites /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined. -template static OverwriteResult isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, const MemoryLocation &Later, const MemoryLocation &Earlier, const DataLayout &DL, const TargetLibraryInfo &TLI, - int64_t &EarlierOff, int64_t &LaterOff, AATy &AA, + int64_t &EarlierOff, int64_t &LaterOff, BatchAAResults &AA, const Function *F) { // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). @@ -659,69 +563,14 @@ return OW_Unknown; } -/// If 'Inst' might be a self read (i.e. a noop copy of a -/// memory region into an identical pointer) then it doesn't actually make its -/// input dead in the traditional sense. Consider this case: -/// -/// memmove(A <- B) -/// memmove(A <- A) -/// -/// In this case, the second store to A does not make the first store to A dead. -/// The usual situation isn't an explicit A<-A store like this (which can be -/// trivially removed) but a case where two pointers may alias. -/// -/// This function detects when it is unsafe to remove a dependent instruction -/// because the DSE inducing instruction may be a self-read. 
-static bool isPossibleSelfRead(Instruction *Inst, - const MemoryLocation &InstStoreLoc, - Instruction *DepWrite, - const TargetLibraryInfo &TLI, - AliasAnalysis &AA) { - // Self reads can only happen for instructions that read memory. Get the - // location read. - MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); - if (!InstReadLoc.Ptr) - return false; // Not a reading instruction. - - // If the read and written loc obviously don't alias, it isn't a read. - if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) - return false; - - if (isa(Inst)) { - // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) - // but in practice memcpy(A <- B) either means that A and B are disjoint or - // are equal (i.e. there are not partial overlaps). Given that, if we have: - // - // memcpy/memmove(A <- B) // DepWrite - // memcpy(A <- B) // Inst - // - // with Inst reading/writing a >= size than DepWrite, we can reason as - // follows: - // - // - If A == B then both the copies are no-ops, so the DepWrite can be - // removed. - // - If A != B then A and B are disjoint locations in Inst. Since - // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. - // Therefore DepWrite can be removed. - MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); - - if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) - return false; - } - - // If DepWrite doesn't read memory or if we can't prove it is a must alias, - // then it can't be considered dead. - return true; -} - /// Returns true if the memory which is accessed by the second instruction is not /// modified between the first and the second instruction. /// Precondition: Second instruction must be dominated by the first /// instruction. -template static bool -memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, AATy &AA, - const DataLayout &DL, DominatorTree *DT) { +memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, + BatchAAResults &AA, const DataLayout &DL, + DominatorTree *DT) { // Do a backwards scan through the CFG from SecondI to FirstI. Look for // instructions which can modify the memory location accessed by SecondI. // @@ -801,272 +650,6 @@ return true; } -/// Find all blocks that will unconditionally lead to the block BB and append -/// them to F. -static void findUnconditionalPreds(SmallVectorImpl &Blocks, - BasicBlock *BB, DominatorTree *DT) { - for (BasicBlock *Pred : predecessors(BB)) { - if (Pred == BB) continue; - Instruction *PredTI = Pred->getTerminator(); - if (PredTI->getNumSuccessors() != 1) - continue; - - if (DT->isReachableFromEntry(Pred)) - Blocks.push_back(Pred); - } -} - -/// Handle frees of entire structures whose dependency is a store -/// to a field of that structure. 
-static bool handleFree(CallInst *F, AliasAnalysis *AA, - MemoryDependenceResults *MD, DominatorTree *DT, - const TargetLibraryInfo *TLI, - InstOverlapIntervalsTy &IOL, - MapVector &ThrowableInst) { - bool MadeChange = false; - - MemoryLocation Loc = MemoryLocation::getAfter(F->getOperand(0)); - SmallVector Blocks; - Blocks.push_back(F->getParent()); - - while (!Blocks.empty()) { - BasicBlock *BB = Blocks.pop_back_val(); - Instruction *InstPt = BB->getTerminator(); - if (BB == F->getParent()) InstPt = F; - - MemDepResult Dep = - MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); - while (Dep.isDef() || Dep.isClobber()) { - Instruction *Dependency = Dep.getInst(); - if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || - !isRemovable(Dependency)) - break; - - Value *DepPointer = - getUnderlyingObject(getStoredPointerOperand(Dependency, *TLI)); - - // Check for aliasing. - if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) - break; - - LLVM_DEBUG( - dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: " - << *Dependency << '\n'); - - // DCE instructions only used to calculate that store. - BasicBlock::iterator BBI(Dependency); - deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, - ThrowableInst); - ++NumFastStores; - MadeChange = true; - - // Inst's old Dependency is now deleted. Compute the next dependency, - // which may also be dead, as in - // s[0] = 0; - // s[1] = 0; // This has just been deleted. - // free(s); - Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB); - } - - if (Dep.isNonLocal()) - findUnconditionalPreds(Blocks, BB, DT); - } - - return MadeChange; -} - -/// Check to see if the specified location may alias any of the stack objects in -/// the DeadStackObjects set. If so, they become live because the location is -/// being loaded. -static void removeAccessedObjects(const MemoryLocation &LoadedLoc, - SmallSetVector &DeadStackObjects, - const DataLayout &DL, AliasAnalysis *AA, - const TargetLibraryInfo *TLI, - const Function *F) { - const Value *UnderlyingPointer = getUnderlyingObject(LoadedLoc.Ptr); - - // A constant can't be in the dead pointer set. - if (isa(UnderlyingPointer)) - return; - - // If the kill pointer can be easily reduced to an alloca, don't bother doing - // extraneous AA queries. - if (isa(UnderlyingPointer) || isa(UnderlyingPointer)) { - DeadStackObjects.remove(UnderlyingPointer); - return; - } - - // Remove objects that could alias LoadedLoc. - DeadStackObjects.remove_if([&](const Value *I) { - // See if the loaded location could alias the stack location. - MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F)); - return !AA->isNoAlias(StackLoc, LoadedLoc); - }); -} - -/// Remove dead stores to stack-allocated locations in the function end block. -/// Ex: -/// %A = alloca i32 -/// ... -/// store i32 1, i32* %A -/// ret void -static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, - MemoryDependenceResults *MD, - const TargetLibraryInfo *TLI, - InstOverlapIntervalsTy &IOL, - MapVector &ThrowableInst) { - bool MadeChange = false; - - // Keep track of all of the stack objects that are dead at the end of the - // function. - SmallSetVector DeadStackObjects; - - // Find all of the alloca'd pointers in the entry block. - BasicBlock &Entry = BB.getParent()->front(); - for (Instruction &I : Entry) { - if (isa(&I)) - DeadStackObjects.insert(&I); - - // Okay, so these are dead heap objects, but if the pointer never escapes - // then it's leaked by this function anyways. 
- else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) - DeadStackObjects.insert(&I); - } - - // Treat byval or inalloca arguments the same, stores to them are dead at the - // end of the function. - for (Argument &AI : BB.getParent()->args()) - if (AI.hasPassPointeeByValueCopyAttr()) - DeadStackObjects.insert(&AI); - - const DataLayout &DL = BB.getModule()->getDataLayout(); - - // Scan the basic block backwards - for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ - --BBI; - - // If we find a store, check to see if it points into a dead stack value. - if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { - // See through pointer-to-pointer bitcasts - SmallVector Pointers; - getUnderlyingObjects(getStoredPointerOperand(&*BBI, *TLI), Pointers); - - // Stores to stack values are valid candidates for removal. - bool AllDead = true; - for (const Value *Pointer : Pointers) - if (!DeadStackObjects.count(Pointer)) { - AllDead = false; - break; - } - - if (AllDead) { - Instruction *Dead = &*BBI; - - LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " - << *Dead << "\n Objects: "; - for (SmallVectorImpl::iterator I = - Pointers.begin(), - E = Pointers.end(); - I != E; ++I) { - dbgs() << **I; - if (std::next(I) != E) - dbgs() << ", "; - } dbgs() - << '\n'); - - // DCE instructions only used to calculate that store. - deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, ThrowableInst, - &DeadStackObjects); - ++NumFastStores; - MadeChange = true; - continue; - } - } - - // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(&*BBI, TLI)) { - LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " - << *&*BBI << '\n'); - deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, ThrowableInst, - &DeadStackObjects); - ++NumFastOther; - MadeChange = true; - continue; - } - - if (isa(BBI)) { - // Remove allocas from the list of dead stack objects; there can't be - // any references before the definition. - DeadStackObjects.remove(&*BBI); - continue; - } - - if (auto *Call = dyn_cast(&*BBI)) { - // Remove allocation function calls from the list of dead stack objects; - // there can't be any references before the definition. - if (isAllocLikeFn(&*BBI, TLI)) - DeadStackObjects.remove(&*BBI); - - // If this call does not access memory, it can't be loading any of our - // pointers. - if (AA->doesNotAccessMemory(Call)) - continue; - - // If the call might load from any of our allocas, then any store above - // the call is live. - DeadStackObjects.remove_if([&](const Value *I) { - // See if the call site touches the value. - return isRefSet(AA->getModRefInfo( - Call, I, getPointerSize(I, DL, *TLI, BB.getParent()))); - }); - - // If all of the allocas were clobbered by the call then we're not going - // to find anything else to process. - if (DeadStackObjects.empty()) - break; - - continue; - } - - // We can remove the dead stores, irrespective of the fence and its ordering - // (release/acquire/seq_cst). Fences only constraints the ordering of - // already visible stores, it does not make a store visible to other - // threads. So, skipping over a fence does not change a store from being - // dead. 
- if (isa(*BBI)) - continue; - - MemoryLocation LoadedLoc; - - // If we encounter a use of the pointer, it is no longer considered dead - if (LoadInst *L = dyn_cast(BBI)) { - if (!L->isUnordered()) // Be conservative with atomic/volatile load - break; - LoadedLoc = MemoryLocation::get(L); - } else if (VAArgInst *V = dyn_cast(BBI)) { - LoadedLoc = MemoryLocation::get(V); - } else if (!BBI->mayReadFromMemory()) { - // Instruction doesn't read memory. Note that stores that weren't removed - // above will hit this case. - continue; - } else { - // Unknown inst; assume it clobbers everything. - break; - } - - // Remove any allocas from the DeadPointer set that are loaded, as this - // makes any stores above the access live. - removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI, BB.getParent()); - - // If all of the allocas were clobbered by the access then we're not going - // to find anything else to process. - if (DeadStackObjects.empty()) - break; - } - - return MadeChange; -} - static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, uint64_t &EarlierSize, int64_t LaterOffset, uint64_t LaterSize, bool IsOverwriteEnd) { @@ -1203,59 +786,10 @@ return Changed; } -static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI, - AliasAnalysis *AA, MemoryDependenceResults *MD, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - InstOverlapIntervalsTy &IOL, - MapVector &ThrowableInst, - DominatorTree *DT) { - // Must be a store instruction. - StoreInst *SI = dyn_cast(Inst); - if (!SI) - return false; - - // If we're storing the same value back to a pointer that we just loaded from, - // then the store can be removed. - if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - isRemovable(SI) && - memoryIsNotModifiedBetween(DepLoad, SI, *AA, DL, DT)) { - - LLVM_DEBUG( - dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: " - << *DepLoad << "\n STORE: " << *SI << '\n'); - - deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); - ++NumRedundantStores; - return true; - } - } - - // Remove null stores into the calloc'ed objects - Constant *StoredConstant = dyn_cast(SI->getValueOperand()); - if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) { - Instruction *UnderlyingPointer = - dyn_cast(getUnderlyingObject(SI->getPointerOperand())); - - if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && - memoryIsNotModifiedBetween(UnderlyingPointer, SI, *AA, DL, DT)) { - LLVM_DEBUG( - dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: " - << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); - - deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, ThrowableInst); - ++NumRedundantStores; - return true; - } - } - return false; -} - -template static Constant *tryToMergePartialOverlappingStores( StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset, - int64_t DepWriteOffset, const DataLayout &DL, AATy &AA, DominatorTree *DT) { + int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA, + DominatorTree *DT) { if (Earlier && isa(Earlier->getValueOperand()) && DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) && @@ -1297,228 +831,6 @@ return nullptr; } -static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, - MemoryDependenceResults *MD, DominatorTree *DT, - const TargetLibraryInfo *TLI) { - const DataLayout &DL = BB.getModule()->getDataLayout(); - bool MadeChange = false; - - MapVector 
ThrowableInst; - - // A map of interval maps representing partially-overwritten value parts. - InstOverlapIntervalsTy IOL; - - // Do a top-down walk on the BB. - for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { - // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(&*BBI, TLI)) { - MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, ThrowableInst); - // Increment BBI after handleFree has potentially deleted instructions. - // This ensures we maintain a valid iterator. - ++BBI; - continue; - } - - Instruction *Inst = &*BBI++; - - if (Inst->mayThrow()) { - ThrowableInst[Inst] = true; - continue; - } - - // Check to see if Inst writes to memory. If not, continue. - if (!hasAnalyzableMemoryWrite(Inst, *TLI)) - continue; - - // eliminateNoopStore will update in iterator, if necessary. - if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, - ThrowableInst, DT)) { - MadeChange = true; - continue; - } - - // If we find something that writes memory, get its memory dependence. - MemDepResult InstDep = MD->getDependency(Inst); - - // Ignore any store where we can't find a local dependence. - // FIXME: cross-block DSE would be fun. :) - if (!InstDep.isDef() && !InstDep.isClobber()) - continue; - - // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst, *TLI); - - // If we didn't get a useful location, fail. - if (!Loc.Ptr) - continue; - - // Loop until we find a store we can eliminate or a load that - // invalidates the analysis. Without an upper bound on the number of - // instructions examined, this analysis can become very time-consuming. - // However, the potential gain diminishes as we process more instructions - // without eliminating any of them. Therefore, we limit the number of - // instructions we look at. - auto Limit = MD->getDefaultBlockScanLimit(); - while (InstDep.isDef() || InstDep.isClobber()) { - // Get the memory clobbered by the instruction we depend on. MemDep will - // skip any instructions that 'Loc' clearly doesn't interact with. If we - // end up depending on a may- or must-aliased load, then we can't optimize - // away the store and we bail out. However, if we depend on something - // that overwrites the memory location we *can* potentially optimize it. - // - // Find out what memory location the dependent instruction stores. - Instruction *DepWrite = InstDep.getInst(); - if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) - break; - MemoryLocation DepLoc = getLocForWrite(DepWrite, *TLI); - // If we didn't get a useful location, or if it isn't a size, bail out. - if (!DepLoc.Ptr) - break; - - // Find the last throwable instruction not removed by call to - // deleteDeadInstruction. - Instruction *LastThrowing = nullptr; - if (!ThrowableInst.empty()) - LastThrowing = ThrowableInst.back().first; - - // Make sure we don't look past a call which might throw. This is an - // issue because MemoryDependenceAnalysis works in the wrong direction: - // it finds instructions which dominate the current instruction, rather than - // instructions which are post-dominated by the current instruction. - // - // If the underlying object is a non-escaping memory allocation, any store - // to it is dead along the unwind edge. Otherwise, we need to preserve - // the store. 
- if (LastThrowing && DepWrite->comesBefore(LastThrowing)) { - const Value *Underlying = getUnderlyingObject(DepLoc.Ptr); - bool IsStoreDeadOnUnwind = isa(Underlying); - if (!IsStoreDeadOnUnwind) { - // We're looking for a call to an allocation function - // where the allocation doesn't escape before the last - // throwing instruction; PointerMayBeCaptured - // reasonably fast approximation. - IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) && - !PointerMayBeCaptured(Underlying, false, true); - } - if (!IsStoreDeadOnUnwind) - break; - } - - // If we find a write that is a) removable (i.e., non-volatile), b) is - // completely obliterated by the store to 'Loc', and c) which we know that - // 'Inst' doesn't load from, then we can remove it. - // Also try to merge two stores if a later one only touches memory written - // to by the earlier one. - if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, - DepWriteOffset, InstWriteOffset, *AA, - BB.getParent()); - if (OR == OW_MaybePartial) - OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, - DepWrite, IOL); - - if (OR == OW_Complete) { - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite - << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, - ThrowableInst); - ++NumFastStores; - MadeChange = true; - - // We erased DepWrite; start over. - InstDep = MD->getDependency(Inst); - continue; - } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) || - ((OR == OW_Begin && - isShortenableAtTheBeginning(DepWrite)))) { - assert(!EnablePartialOverwriteTracking && "Do not expect to perform " - "when partial-overwrite " - "tracking is enabled"); - // The overwrite result is known, so these must be known, too. - uint64_t EarlierSize = DepLoc.Size.getValue(); - uint64_t LaterSize = Loc.Size.getValue(); - bool IsOverwriteEnd = (OR == OW_End); - MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, - InstWriteOffset, LaterSize, IsOverwriteEnd); - } else if (EnablePartialStoreMerging && - OR == OW_PartialEarlierWithFullLater) { - auto *Earlier = dyn_cast(DepWrite); - auto *Later = dyn_cast(Inst); - if (Constant *C = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, DL, *AA, - DT)) { - auto *SI = new StoreInst( - C, Earlier->getPointerOperand(), false, Earlier->getAlign(), - Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); - - unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_nontemporal}; - SI->copyMetadata(*DepWrite, MDToKeep); - ++NumModifiedStores; - - // Delete the old stores and now-dead instructions that feed them. - deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, - ThrowableInst); - deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, - ThrowableInst); - MadeChange = true; - - // We erased DepWrite and Inst (Loc); start over. - break; - } - } - } - - // If this is a may-aliased store that is clobbering the store value, we - // can keep searching past it for another must-aliased pointer that stores - // to the same location. For example, in: - // store -> P - // store -> Q - // store -> P - // we can remove the first store to P even though we don't know if P and Q - // alias. 
-      if (DepWrite == &BB.front()) break;
-
-      // Can't look past this instruction if it might read 'Loc'.
-      if (isRefSet(AA->getModRefInfo(DepWrite, Loc)))
-        break;
-
-      InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
-                                             DepWrite->getIterator(), &BB,
-                                             /*QueryInst=*/ nullptr, &Limit);
-    }
-  }
-
-  if (EnablePartialOverwriteTracking)
-    MadeChange |= removePartiallyOverlappedStores(DL, IOL, *TLI);
-
-  // If this block ends in a return, unwind, or unreachable, all allocas are
-  // dead at its end, which means stores to them are also dead.
-  if (BB.getTerminator()->getNumSuccessors() == 0)
-    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, ThrowableInst);
-
-  return MadeChange;
-}
-
-static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
-                                MemoryDependenceResults *MD, DominatorTree *DT,
-                                const TargetLibraryInfo *TLI) {
-  bool MadeChange = false;
-  for (BasicBlock &BB : F)
-    // Only check non-dead blocks. Dead blocks may have strange pointer
-    // cycles that will confuse alias analysis.
-    if (DT->isReachableFromEntry(&BB))
-      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
-
-  return MadeChange;
-}
-
 namespace {
 //=============================================================================
 // MemorySSA backed dead store elimination.
@@ -2472,10 +1784,9 @@
   }
 };
 
-bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
-                                  MemorySSA &MSSA, DominatorTree &DT,
-                                  PostDominatorTree &PDT,
-                                  const TargetLibraryInfo &TLI) {
+bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+                         DominatorTree &DT, PostDominatorTree &PDT,
+                         const TargetLibraryInfo &TLI) {
   bool MadeChange = false;
 
   DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
@@ -2650,18 +1961,10 @@
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
 
-  bool Changed = false;
-  if (EnableMemorySSA) {
-    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-    PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
-
-    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
-  } else {
-    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
-
-    Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
-  }
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
@@ -2675,10 +1978,7 @@
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
-  if (EnableMemorySSA)
-    PA.preserve<MemorySSAAnalysis>();
-  else
-    PA.preserve<MemoryDependenceAnalysis>();
+  PA.preserve<MemorySSAAnalysis>();
   return PA;
 }
 
@@ -2701,20 +2001,11 @@
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   const TargetLibraryInfo &TLI =
       getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+  PostDominatorTree &PDT =
+      getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
 
-  bool Changed = false;
-  if (EnableMemorySSA) {
-    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
-    PostDominatorTree &PDT =
-        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-
-    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
-  } else {
-    MemoryDependenceResults &MD =
-        getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
-
-    Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
-  }
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
@@ -2732,16 +2023,10 @@
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
-
-    if (EnableMemorySSA) {
-      AU.addRequired<MemorySSAWrapperPass>();
-      AU.addRequired<PostDominatorTreeWrapperPass>();
-      AU.addPreserved<PostDominatorTreeWrapperPass>();
-      AU.addPreserved<MemorySSAWrapperPass>();
-    } else {
-      AU.addRequired<MemoryDependenceWrapperPass>();
-      AU.addPreserved<MemoryDependenceWrapperPass>();
-    }
+    AU.addRequired<MemorySSAWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addPreserved<PostDominatorTreeWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
   }
 };
 
Index: llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
+++ llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
-; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
Index: llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll
+++ llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loop-carried-dependence.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s
+; RUN: opt -dse -S %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
Index: llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll
+++ llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s
+; RUN: opt -dse -S %s | FileCheck %s
 
 declare i1 @cond() readnone
Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s
-; PR9561
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin9.8"
-
-@A = external global [0 x i32]
-
-declare ghccc void @Func2(i32*, i32*, i32*, i32)
-
-define ghccc void @Func1(i32* noalias %Arg1, i32* noalias %Arg2, i32* %Arg3, i32 %Arg4) {
-entry:
-  store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
-; CHECK: store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
-  %ln2gz = getelementptr i32, i32* %Arg1, i32 14
-  %ln2gA = bitcast i32* %ln2gz to double*
-  %ln2gB = load double, double* %ln2gA
-  %ln2gD = getelementptr i32, i32* %Arg2, i32 -3
-  %ln2gE = bitcast i32* %ln2gD to double*
-  store double %ln2gB, double* %ln2gE
-; CHECK: store double %ln2gB, double* %ln2gE
-  tail call ghccc void @Func2(i32* %Arg1, i32* %Arg2, i32* %Arg3, i32 %Arg4) nounwind
-  ret void
-}
Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin" - -%"class.std::auto_ptr" = type { i32* } - -; CHECK-LABEL: @_Z3foov( -define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret(%"class.std::auto_ptr") %agg.result) uwtable ssp { -_ZNSt8auto_ptrIiED1Ev.exit: - %temp.lvalue = alloca %"class.std::auto_ptr", align 8 - call void @_Z3barv(%"class.std::auto_ptr"* sret(%"class.std::auto_ptr") %temp.lvalue) - %_M_ptr.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0 - %tmp.i.i = load i32*, i32** %_M_ptr.i.i, align 8 -; CHECK-NOT: store i32* null - store i32* null, i32** %_M_ptr.i.i, align 8 - %_M_ptr.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 - store i32* %tmp.i.i, i32** %_M_ptr.i.i4, align 8 -; CHECK: ret void - ret void -} - -declare void @_Z3barv(%"class.std::auto_ptr"* sret(%"class.std::auto_ptr")) Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll +++ /dev/null @@ -1,85 +0,0 @@ -; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -%struct.pair.162 = type { %struct.BasicBlock*, i32, [4 x i8] } -%struct.BasicBlock = type { %struct.Value, %struct.ilist_node.24, %struct.iplist.22, %struct.Function* } -%struct.Value = type { i32 (...)**, i8, i8, i16, %struct.Type*, %struct.Use*, %struct.StringMapEntry* } -%struct.Type = type { %struct.LLVMContext*, i8, [3 x i8], i32, {}* } -%struct.LLVMContext = type { %struct.LLVMContextImpl* } -%struct.LLVMContextImpl = type opaque -%struct.Use = type { %struct.Value*, %struct.Use*, %struct.PointerIntPair } -%struct.PointerIntPair = type { i64 } -%struct.StringMapEntry = type opaque -%struct.ilist_node.24 = type { %struct.ilist_half_node.23, %struct.BasicBlock* } -%struct.ilist_half_node.23 = type { %struct.BasicBlock* } -%struct.iplist.22 = type { %struct.ilist_traits.21, %struct.Instruction* } -%struct.ilist_traits.21 = type { %struct.ilist_half_node.25 } -%struct.ilist_half_node.25 = type { %struct.Instruction* } -%struct.Instruction = type { [52 x i8], %struct.ilist_node.26, %struct.BasicBlock*, %struct.DebugLoc } -%struct.ilist_node.26 = type { %struct.ilist_half_node.25, %struct.Instruction* } -%struct.DebugLoc = type { i32, i32 } -%struct.Function = type { %struct.GlobalValue, %struct.ilist_node.14, %struct.iplist.4, %struct.iplist, %struct.ValueSymbolTable*, %struct.AttrListPtr } -%struct.GlobalValue = type <{ [52 x i8], [4 x i8], %struct.Module*, i8, i16, [5 x i8], %struct.basic_string }> -%struct.Module = type { %struct.LLVMContext*, %struct.iplist.20, %struct.iplist.16, %struct.iplist.12, %struct.vector.2, %struct.ilist, %struct.basic_string, %struct.ValueSymbolTable*, %struct.OwningPtr, %struct.basic_string, %struct.basic_string, %struct.basic_string, i8* } -%struct.iplist.20 = type { %struct.ilist_traits.19, %struct.GlobalVariable* } -%struct.ilist_traits.19 = type { %struct.ilist_node.18 } -%struct.ilist_node.18 = type { %struct.ilist_half_node.17, 
%struct.GlobalVariable* } -%struct.ilist_half_node.17 = type { %struct.GlobalVariable* } -%struct.GlobalVariable = type { %struct.GlobalValue, %struct.ilist_node.18, i8, [7 x i8] } -%struct.iplist.16 = type { %struct.ilist_traits.15, %struct.Function* } -%struct.ilist_traits.15 = type { %struct.ilist_node.14 } -%struct.ilist_node.14 = type { %struct.ilist_half_node.13, %struct.Function* } -%struct.ilist_half_node.13 = type { %struct.Function* } -%struct.iplist.12 = type { %struct.ilist_traits.11, %struct.GlobalAlias* } -%struct.ilist_traits.11 = type { %struct.ilist_node.10 } -%struct.ilist_node.10 = type { %struct.ilist_half_node.9, %struct.GlobalAlias* } -%struct.ilist_half_node.9 = type { %struct.GlobalAlias* } -%struct.GlobalAlias = type { %struct.GlobalValue, %struct.ilist_node.10 } -%struct.vector.2 = type { %struct._Vector_base.1 } -%struct._Vector_base.1 = type { %struct._Vector_impl.0 } -%struct._Vector_impl.0 = type { %struct.basic_string*, %struct.basic_string*, %struct.basic_string* } -%struct.basic_string = type { %struct._Alloc_hider } -%struct._Alloc_hider = type { i8* } -%struct.ilist = type { %struct.iplist.8 } -%struct.iplist.8 = type { %struct.ilist_traits.7, %struct.NamedMDNode* } -%struct.ilist_traits.7 = type { %struct.ilist_node.6 } -%struct.ilist_node.6 = type { %struct.ilist_half_node.5, %struct.NamedMDNode* } -%struct.ilist_half_node.5 = type { %struct.NamedMDNode* } -%struct.NamedMDNode = type { %struct.ilist_node.6, %struct.basic_string, %struct.Module*, i8* } -%struct.ValueSymbolTable = type opaque -%struct.OwningPtr = type { %struct.GVMaterializer* } -%struct.GVMaterializer = type opaque -%struct.iplist.4 = type { %struct.ilist_traits.3, %struct.BasicBlock* } -%struct.ilist_traits.3 = type { %struct.ilist_half_node.23 } -%struct.iplist = type { %struct.ilist_traits, %struct.Argument* } -%struct.ilist_traits = type { %struct.ilist_half_node } -%struct.ilist_half_node = type { %struct.Argument* } -%struct.Argument = type { %struct.Value, %struct.ilist_node, %struct.Function* } -%struct.ilist_node = type { %struct.ilist_half_node, %struct.Argument* } -%struct.AttrListPtr = type { %struct.AttributeListImpl* } -%struct.AttributeListImpl = type opaque - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind - -; CHECK: _ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_ -; CHECK: store -; CHECK: ret void -define void @_ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_(%struct.pair.162* %__a, %struct.pair.162* %__b) nounwind uwtable inlinehint { -entry: - %memtmp = alloca %struct.pair.162, align 8 - %0 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 0 - %1 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 0 - %2 = load %struct.BasicBlock*, %struct.BasicBlock** %1, align 8 - store %struct.BasicBlock* %2, %struct.BasicBlock** %0, align 8 - %3 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 1 - %4 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 1 - %5 = load i32, i32* %4, align 4 - store i32 %5, i32* %3, align 8 - %6 = bitcast %struct.pair.162* %__a to i8* - %7 = bitcast %struct.pair.162* %__b to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i1 false) - %8 = bitcast %struct.pair.162* %memtmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i1 false) - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll 
=================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S -enable-dse-partial-overwrite-tracking | FileCheck %s -; PR28588 - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind -define void @_UPT_destroy(i8* nocapture %ptr) local_unnamed_addr #0 { -entry: - %edi = getelementptr inbounds i8, i8* %ptr, i64 8 - -; CHECK-NOT: tail call void @llvm.memset.p0i8.i64(i8* align 8 %edi, i8 0, i64 176, i1 false) -; CHECK-NOT: store i32 -1, i32* %addr - - tail call void @llvm.memset.p0i8.i64(i8* align 8 %edi, i8 0, i64 176, i1 false) - %format4.i = getelementptr inbounds i8, i8* %ptr, i64 144 - %addr = bitcast i8* %format4.i to i32* - store i32 -1, i32* %addr, align 8 - -; CHECK: tail call void @free - tail call void @free(i8* nonnull %ptr) - ret void -} - -; Function Attrs: nounwind -declare void @free(i8* nocapture) local_unnamed_addr #0 - -; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 - -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind } Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll +++ /dev/null @@ -1,41 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -declare i8* @_Znwj(i32) willreturn -declare void @foo() readnone willreturn - -define void @test1(i8** %ptr) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[VAL:%.*]] = inttoptr i64 23452 to i8* -; CHECK-NEXT: store i8* [[VAL]], i8** [[PTR:%.*]] -; CHECK-NEXT: ret void -; - %val = inttoptr i64 23452 to i8* - store i8* %val, i8** %ptr - %call = call i8* @_Znwj(i32 1) - store i8* %call, i8** %ptr - store i8* %val, i8** %ptr - ret void -} - -define void @test2(i8** %ptr, i8* %p1, i8* %p2) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: [[VAL:%.*]] = inttoptr i64 23452 to i8* -; CHECK-NEXT: store i8* [[VAL]], i8** [[PTR:%.*]] -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: store i8* [[P1:%.*]], i8** [[PTR]] -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: store i8* [[VAL]], i8** [[PTR]] -; CHECK-NEXT: ret void -; - %val = inttoptr i64 23452 to i8* - store i8* %val, i8** %ptr - call void @foo() - store i8* %p1, i8** %ptr - call void @foo() - store i8* %p2, i8** %ptr - %call = call i8* @_Znwj(i32 1) - store i8* %call, i8** %ptr - store i8* %val, i8** %ptr - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll +++ /dev/null @@ -1,393 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -define void @write4to7(i32* nocapture %p) { -; CHECK-LABEL: @write4to7( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: 
call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write4to7_atomic(i32* nocapture %p) { -; CHECK-LABEL: @write4to7_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1 - store atomic i32 1, i32* %arrayidx1 unordered, align 4 - ret void -} - -define void @write0to3(i32* nocapture %p) { -; CHECK-LABEL: @write0to3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false) - store i32 1, i32* %p, align 4 - ret void -} - -define void @write0to3_atomic(i32* nocapture %p) { -; CHECK-LABEL: @write0to3_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4) -; CHECK-NEXT: store atomic i32 1, i32* [[P]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - store atomic i32 1, i32* %p unordered, align 4 - ret void -} - -; Atomicity of the store is weaker from the memset -define void @write0to3_atomic_weaker(i32* nocapture %p) { -; CHECK-LABEL: @write0to3_atomic_weaker( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4) -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - store i32 1, i32* %p, align 4 - ret void -} - -define void @write0to7(i32* nocapture %p) { -; CHECK-LABEL: @write0to7( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64* -; CHECK-NEXT: store i64 1, i64* [[P4]], align 8 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false) - %p4 = bitcast i32* %p to i64* - store i64 1, i64* %p4, align 8 - ret void -} - -; Changing the memset start and length is okay here because the -; store is a multiple of the memset element size -define void @write0to7_atomic(i32* nocapture %p) { -; CHECK-LABEL: @write0to7_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64* -; CHECK-NEXT: store atomic i64 1, i64* [[P4]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4) - %p4 = bitcast i32* %p to i64* - store atomic i64 1, i64* %p4 unordered, align 8 - ret void -} - -define void @write0to7_2(i32* nocapture %p) { -; CHECK-LABEL: @write0to7_2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64* -; CHECK-NEXT: store i64 1, i64* [[P4]], align 8 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false) - %p4 = bitcast i32* %p to i64* - store i64 1, i64* %p4, align 8 - ret void -} - -define void @write0to7_2_atomic(i32* nocapture %p) { -; CHECK-LABEL: @write0to7_2_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i64* -; CHECK-NEXT: store atomic i64 1, i64* [[P4]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - %p4 = bitcast i32* %p to i64* - store atomic i64 1, i64* %p4 unordered, align 8 - ret void -} - -; We do not trim the beginning of the eariler write if the alignment of the -; start pointer is changed. 
-define void @dontwrite0to3_align8(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite0to3_align8( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i1 false) - store i32 1, i32* %p, align 4 - ret void -} - -define void @dontwrite0to3_align8_atomic(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite0to3_align8_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i32 4) -; CHECK-NEXT: store atomic i32 1, i32* [[P]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i32 4) - store atomic i32 1, i32* %p unordered, align 4 - ret void -} - -define void @dontwrite0to1(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite0to1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i16* -; CHECK-NEXT: store i16 1, i16* [[P4]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false) - %p4 = bitcast i32* %p to i16* - store i16 1, i16* %p4, align 4 - ret void -} - -define void @dontwrite0to1_atomic(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite0to1_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i16* -; CHECK-NEXT: store atomic i16 1, i16* [[P4]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4) - %p4 = bitcast i32* %p to i16* - store atomic i16 1, i16* %p4 unordered, align 4 - ret void -} - -define void @dontwrite2to9(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite2to9( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i16* -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1 -; CHECK-NEXT: [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64* -; CHECK-NEXT: store i64 1, i64* [[P5]], align 8 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false) - %p4 = bitcast i32* %p to i16* - %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1 - %p5 = bitcast i16* %arrayidx2 to i64* - store i64 1, i64* %p5, align 8 - ret void -} - -define void @dontwrite2to9_atomic(i32* nocapture %p) { -; CHECK-LABEL: @dontwrite2to9_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = 
bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4) -; CHECK-NEXT: [[P4:%.*]] = bitcast i32* [[P]] to i16* -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1 -; CHECK-NEXT: [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64* -; CHECK-NEXT: store atomic i64 1, i64* [[P5]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4) - %p4 = bitcast i32* %p to i16* - %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1 - %p5 = bitcast i16* %arrayidx2 to i64* - store atomic i64 1, i64* %p5 unordered, align 8 - ret void -} - -define void @write8To15AndThen0To7(i64* nocapture %P) { -; CHECK-LABEL: @write8To15AndThen0To7( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16 -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0 -; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 -; CHECK-NEXT: store i64 1, i64* [[BASE64_1]] -; CHECK-NEXT: store i64 2, i64* [[BASE64_0]] -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i1 false) - - %base64_0 = getelementptr inbounds i64, i64* %P, i64 0 - %base64_1 = getelementptr inbounds i64, i64* %P, i64 1 - - store i64 1, i64* %base64_1 - store i64 2, i64* %base64_0 - ret void -} - -define void @write8To15AndThen0To7_atomic(i64* nocapture %P) { -; CHECK-LABEL: @write8To15AndThen0To7_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0 -; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 -; CHECK-NEXT: store atomic i64 1, i64* [[BASE64_1]] unordered, align 8 -; CHECK-NEXT: store atomic i64 2, i64* [[BASE64_0]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_0 = getelementptr inbounds i64, i64* %P, i64 0 - %base64_1 = getelementptr inbounds i64, i64* %P, i64 1 - - store atomic i64 1, i64* %base64_1 unordered, align 8 - store atomic i64 2, i64* %base64_0 unordered, align 8 - ret void -} - -define void @write8To15AndThen0To7_atomic_weaker(i64* nocapture %P) { -; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* 
[[MYBASE0]], i64 16 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0 -; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 -; CHECK-NEXT: store atomic i64 1, i64* [[BASE64_1]] unordered, align 8 -; CHECK-NEXT: store i64 2, i64* [[BASE64_0]], align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_0 = getelementptr inbounds i64, i64* %P, i64 0 - %base64_1 = getelementptr inbounds i64, i64* %P, i64 1 - - store atomic i64 1, i64* %base64_1 unordered, align 8 - store i64 2, i64* %base64_0, align 8 - ret void -} - -define void @write8To15AndThen0To7_atomic_weaker_2(i64* nocapture %P) { -; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0 -; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 -; CHECK-NEXT: store i64 1, i64* [[BASE64_1]], align 8 -; CHECK-NEXT: store atomic i64 2, i64* [[BASE64_0]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_0 = getelementptr inbounds i64, i64* %P, i64 0 - %base64_1 = getelementptr inbounds i64, i64* %P, i64 1 - - store i64 1, i64* %base64_1, align 8 - store atomic i64 2, i64* %base64_0 unordered, align 8 - ret void -} - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll +++ /dev/null @@ -1,390 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -%struct.vec2 = type { <4 x i32>, <4 x i32> } -%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 } - -@glob1 = global %struct.vec2 zeroinitializer, align 16 -@glob2 = global %struct.vec2plusi zeroinitializer, align 16 - -define void @write24to28(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write24to28( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* 
[[P]], i64 7 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write24to28_atomic(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write24to28_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store atomic i32 1, i32* %arrayidx1 unordered, align 4 - ret void -} - -; Atomicity of the store is weaker from the memset -define void @write24to28_atomic_weaker(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write24to28_atomic_weaker( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1 - %p3 = bitcast i32* %arrayidx0 to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write28to32(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write28to32( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @write28to32_atomic(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write28to32_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4) - %arrayidx1 = getelementptr 
inbounds i32, i32* %p, i64 7 - store atomic i32 1, i32* %arrayidx1 unordered, align 4 - ret void -} - -define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @dontwrite28to32memset( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[P3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void @dontwrite28to32memset_atomic(i32* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @dontwrite28to32memset_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 [[P3]], i8 0, i64 32, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %p3 = bitcast i32* %p to i8* - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i32 4) - %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7 - store atomic i32 1, i32* %arrayidx1 unordered, align 4 - ret void -} - -define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write32to36( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i1 false) -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2 -; CHECK-NEXT: store i32 1, i32* [[C]], align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2plusi* %p to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i1 false) - %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2 - store i32 1, i32* %c, align 4 - ret void -} - -define void @write32to36_atomic(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write32to36_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4) -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2 -; CHECK-NEXT: store atomic i32 1, i32* [[C]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2plusi* %p to i8* - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4) - %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2 - store atomic i32 1, i32* %c unordered, align 4 - ret void -} - -; Atomicity of the store is weaker than the memcpy -define void @write32to36_atomic_weaker(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp { -; 
CHECK-LABEL: @write32to36_atomic_weaker( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4) -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2 -; CHECK-NEXT: store i32 1, i32* [[C]], align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2plusi* %p to i8* - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4) - %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2 - store i32 1, i32* %c, align 4 - ret void -} - -define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write16to32( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 16, i1 false) -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1 -; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[C]], align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2* %p to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false) - %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1 - store <4 x i32> , <4 x i32>* %c, align 4 - ret void -} - -define void @write16to32_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @write16to32_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 4) -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1 -; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[C]], align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2* %p to i8* - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4) - %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1 - store <4 x i32> , <4 x i32>* %c, align 4 - ret void -} - -define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @dontwrite28to32memcpy( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 0, i64 7 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2* %p to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false) - %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7 - store i32 1, i32* %arrayidx1, align 4 - ret void -} - -define void 
@dontwrite28to32memcpy_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK-LABEL: @dontwrite28to32memcpy_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 0, i64 7 -; CHECK-NEXT: store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4 -; CHECK-NEXT: ret void -; -entry: - %0 = bitcast %struct.vec2* %p to i8* - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4) - %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7 - store atomic i32 1, i32* %arrayidx1 unordered, align 4 - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind -declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind - -%struct.trapframe = type { i64, i64, i64 } - -; bugzilla 11455 - make sure negative GEP's don't break this optimisation -define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp { -; CHECK-LABEL: @cpu_lwp_fork( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[PCB_RSP0:%.*]] to %struct.trapframe* -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME:%.*]], %struct.trapframe* [[TMP0]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.trapframe* [[ADD_PTR]] to i8* -; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.trapframe* [[MD_REGS:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 24, i1 false) -; CHECK-NEXT: [[TF_TRAPNO:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME]], %struct.trapframe* [[TMP0]], i64 -1, i32 1 -; CHECK-NEXT: store i64 3, i64* [[TF_TRAPNO]], align 8 -; CHECK-NEXT: ret void -; -entry: - %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe* - %add.ptr = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1 - %1 = bitcast %struct.trapframe* %add.ptr to i8* - %2 = bitcast %struct.trapframe* %md_regs to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false) - %tf_trapno = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1, i32 1 - store i64 3, i64* %tf_trapno, align 8 - ret void -} - -define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) { -; CHECK-LABEL: @write16To23AndThen24To31( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 -; CHECK-NEXT: store i64 3, i64* [[BASE64_2]] -; CHECK-NEXT: store i64 3, i64* [[BASE64_3]] -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail 
call void @llvm.memset.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i1 false) - - %base64_2 = getelementptr inbounds i64, i64* %P, i64 2 - %base64_3 = getelementptr inbounds i64, i64* %P, i64 3 - - store i64 3, i64* %base64_2 - store i64 3, i64* %base64_3 - ret void -} - -define void @write16To23AndThen24To31_atomic(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) { -; CHECK-LABEL: @write16To23AndThen24To31_atomic( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 -; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_2]] unordered, align 8 -; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_3]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_2 = getelementptr inbounds i64, i64* %P, i64 2 - %base64_3 = getelementptr inbounds i64, i64* %P, i64 3 - - store atomic i64 3, i64* %base64_2 unordered, align 8 - store atomic i64 3, i64* %base64_3 unordered, align 8 - ret void -} - -define void @write16To23AndThen24To31_atomic_weaker1(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) { -; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 -; CHECK-NEXT: store i64 3, i64* [[BASE64_2]], align 8 -; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_3]] unordered, align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_2 = getelementptr inbounds i64, i64* %P, i64 2 - %base64_3 = getelementptr inbounds i64, i64* %P, i64 3 - - store i64 3, i64* %base64_2, align 8 - store atomic i64 3, i64* %base64_3 unordered, align 8 - ret void -} - -define void @write16To23AndThen24To31_atomic_weaker2(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) { -; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8* -; CHECK-NEXT: [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0 -; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8) -; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2 -; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3 -; CHECK-NEXT: store atomic i64 3, i64* [[BASE64_2]] unordered, align 8 -; CHECK-NEXT: store i64 3, i64* [[BASE64_3]], align 8 -; CHECK-NEXT: ret void -; -entry: - - %base0 = bitcast i64* %P to i8* - %mybase0 = getelementptr 
inbounds i8, i8* %base0, i64 0 - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8) - - %base64_2 = getelementptr inbounds i64, i64* %P, i64 2 - %base64_3 = getelementptr inbounds i64, i64* %P, i64 3 - - store atomic i64 3, i64* %base64_2 unordered, align 8 - store i64 3, i64* %base64_3, align 8 - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll +++ /dev/null @@ -1,87 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=false -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -; Ensure that the dead store is deleted in this case. It is wholely -; overwritten by the second store. -define void @test1(i32 *%V) { - %V2 = bitcast i32* %V to i8* ; [#uses=1] - store i8 0, i8* %V2 - store i32 1234567, i32* %V - ret void -; CHECK-LABEL: @test1( -; CHECK-NEXT: store i32 1234567 -} - -; Note that we could do better by merging the two stores into one. -define void @test2(i32* %P) { -; CHECK-LABEL: @test2( - store i32 0, i32* %P -; CHECK: store i32 - %Q = bitcast i32* %P to i16* - store i16 1, i16* %Q -; CHECK: store i16 - ret void -} - - -define i32 @test3(double %__x) { -; CHECK-LABEL: @test3( -; CHECK: store double - %__u = alloca { [3 x i32] } - %tmp.1 = bitcast { [3 x i32] }* %__u to double* - store double %__x, double* %tmp.1 - %tmp.4 = getelementptr { [3 x i32] }, { [3 x i32] }* %__u, i32 0, i32 0, i32 1 - %tmp.5 = load i32, i32* %tmp.4 - %tmp.6 = icmp slt i32 %tmp.5, 0 - %tmp.7 = zext i1 %tmp.6 to i32 - ret i32 %tmp.7 -} - -; PR6043 -define void @test4(i8* %P) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: bitcast -; CHECK-NEXT: store double - - store i8 19, i8* %P ;; dead - %A = getelementptr i8, i8* %P, i32 3 - - store i8 42, i8* %A ;; dead - - %Q = bitcast i8* %P to double* - store double 0.0, double* %Q - ret void -} - -; PR8657 -declare void @test5a(i32*) -define void @test5(i32 %i) nounwind ssp { - %A = alloca i32 - %B = bitcast i32* %A to i8* - %C = getelementptr i8, i8* %B, i32 %i - store i8 10, i8* %C ;; Dead store to variable index. - store i32 20, i32* %A - - call void @test5a(i32* %A) - ret void -; CHECK-LABEL: @test5( -; CHECK-NEXT: alloca -; CHECK-NEXT: store i32 20 -; CHECK-NEXT: call void @test5a -} - -declare void @test5a_as1(i32*) -define void @test5_addrspacecast(i32 %i) nounwind ssp { - %A = alloca i32 - %B = addrspacecast i32* %A to i8 addrspace(1)* - %C = getelementptr i8, i8 addrspace(1)* %B, i32 %i - store i8 10, i8 addrspace(1)* %C ;; Dead store to variable index. 
- store i32 20, i32* %A - - call void @test5a(i32* %A) - ret void -; CHECK-LABEL: @test5_addrspacecast( -; CHECK-NEXT: alloca -; CHECK-NEXT: store i32 20 -; CHECK-NEXT: call void @test5a -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll +++ /dev/null @@ -1,55 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --data-layout "e" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck %s -; RUN: opt < %s --data-layout "E" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck %s - -; This test used to hit an assertion (see PR41949). -; -; Better safe than sorry, do not assume anything about the padding for the -; i28 store that has 32 bits as store size. -define void @test1(i32* %p) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[A:%.*]] = alloca i32 -; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[A]] to i28* -; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }* -; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1 -; CHECK-NEXT: store i28 10, i28* [[B]] -; CHECK-NEXT: store i16 20, i16* [[C1]] -; CHECK-NEXT: call void @test1(i32* [[A]]) -; CHECK-NEXT: ret void -; - %a = alloca i32 - %b = bitcast i32* %a to i28* - %c = bitcast i32* %a to { i16, i16 }* - %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1 - store i28 10, i28* %b - store i16 20, i16* %c1 - - call void @test1(i32* %a) - ret void -} - - -; This test used to mis-compile (see PR41949). -; -; Better safe than sorry, do not assume anything about the padding for the -; i12 store that has 16 bits as store size. -define void @test2(i32* %p) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: [[U:%.*]] = alloca i32 -; CHECK-NEXT: [[A:%.*]] = bitcast i32* [[U]] to i32* -; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[U]] to i12* -; CHECK-NEXT: store i32 -1, i32* [[A]] -; CHECK-NEXT: store i12 20, i12* [[B]] -; CHECK-NEXT: call void @test2(i32* [[U]]) -; CHECK-NEXT: ret void -; - %u = alloca i32 - %a = bitcast i32* %u to i32* - %b = bitcast i32* %u to i12* - store i32 -1, i32* %a - store i12 20, i12* %b - - call void @test2(i32* %u) - ret void -} - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll +++ /dev/null @@ -1,21 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -dse -enable-dse-memoryssa=false -S | FileCheck %s - -; Both stores should be emitted because we can't tell if the gather aliases. 
- -define <4 x i32> @bar(<4 x i32> %arg, i32* %arg1) { -; CHECK-LABEL: @bar( -; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 5, i32* [[ARG1:%.*]] -; CHECK-NEXT: [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* null, <4 x i32> [[ARG:%.*]], <4 x i32> , i8 1) -; CHECK-NEXT: store i32 10, i32* [[ARG1]] -; CHECK-NEXT: ret <4 x i32> [[TMP]] -; -bb: - store i32 5, i32* %arg1 - %tmp = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* null, <4 x i32> %arg, <4 x i32> , i8 1) - store i32 10, i32* %arg1 - ret <4 x i32> %tmp -} - -declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll +++ /dev/null @@ -1,132 +0,0 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" - -; Sanity tests for atomic stores. -; Note that it turns out essentially every transformation DSE does is legal on -; atomic ops, just some transformations are not allowed across release-acquire pairs. - -@x = common global i32 0, align 4 -@y = common global i32 0, align 4 - -declare void @randomop(i32*) - -; DSE across unordered store (allowed) -define void @test1() { -; CHECK-LABEL: test1 -; CHECK-NOT: store i32 0 -; CHECK: store i32 1 - store i32 0, i32* @x - store atomic i32 0, i32* @y unordered, align 4 - store i32 1, i32* @x - ret void -} - -; DSE remove unordered store (allowed) -define void @test4() { -; CHECK-LABEL: test4 -; CHECK-NOT: store atomic -; CHECK: store i32 1 - store atomic i32 0, i32* @x unordered, align 4 - store i32 1, i32* @x - ret void -} - -; DSE unordered store overwriting non-atomic store (allowed) -define void @test5() { -; CHECK-LABEL: test5 -; CHECK: store atomic i32 1 - store i32 0, i32* @x - store atomic i32 1, i32* @x unordered, align 4 - ret void -} - -; DSE no-op unordered atomic store (allowed) -define void @test6() { -; CHECK-LABEL: test6 -; CHECK-NOT: store -; CHECK: ret void - %x = load atomic i32, i32* @x unordered, align 4 - store atomic i32 %x, i32* @x unordered, align 4 - ret void -} - -; DSE seq_cst store (be conservative; DSE doesn't have infrastructure -; to reason about atomic operations). -define void @test7() { -; CHECK-LABEL: test7 -; CHECK: store atomic - %a = alloca i32 - store atomic i32 0, i32* %a seq_cst, align 4 - ret void -} - -; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure -; to reason about atomic operations). 
-define i32 @test8() { -; CHECK-LABEL: test8 -; CHECK: store -; CHECK: load atomic - %a = alloca i32 - call void @randomop(i32* %a) - store i32 0, i32* %a, align 4 - %x = load atomic i32, i32* @x seq_cst, align 4 - ret i32 %x -} - -; DSE across monotonic load (allowed as long as the eliminated store isUnordered) -define i32 @test9() { -; CHECK-LABEL: test9 -; CHECK-NOT: store i32 0 -; CHECK: store i32 1 - store i32 0, i32* @x - %x = load atomic i32, i32* @y monotonic, align 4 - store i32 1, i32* @x - ret i32 %x -} - -; DSE across monotonic store (allowed as long as the eliminated store isUnordered) -define void @test10() { -; CHECK-LABEL: test10 -; CHECK-NOT: store i32 0 -; CHECK: store i32 1 - store i32 0, i32* @x - store atomic i32 42, i32* @y monotonic, align 4 - store i32 1, i32* @x - ret void -} - -; DSE across monotonic load (forbidden since the eliminated store is atomic) -define i32 @test11() { -; CHECK-LABEL: test11 -; CHECK: store atomic i32 0 -; CHECK: store atomic i32 1 - store atomic i32 0, i32* @x monotonic, align 4 - %x = load atomic i32, i32* @y monotonic, align 4 - store atomic i32 1, i32* @x monotonic, align 4 - ret i32 %x -} - -; DSE across monotonic store (forbidden since the eliminated store is atomic) -define void @test12() { -; CHECK-LABEL: test12 -; CHECK: store atomic i32 0 -; CHECK: store atomic i32 1 - store atomic i32 0, i32* @x monotonic, align 4 - store atomic i32 42, i32* @y monotonic, align 4 - store atomic i32 1, i32* @x monotonic, align 4 - ret void -} - -; But DSE is not allowed across a release-acquire pair. -define i32 @test15() { -; CHECK-LABEL: test15 -; CHECK: store i32 0 -; CHECK: store i32 1 - store i32 0, i32* @x - store atomic i32 0, i32* @y release, align 4 - %x = load atomic i32, i32* @y acquire, align 4 - store i32 1, i32* @x - ret i32 %x -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -declare noalias i8* @calloc(i64, i64) - -define i32* @test1() { -; CHECK-LABEL: test1 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - %2 = bitcast i8* %1 to i32* - ; This store is dead and should be removed - store i32 0, i32* %2, align 4 -; CHECK-NOT: store i32 0, i32* %2, align 4 - ret i32* %2 -} - -define i32* @test2() { -; CHECK-LABEL: test2 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - %2 = bitcast i8* %1 to i32* - %3 = getelementptr i32, i32* %2, i32 5 - store i32 0, i32* %3, align 4 -; CHECK-NOT: store i32 0, i32* %2, align 4 - ret i32* %2 -} - -define i32* @test3(i32 *%arg) { -; CHECK-LABEL: test3 - store i32 0, i32* %arg, align 4 -; CHECK: store i32 0, i32* %arg, align 4 - ret i32* %arg -} - -declare void @clobber_memory(i8*) -define i8* @test4() { -; CHECK-LABEL: test4 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - call void @clobber_memory(i8* %1) - store i8 0, i8* %1, align 4 -; CHECK: store i8 0, i8* %1, align 4 - ret i8* %1 -} - -define i32* @test5() { -; CHECK-LABEL: test5 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - %2 = bitcast i8* %1 to i32* - store volatile i32 0, i32* %2, align 4 -; CHECK: store volatile i32 0, i32* %2, align 4 - ret i32* %2 -} - -define i8* @test6() { -; CHECK-LABEL: test6 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - store i8 5, i8* %1, align 4 -; CHECK: store i8 5, i8* %1, align 4 - ret i8* %1 -} - 
-define i8* @test7(i8 %arg) { -; CHECK-LABEL: test7 - %1 = tail call noalias i8* @calloc(i64 1, i64 4) - store i8 %arg, i8* %1, align 4 -; CHECK: store i8 %arg, i8* %1, align 4 - ret i8* %1 -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll +++ /dev/null @@ -1,239 +0,0 @@ -; RUN: opt -S -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=false < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64le-unknown-linux" - -%"struct.std::complex" = type { { float, float } } - -define void @_Z4testSt7complexIfE(%"struct.std::complex"* noalias nocapture sret(%"struct.std::complex") %agg.result, i64 %c.coerce) { -entry: -; CHECK-LABEL: @_Z4testSt7complexIfE - - %ref.tmp = alloca i64, align 8 - %tmpcast = bitcast i64* %ref.tmp to %"struct.std::complex"* - %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32 - %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32 - %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float - %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32 - %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float - call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret(%"struct.std::complex") %tmpcast, i64 %c.coerce) - %2 = bitcast %"struct.std::complex"* %agg.result to i64* - %3 = load i64, i64* %ref.tmp, align 8 - store i64 %3, i64* %2, align 4 -; CHECK-NOT: store i64 - - %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 0 - %4 = lshr i64 %3, 32 - %5 = trunc i64 %4 to i32 - %6 = bitcast i32 %5 to float - %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 1 - %7 = trunc i64 %3 to i32 - %8 = bitcast i32 %7 to float - %mul_ad.i.i = fmul fast float %6, %1 - %mul_bc.i.i = fmul fast float %8, %0 - %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i - %mul_ac.i.i = fmul fast float %6, %0 - %mul_bd.i.i = fmul fast float %8, %1 - %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i - store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4 - store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4 - ret void -; CHECK: ret void -} - -declare void @_Z3barSt7complexIfE(%"struct.std::complex"* sret(%"struct.std::complex"), i64) - -define void @test1(i32 *%ptr) { -entry: -; CHECK-LABEL: @test1 - - store i32 5, i32* %ptr - %bptr = bitcast i32* %ptr to i8* - store i8 7, i8* %bptr - %wptr = bitcast i32* %ptr to i16* - store i16 -30062, i16* %wptr - %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 - store i8 25, i8* %bptr2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - store i8 47, i8* %bptr3 - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %wptrp = bitcast i8* %bptr1 to i16* - store i16 2020, i16* %wptrp, align 1 - ret void - -; CHECK-NOT: store i32 5, i32* %ptr -; CHECK-NOT: store i8 7, i8* %bptr -; CHECK: store i16 -30062, i16* %wptr -; CHECK-NOT: store i8 25, i8* %bptr2 -; CHECK: store i8 47, i8* %bptr3 -; CHECK: store i16 2020, i16* %wptrp, align 1 - -; CHECK: ret void -} - -define void @test2(i32 *%ptr) { -entry: -; CHECK-LABEL: @test2 - - store i32 5, i32* %ptr - - %bptr = bitcast i32* %ptr to i8* - %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %bptr2 = getelementptr inbounds 
i8, i8* %bptr, i64 2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - - %wptr = bitcast i8* %bptr to i16* - %wptrm1 = bitcast i8* %bptrm1 to i16* - %wptr1 = bitcast i8* %bptr1 to i16* - %wptr2 = bitcast i8* %bptr2 to i16* - %wptr3 = bitcast i8* %bptr3 to i16* - - store i16 1456, i16* %wptrm1, align 1 - store i16 1346, i16* %wptr, align 1 - store i16 1756, i16* %wptr1, align 1 - store i16 1126, i16* %wptr2, align 1 - store i16 5656, i16* %wptr3, align 1 - -; CHECK-NOT: store i32 5, i32* %ptr - -; CHECK: store i16 1456, i16* %wptrm1, align 1 -; CHECK: store i16 1346, i16* %wptr, align 1 -; CHECK: store i16 1756, i16* %wptr1, align 1 -; CHECK: store i16 1126, i16* %wptr2, align 1 -; CHECK: store i16 5656, i16* %wptr3, align 1 - - ret void - -; CHECK: ret void -} - -define signext i8 @test3(i32 *%ptr) { -entry: -; CHECK-LABEL: @test3 - - store i32 5, i32* %ptr - - %bptr = bitcast i32* %ptr to i8* - %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - - %wptr = bitcast i8* %bptr to i16* - %wptrm1 = bitcast i8* %bptrm1 to i16* - %wptr1 = bitcast i8* %bptr1 to i16* - %wptr2 = bitcast i8* %bptr2 to i16* - %wptr3 = bitcast i8* %bptr3 to i16* - - %v = load i8, i8* %bptr, align 1 - store i16 1456, i16* %wptrm1, align 1 - store i16 1346, i16* %wptr, align 1 - store i16 1756, i16* %wptr1, align 1 - store i16 1126, i16* %wptr2, align 1 - store i16 5656, i16* %wptr3, align 1 - -; CHECK: store i32 5, i32* %ptr - - ret i8 %v - -; CHECK: ret i8 %v -} - -%struct.foostruct = type { -i32 (i8*, i8**, i32, i8, i8*)*, -i32 (i8*, i8**, i32, i8, i8*)*, -i32 (i8*, i8**, i32, i8, i8*)*, -i32 (i8*, i8**, i32, i8, i8*)*, -void (i8*, i32, i32)* -} -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) -declare void @goFunc(%struct.foostruct*) -declare i32 @fa(i8*, i8**, i32, i8, i8*) - -define void @test4() { -entry: -; CHECK-LABEL: @test4 - - %bang = alloca %struct.foostruct, align 8 - %v1 = bitcast %struct.foostruct* %bang to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %v1, i8 0, i64 40, i1 false) - %v2 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 0 - store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v2, align 8 - %v3 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 1 - store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v3, align 8 - %v4 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 2 - store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v4, align 8 - %v5 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 3 - store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v5, align 8 - %v6 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 4 - store void (i8*, i32, i32)* null, void (i8*, i32, i32)** %v6, align 8 - call void @goFunc(%struct.foostruct* %bang) - ret void - -; CHECK-NOT: memset -; CHECK: ret void -} - -define signext i8 @test5(i32 *%ptr) { -entry: -; CHECK-LABEL: @test5 - - store i32 0, i32* %ptr - - %bptr = bitcast i32* %ptr to i8* - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - - %wptr = bitcast i8* %bptr to i16* - %wptr1 = bitcast i8* %bptr1 to i16* - 
%wptr2 = bitcast i8* %bptr2 to i16* - - store i16 65535, i16* %wptr2, align 1 - store i16 1456, i16* %wptr1, align 1 - store i16 1346, i16* %wptr, align 1 - -; CHECK-NOT: store i32 0, i32* %ptr - - ret i8 0 -} - -define signext i8 @test6(i32 *%ptr) { -entry: -; CHECK-LABEL: @test6 - - store i32 0, i32* %ptr - - %bptr = bitcast i32* %ptr to i16* - %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0 - %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1 - - store i16 1456, i16* %bptr2, align 1 - store i16 65535, i16* %bptr1, align 1 - -; CHECK-NOT: store i32 0, i32* %ptr - - ret i8 0 -} - -define signext i8 @test7(i64 *%ptr) { -entry: -; CHECK-LABEL: @test7 - - store i64 0, i64* %ptr - - %bptr = bitcast i64* %ptr to i16* - %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0 - %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1 - %bptr3 = getelementptr inbounds i16, i16* %bptr, i64 2 - %bptr4 = getelementptr inbounds i16, i16* %bptr, i64 3 - - store i16 1346, i16* %bptr1, align 1 - store i16 1756, i16* %bptr3, align 1 - store i16 1456, i16* %bptr2, align 1 - store i16 5656, i16* %bptr4, align 1 - -; CHECK-NOT: store i64 0, i64* %ptr - - ret i8 0 -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -%t = type { i32 } - -@g = global i32 42 - -define void @test1(%t* noalias %pp) { - %p = getelementptr inbounds %t, %t* %pp, i32 0, i32 0 - - store i32 1, i32* %p; <-- This is dead - %x = load i32, i32* inttoptr (i32 12345 to i32*) - store i32 %x, i32* %p - ret void -; CHECK-LABEL: define void @test1( -; CHECK: store -; CHECK-NOT: store -; CHECK: ret void -} - -define void @test3() { - store i32 1, i32* @g; <-- This is dead. 
- store i32 42, i32* @g - ret void -; CHECK-LABEL: define void @test3( -; CHECK: store -; CHECK-NOT: store -; CHECK: ret void -} - -define void @test4(i32* %p) { - store i32 1, i32* %p - %x = load i32, i32* @g; <-- %p and @g could alias - store i32 %x, i32* %p - ret void -; CHECK-LABEL: define void @test4( -; CHECK: store -; CHECK: store -; CHECK: ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin10.0" - -@g80 = external global i8 ; [#uses=3] - -declare signext i8 @foo(i8 signext, i8 signext) nounwind readnone ssp - -declare i32 @func68(i32) nounwind readonly ssp - -; PR4815 -define void @test1(i32 %int32p54) noreturn nounwind ssp { -entry: - br label %bb - -bb: ; preds = %bb, %entry - %storemerge = phi i8 [ %2, %bb ], [ 1, %entry ] ; [#uses=1] - store i8 %storemerge, i8* @g80 - %0 = tail call i32 @func68(i32 1) nounwind ssp ; [#uses=1] - %1 = trunc i32 %0 to i8 ; [#uses=1] - store i8 %1, i8* @g80, align 1 - store i8 undef, i8* @g80, align 1 - %2 = tail call signext i8 @foo(i8 signext undef, i8 signext 1) nounwind ; [#uses=1] - br label %bb -} - -define fastcc i32 @test2() nounwind ssp { -bb14: ; preds = %bb4 - %0 = bitcast i8* undef to i8** ; [#uses=1] - %1 = getelementptr inbounds i8*, i8** %0, i64 undef ; [#uses=1] - %2 = bitcast i8** %1 to i16* ; [#uses=2] - %3 = getelementptr inbounds i16, i16* %2, i64 undef ; [#uses=1] - %4 = bitcast i16* %3 to i8* ; [#uses=1] - %5 = getelementptr inbounds i8, i8* %4, i64 undef ; [#uses=1] - %6 = getelementptr inbounds i16, i16* %2, i64 undef ; [#uses=1] - store i16 undef, i16* %6, align 2 - %7 = getelementptr inbounds i8, i8* %5, i64 undef ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i1 false) - unreachable -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind - - -; rdar://7635088 -define i32 @test3() { -entry: - ret i32 0 - -dead: - %P2 = getelementptr i32, i32 *%P2, i32 52 - %Q2 = getelementptr i32, i32 *%Q2, i32 52 - store i32 4, i32* %P2 - store i32 4, i32* %Q2 - br label %dead -} - - -; PR3141 -%struct.ada__tags__dispatch_table = type { [1 x i32] } -%struct.f393a00_1__object = type { %struct.ada__tags__dispatch_table*, i8 } -%struct.f393a00_2__windmill = type { %struct.f393a00_1__object, i16 } - -define void @test4(%struct.f393a00_2__windmill* %a, %struct.f393a00_2__windmill* %b) { -entry: - %t = alloca %struct.f393a00_2__windmill ; <%struct.f393a00_2__windmill*> [#uses=1] - %0 = getelementptr %struct.f393a00_2__windmill, %struct.f393a00_2__windmill* %t, i32 0, i32 0, i32 0 ; <%struct.ada__tags__dispatch_table**> [#uses=1] - %1 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** null, align 4 ; <%struct.ada__tags__dispatch_table*> [#uses=1] - %2 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** %0, align 8 ; <%struct.ada__tags__dispatch_table*> [#uses=1] - store %struct.ada__tags__dispatch_table* %2, %struct.ada__tags__dispatch_table** null, align 4 - store %struct.ada__tags__dispatch_table* %1, %struct.ada__tags__dispatch_table** null, align 4 - ret void -} Index: 
llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%class.basic_string = type { %"class.__gnu_cxx::__versa_string" } -%"class.__gnu_cxx::__versa_string" = type { %"class.__gnu_cxx::__sso_string_base" } -%"class.__gnu_cxx::__sso_string_base" = type { %"struct.__gnu_cxx::__vstring_utility, std::allocator >::_Alloc_hider", i64, %union.anon } -%"struct.__gnu_cxx::__vstring_utility, std::allocator >::_Alloc_hider" = type { i8* } -%union.anon = type { i64, [8 x i8] } - -; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 - -; Function Attrs: noinline nounwind readonly uwtable -declare zeroext i1 @callee_takes_string(%class.basic_string* nonnull) #1 align 2 - -; Function Attrs: nounwind uwtable -define weak_odr zeroext i1 @test() #2 align 2 { - -; CHECK-LABEL: @test - -bb: - %tmp = alloca %class.basic_string, align 8 - %tmp1 = alloca %class.basic_string, align 8 - %tmp3 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 2 - %tmp4 = bitcast %union.anon* %tmp3 to i8* - %tmp5 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 0, i32 0 - %tmp6 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 1 - %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 1 - %tmp8 = bitcast %class.basic_string* %tmp to i8* - %tmp9 = bitcast i64 0 to i64 - %tmp10 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 2 - %tmp11 = bitcast %union.anon* %tmp10 to i8* - %tmp12 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 0, i32 0 - %tmp13 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 1 - %tmp14 = getelementptr inbounds i8, i8* %tmp11, i64 1 - %tmp15 = bitcast %class.basic_string* %tmp1 to i8* - br label %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit - -_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit: ; preds = %bb - store i8* %tmp4, i8** %tmp5, align 8 - store i8 62, i8* %tmp4, align 8 - store i64 1, i64* %tmp6, align 8 - store i8 0, i8* %tmp7, align 1 - %tmp16 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp) - br label %_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3 - -_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3: ; preds = %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit - -; CHECK: _ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3: - -; The following can be read through the call %tmp17: - store i8* %tmp11, i8** %tmp12, align 8 - store i8 125, i8* %tmp11, align 8 - store i64 1, i64* %tmp13, align 8 - store i8 0, i8* %tmp14, align 1 - -; CHECK: store i8* %tmp11, i8** %tmp12, align 8 -; CHECK: store i8 125, i8* %tmp11, align 8 -; CHECK: store i64 1, i64* %tmp13, align 8 -; CHECK: store i8 0, i8* %tmp14, align 1 - - %tmp17 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp1) - call void @llvm.memset.p0i8.i64(i8* align 8 %tmp11, i8 -51, i64 16, i1 false) #0 - call void 
@llvm.memset.p0i8.i64(i8* align 8 %tmp15, i8 -51, i64 32, i1 false) #0 - call void @llvm.memset.p0i8.i64(i8* align 8 %tmp4, i8 -51, i64 16, i1 false) #0 - call void @llvm.memset.p0i8.i64(i8* align 8 %tmp8, i8 -51, i64 32, i1 false) #0 - ret i1 %tmp17 -} - -attributes #0 = { nounwind } -attributes #1 = { noinline nounwind readonly uwtable } -attributes #2 = { nounwind uwtable } - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: opt < %s -debugify -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -declare noalias i8* @malloc(i32) - -declare void @test_f() - -define i32* @test_salvage(i32 %arg) { -; Check that all four original local variables have their values preserved. -; CHECK-LABEL: @test_salvage( -; CHECK-NEXT: malloc -; CHECK-NEXT: @llvm.dbg.value(metadata i8* %p, metadata ![[p:.*]], metadata !DIExpression()) -; CHECK-NEXT: bitcast -; CHECK-NEXT: @llvm.dbg.value(metadata i32* %P, metadata ![[P:.*]], metadata !DIExpression()) -; CHECK-NEXT: @llvm.dbg.value(metadata i32 %arg, metadata ![[DEAD:.*]], metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)) -; CHECK-NEXT: call void @test_f() -; CHECK-NEXT: store i32 0, i32* %P - - %p = tail call i8* @malloc(i32 4) - %P = bitcast i8* %p to i32* - %DEAD = add i32 %arg, 1 - store i32 %DEAD, i32* %P - call void @test_f() - store i32 0, i32* %P - ret i32* %P -} - -; CHECK: ![[p]] = !DILocalVariable(name: "1" -; CHECK: ![[P]] = !DILocalVariable(name: "2" -; CHECK: ![[DEAD]] = !DILocalVariable(name: "3" Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -dse -enable-dse-memoryssa=false -disable-output < %s -; test that we don't crash -declare void @bar() - -define void @foo() { -bb1: - %memtmp3.i = alloca [21 x i8], align 1 - %0 = getelementptr inbounds [21 x i8], [21 x i8]* %memtmp3.i, i64 0, i64 0 - br label %bb3 - -bb2: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) - br label %bb3 - -bb3: - call void @bar() - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) - br label %bb4 - -bb4: - ret void - -} - -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll +++ /dev/null @@ -1,96 +0,0 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s - -; We conservative choose to prevent dead store elimination -; across release or stronger fences. It's not required -; (since the must still be a race on %addd.i), but -; it is conservatively correct. A legal optimization -; could hoist the second store above the fence, and then -; DSE one of them. -define void @test1(i32* %addr.i) { -; CHECK-LABEL: @test1 -; CHECK: store i32 5 -; CHECK: fence -; CHECK: store i32 5 -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - store i32 5, i32* %addr.i, align 4 - ret void -} - -; Same as previous, but with different values. 
If we ever optimize -; this more aggressively, this allows us to check that the correct -; store is retained (the 'i32 1' store in this case) -define void @test1b(i32* %addr.i) { -; CHECK-LABEL: @test1b -; CHECK: store i32 42 -; CHECK: fence release -; CHECK: store i32 1 -; CHECK: ret - store i32 42, i32* %addr.i, align 4 - fence release - store i32 1, i32* %addr.i, align 4 - ret void -} - -; We *could* DSE across this fence, but don't. No other thread can -; observe the order of the acquire fence and the store. -define void @test2(i32* %addr.i) { -; CHECK-LABEL: @test2 -; CHECK: store -; CHECK: fence -; CHECK: store -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence acquire - store i32 5, i32* %addr.i, align 4 - ret void -} - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. -; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval(i32) %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. 
-define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -target datalayout = "e-p:64:64:64" - -declare void @free(i8* nocapture) -declare noalias i8* @malloc(i64) - -; CHECK-LABEL: @test( -; CHECK-NEXT: bitcast -; CHECK-NEXT: @free -; CHECK-NEXT: ret void -define void @test(i32* %Q, i32* %P) { - %DEAD = load i32, i32* %Q ; [#uses=1] - store i32 %DEAD, i32* %P - %1 = bitcast i32* %P to i8* - tail call void @free(i8* %1) nounwind - ret void -} - -; CHECK-LABEL: @test2( -; CHECK-NEXT: bitcast -; CHECK-NEXT: @free -; CHECK-NEXT: ret void -define void @test2({i32, i32}* %P) { - %Q = getelementptr {i32, i32}, {i32, i32} *%P, i32 0, i32 1 - store i32 4, i32* %Q - %1 = bitcast {i32, i32}* %P to i8* - tail call void @free(i8* %1) nounwind - ret void -} - -; CHECK-LABEL: @test3( -; CHECK-NOT: store -; CHECK: ret void -define void @test3() { - %m = call i8* @malloc(i64 24) - store i8 0, i8* %m - %m1 = getelementptr i8, i8* %m, i64 1 - store i8 1, i8* %m1 - call void @free(i8* %m) nounwind - ret void -} - -; PR11240 -; CHECK-LABEL: @test4( -; CHECK-NOT: store -; CHECK: ret void -define void @test4(i1 %x) nounwind { -entry: - %alloc1 = tail call noalias i8* @malloc(i64 4) nounwind - br i1 %x, label %skipinit1, label %init1 - -init1: - store i8 1, i8* %alloc1 - br label %skipinit1 - -skipinit1: - tail call void @free(i8* %alloc1) nounwind - ret void -} - -; CHECK-LABEL: @test5( -define void @test5() { - br label %bb - -bb: - tail call void @free(i8* undef) nounwind - br label %bb -} - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll +++ /dev/null @@ -1,261 +0,0 @@ -; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; If there are two stores to the same location, DSE should be able to remove -; the first store if the two stores are separated by no more than 98 -; instructions. The existence of debug intrinsics between the stores should -; not affect this instruction limit. - -@x = global i32 0, align 4 - -; Function Attrs: nounwind -define i32 @test_within_limit() !dbg !4 { -entry: - ; The first store; later there is a second store to the same location, - ; so this store should be optimized away by DSE. 
- ; CHECK-NOT: store i32 1, i32* @x, align 4 - store i32 1, i32* @x, align 4 - - ; Insert 98 dummy instructions between the two stores - %0 = bitcast i32 0 to i32 - %1 = bitcast i32 0 to i32 - %2 = bitcast i32 0 to i32 - %3 = bitcast i32 0 to i32 - %4 = bitcast i32 0 to i32 - %5 = bitcast i32 0 to i32 - %6 = bitcast i32 0 to i32 - %7 = bitcast i32 0 to i32 - %8 = bitcast i32 0 to i32 - %9 = bitcast i32 0 to i32 - %10 = bitcast i32 0 to i32 - %11 = bitcast i32 0 to i32 - %12 = bitcast i32 0 to i32 - %13 = bitcast i32 0 to i32 - %14 = bitcast i32 0 to i32 - %15 = bitcast i32 0 to i32 - %16 = bitcast i32 0 to i32 - %17 = bitcast i32 0 to i32 - %18 = bitcast i32 0 to i32 - %19 = bitcast i32 0 to i32 - %20 = bitcast i32 0 to i32 - %21 = bitcast i32 0 to i32 - %22 = bitcast i32 0 to i32 - %23 = bitcast i32 0 to i32 - %24 = bitcast i32 0 to i32 - %25 = bitcast i32 0 to i32 - %26 = bitcast i32 0 to i32 - %27 = bitcast i32 0 to i32 - %28 = bitcast i32 0 to i32 - %29 = bitcast i32 0 to i32 - %30 = bitcast i32 0 to i32 - %31 = bitcast i32 0 to i32 - %32 = bitcast i32 0 to i32 - %33 = bitcast i32 0 to i32 - %34 = bitcast i32 0 to i32 - %35 = bitcast i32 0 to i32 - %36 = bitcast i32 0 to i32 - %37 = bitcast i32 0 to i32 - %38 = bitcast i32 0 to i32 - %39 = bitcast i32 0 to i32 - %40 = bitcast i32 0 to i32 - %41 = bitcast i32 0 to i32 - %42 = bitcast i32 0 to i32 - %43 = bitcast i32 0 to i32 - %44 = bitcast i32 0 to i32 - %45 = bitcast i32 0 to i32 - %46 = bitcast i32 0 to i32 - %47 = bitcast i32 0 to i32 - %48 = bitcast i32 0 to i32 - %49 = bitcast i32 0 to i32 - %50 = bitcast i32 0 to i32 - %51 = bitcast i32 0 to i32 - %52 = bitcast i32 0 to i32 - %53 = bitcast i32 0 to i32 - %54 = bitcast i32 0 to i32 - %55 = bitcast i32 0 to i32 - %56 = bitcast i32 0 to i32 - %57 = bitcast i32 0 to i32 - %58 = bitcast i32 0 to i32 - %59 = bitcast i32 0 to i32 - %60 = bitcast i32 0 to i32 - %61 = bitcast i32 0 to i32 - %62 = bitcast i32 0 to i32 - %63 = bitcast i32 0 to i32 - %64 = bitcast i32 0 to i32 - %65 = bitcast i32 0 to i32 - %66 = bitcast i32 0 to i32 - %67 = bitcast i32 0 to i32 - %68 = bitcast i32 0 to i32 - %69 = bitcast i32 0 to i32 - %70 = bitcast i32 0 to i32 - %71 = bitcast i32 0 to i32 - %72 = bitcast i32 0 to i32 - %73 = bitcast i32 0 to i32 - %74 = bitcast i32 0 to i32 - %75 = bitcast i32 0 to i32 - %76 = bitcast i32 0 to i32 - %77 = bitcast i32 0 to i32 - %78 = bitcast i32 0 to i32 - %79 = bitcast i32 0 to i32 - %80 = bitcast i32 0 to i32 - %81 = bitcast i32 0 to i32 - %82 = bitcast i32 0 to i32 - %83 = bitcast i32 0 to i32 - %84 = bitcast i32 0 to i32 - %85 = bitcast i32 0 to i32 - %86 = bitcast i32 0 to i32 - %87 = bitcast i32 0 to i32 - %88 = bitcast i32 0 to i32 - %89 = bitcast i32 0 to i32 - %90 = bitcast i32 0 to i32 - %91 = bitcast i32 0 to i32 - %92 = bitcast i32 0 to i32 - %93 = bitcast i32 0 to i32 - %94 = bitcast i32 0 to i32 - %95 = bitcast i32 0 to i32 - %96 = bitcast i32 0 to i32 - %97 = bitcast i32 0 to i32 - - ; Insert a meaningless dbg.value intrinsic; it should have no - ; effect on the working of DSE in any way. 
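- ; (Debug intrinsics are not counted toward that scan limit; their presence
- ; must not change whether the dead store is found.)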
- call void @llvm.dbg.value(metadata i32 undef, metadata !10, metadata !DIExpression()), !dbg !DILocation(scope: !4) - - ; CHECK: store i32 -1, i32* @x, align 4 - store i32 -1, i32* @x, align 4 - ret i32 0 -} - -; Function Attrs: nounwind -define i32 @test_outside_limit() { -entry: - ; The first store; later there is a second store to the same location - ; CHECK: store i32 1, i32* @x, align 4 - store i32 1, i32* @x, align 4 - - ; Insert 99 dummy instructions between the two stores; this is - ; one too many instruction for the DSE to take place. - %0 = bitcast i32 0 to i32 - %1 = bitcast i32 0 to i32 - %2 = bitcast i32 0 to i32 - %3 = bitcast i32 0 to i32 - %4 = bitcast i32 0 to i32 - %5 = bitcast i32 0 to i32 - %6 = bitcast i32 0 to i32 - %7 = bitcast i32 0 to i32 - %8 = bitcast i32 0 to i32 - %9 = bitcast i32 0 to i32 - %10 = bitcast i32 0 to i32 - %11 = bitcast i32 0 to i32 - %12 = bitcast i32 0 to i32 - %13 = bitcast i32 0 to i32 - %14 = bitcast i32 0 to i32 - %15 = bitcast i32 0 to i32 - %16 = bitcast i32 0 to i32 - %17 = bitcast i32 0 to i32 - %18 = bitcast i32 0 to i32 - %19 = bitcast i32 0 to i32 - %20 = bitcast i32 0 to i32 - %21 = bitcast i32 0 to i32 - %22 = bitcast i32 0 to i32 - %23 = bitcast i32 0 to i32 - %24 = bitcast i32 0 to i32 - %25 = bitcast i32 0 to i32 - %26 = bitcast i32 0 to i32 - %27 = bitcast i32 0 to i32 - %28 = bitcast i32 0 to i32 - %29 = bitcast i32 0 to i32 - %30 = bitcast i32 0 to i32 - %31 = bitcast i32 0 to i32 - %32 = bitcast i32 0 to i32 - %33 = bitcast i32 0 to i32 - %34 = bitcast i32 0 to i32 - %35 = bitcast i32 0 to i32 - %36 = bitcast i32 0 to i32 - %37 = bitcast i32 0 to i32 - %38 = bitcast i32 0 to i32 - %39 = bitcast i32 0 to i32 - %40 = bitcast i32 0 to i32 - %41 = bitcast i32 0 to i32 - %42 = bitcast i32 0 to i32 - %43 = bitcast i32 0 to i32 - %44 = bitcast i32 0 to i32 - %45 = bitcast i32 0 to i32 - %46 = bitcast i32 0 to i32 - %47 = bitcast i32 0 to i32 - %48 = bitcast i32 0 to i32 - %49 = bitcast i32 0 to i32 - %50 = bitcast i32 0 to i32 - %51 = bitcast i32 0 to i32 - %52 = bitcast i32 0 to i32 - %53 = bitcast i32 0 to i32 - %54 = bitcast i32 0 to i32 - %55 = bitcast i32 0 to i32 - %56 = bitcast i32 0 to i32 - %57 = bitcast i32 0 to i32 - %58 = bitcast i32 0 to i32 - %59 = bitcast i32 0 to i32 - %60 = bitcast i32 0 to i32 - %61 = bitcast i32 0 to i32 - %62 = bitcast i32 0 to i32 - %63 = bitcast i32 0 to i32 - %64 = bitcast i32 0 to i32 - %65 = bitcast i32 0 to i32 - %66 = bitcast i32 0 to i32 - %67 = bitcast i32 0 to i32 - %68 = bitcast i32 0 to i32 - %69 = bitcast i32 0 to i32 - %70 = bitcast i32 0 to i32 - %71 = bitcast i32 0 to i32 - %72 = bitcast i32 0 to i32 - %73 = bitcast i32 0 to i32 - %74 = bitcast i32 0 to i32 - %75 = bitcast i32 0 to i32 - %76 = bitcast i32 0 to i32 - %77 = bitcast i32 0 to i32 - %78 = bitcast i32 0 to i32 - %79 = bitcast i32 0 to i32 - %80 = bitcast i32 0 to i32 - %81 = bitcast i32 0 to i32 - %82 = bitcast i32 0 to i32 - %83 = bitcast i32 0 to i32 - %84 = bitcast i32 0 to i32 - %85 = bitcast i32 0 to i32 - %86 = bitcast i32 0 to i32 - %87 = bitcast i32 0 to i32 - %88 = bitcast i32 0 to i32 - %89 = bitcast i32 0 to i32 - %90 = bitcast i32 0 to i32 - %91 = bitcast i32 0 to i32 - %92 = bitcast i32 0 to i32 - %93 = bitcast i32 0 to i32 - %94 = bitcast i32 0 to i32 - %95 = bitcast i32 0 to i32 - %96 = bitcast i32 0 to i32 - %97 = bitcast i32 0 to i32 - %98 = bitcast i32 0 to i32 - - ; CHECK: store i32 -1, i32* @x, align 4 - store i32 -1, i32* @x, align 4 - ret i32 0 -} - -; Function Attrs: nounwind readnone -declare 
void @llvm.dbg.value(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!11, !13} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) -!1 = !DIFile(filename: "test.c", directory: "/home/tmp") -!2 = !{} -!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, retainedNodes: !2) -!5 = !DIFile(filename: "test.c", directory: "/home/tmp") -!6 = !DISubroutineType(types: !7) -!7 = !{!8} -!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!9 = !{!10} -!10 = !DILocalVariable(name: "x", scope: !4, type: !8) -!11 = !{i32 2, !"Dwarf Version", i32 4} -!12 = !{i32* undef} - -!13 = !{i32 1, !"Debug Info Version", i32 3} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt -S < %s -dse -enable-dse-memoryssa=false | FileCheck %s - -declare void @llvm.sideeffect() - -; Dead store elimination across a @llvm.sideeffect. - -; CHECK-LABEL: dse -; CHECK: store -; CHECK-NOT: store -define void @dse(float* %p) { - store float 0.0, float* %p - call void @llvm.sideeffect() - store float 0.0, float* %p - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll +++ /dev/null @@ -1,34 +0,0 @@ -; Test to make sure llvm.invariant.start calls are not treated as clobbers. -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly - -; We cannot remove the store 1 to %p. -; FIXME: By the semantics of invariant.start, the store 3 to p is unreachable. -define void @test(i8 *%p) { - store i8 1, i8* %p, align 4 - %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) - store i8 3, i8* %p, align 4 - ret void -; CHECK-LABEL: @test( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void -} - -; FIXME: We should be able to remove the first store to p, even though p and q -; may alias. 
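-; (If %p and %q were equal, the final store would write memory that
-; invariant.start has marked unchanging, so the two pointers could be assumed
-; distinct and the first store to %p would be fully overwritten by the last.)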
-define void @test2(i8* %p, i8* %q) { - store i8 1, i8* %p, align 4 - store i8 2, i8* %q, align 4 - %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) - store i8 3, i8* %p, align 4 - ret void -; CHECK-LABEL: @test2( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: store i8 2, i8* %q, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -; CHECK-LABEL: void @skipBarrier(i8* %ptr) -define void @skipBarrier(i8* %ptr) { -; CHECK-NOT: store i8 42 - store i8 42, i8* %ptr -; CHECK: %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr) - %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr) -; CHECK: store i8 43 - store i8 43, i8* %ptr2 - ret void -} - -; CHECK-LABEL: void @skip2Barriers(i8* %ptr) -define void @skip2Barriers(i8* %ptr) { -; CHECK-NOT: store i8 42 - store i8 42, i8* %ptr -; CHECK: %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr) - %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr) -; CHECK-NOT: store i8 43 - store i8 43, i8* %ptr2 - %ptr3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr2) - %ptr4 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr3) - -; CHECK: store i8 44 - store i8 44, i8* %ptr4 - ret void -} - -; CHECK-LABEL: void @skip3Barriers(i8* %ptr) -define void @skip3Barriers(i8* %ptr) { -; CHECK-NOT: store i8 42 - store i8 42, i8* %ptr -; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) - %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) -; CHECK-NOT: store i8 43 - store i8 43, i8* %ptr2 - %ptr3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr2) - %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3) - -; CHECK: store i8 44 - store i8 44, i8* %ptr4 - ret void -} - -; CHECK-LABEL: void @skip4Barriers(i8* %ptr) -define void @skip4Barriers(i8* %ptr) { -; CHECK-NOT: store i8 42 - store i8 42, i8* %ptr -; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) - %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) -; CHECK-NOT: store i8 43 - store i8 43, i8* %ptr2 - %ptr3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr2) - %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3) - %ptr5 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr3) - -; CHECK: store i8 44 - store i8 44, i8* %ptr5 - ret void -} - - -declare i8* @llvm.launder.invariant.group.p0i8(i8*) -declare i8* @llvm.strip.invariant.group.p0i8(i8*) Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" - -declare i8* @strcpy(i8* %dest, i8* %src) nounwind -define void @test1(i8* %src) { -; CHECK-LABEL: @test1( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK-NOT: @strcpy - %call = call i8* @strcpy(i8* %dest, i8* %src) -; CHECK: ret void - ret void -} - -declare i8* 
@strncpy(i8* %dest, i8* %src, i64 %n) nounwind -define void @test2(i8* %src) { -; CHECK-LABEL: @test2( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK-NOT: @strncpy - %call = call i8* @strncpy(i8* %dest, i8* %src, i64 12) -; CHECK: ret void - ret void -} - -declare i8* @strcat(i8* %dest, i8* %src) nounwind -define void @test3(i8* %src) { -; CHECK-LABEL: @test3( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK-NOT: @strcat - %call = call i8* @strcat(i8* %dest, i8* %src) -; CHECK: ret void - ret void -} - -declare i8* @strncat(i8* %dest, i8* %src, i64 %n) nounwind -define void @test4(i8* %src) { -; CHECK-LABEL: @test4( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK-NOT: @strncat - %call = call i8* @strncat(i8* %dest, i8* %src, i64 12) -; CHECK: ret void - ret void -} - -define void @test5(i8* nocapture %src) { -; CHECK-LABEL: @test5( - %dest = alloca [100 x i8], align 16 - %arraydecay = getelementptr inbounds [100 x i8], [100 x i8]* %dest, i64 0, i64 0 - %call = call i8* @strcpy(i8* %arraydecay, i8* %src) -; CHECK: %call = call i8* @strcpy - %arrayidx = getelementptr inbounds i8, i8* %call, i64 10 - store i8 97, i8* %arrayidx, align 1 - ret void -} - -declare void @user(i8* %p) -define void @test6(i8* %src) { -; CHECK-LABEL: @test6( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK: @strcpy - %call = call i8* @strcpy(i8* %dest, i8* %src) -; CHECK: @user - call void @user(i8* %dest) -; CHECK: ret void - ret void -} - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" - -declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind -define void @test2(i8* %src) { -; CHECK-LABEL: @test2( - %B = alloca [16 x i8] - %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0 -; CHECK: @strncpy - %call = call i8* @strncpy(i8* %dest, i8* %src, i32 12) -; CHECK: ret void - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s - -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind -declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind - -define void @test1() { -; CHECK-LABEL: @test1( - %A = alloca i8 - - store i8 0, i8* %A ;; Written to by memset - call void @llvm.lifetime.end.p0i8(i64 1, i8* %A) -; CHECK-NOT: lifetime.end - - call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false) -; CHECK-NOT: memset - - ret void -; CHECK: ret void -} - -define void @test2(i32* %P) { -; CHECK: test2 - %Q = getelementptr i32, i32* %P, i32 1 - %R = bitcast i32* %Q to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %R) - store i32 0, i32* %Q ;; This store is 
dead. -; CHECK-NOT: store - call void @llvm.lifetime.end.p0i8(i64 4, i8* %R) - ret void -} - - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt -S -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s - -; Test case to check that the memory dependency analysis gets the same -; result even if we have a dbg value between the memcpy and -; store. The memory dependency is then used by DSE to remove the store. - -; We use -memdep-block-scan-limit=3 to be able to create a small test case. -; Without it, we would need to squeeze in 100 instructions since the default -; limit is 100. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@g = common global [1 x i8] zeroinitializer, align 1, !dbg !0 - -; Function Attrs: noinline nounwind uwtable -define void @foo() #0 !dbg !14 { -entry: - %i = alloca i8, align 1 - store i8 1, i8* %i, align 1, !dbg !19 - call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 - %0 = bitcast [1 x i8]* @g to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i, i8* %0, i64 1, i1 false), !dbg !20 - br label %bb2 - -bb2: ; preds = %0 - ret void, !dbg !21 -} - -; Function Attrs: nounwind readnone speculatable -declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2 - -attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone speculatable } -attributes #2 = { argmemonly nounwind } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!10, !11, !12} -!llvm.ident = !{!13} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) -!3 = !DIFile(filename: "foo.c", directory: "/bar") -!4 = !{} -!5 = !{!0} -!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 8, elements: !8) -!7 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!8 = !{!9} -!9 = !DISubrange(count: 1) -!10 = !{i32 2, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{!"clang version 6.0.0"} -!14 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 5, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !2, retainedNodes: !4) -!15 = !DISubroutineType(types: !16) -!16 = !{null} -!17 = !DILocalVariable(name: "i", scope: !14, file: 
!3, line: 7, type: !7) -!18 = !DILocation(line: 7, column: 10, scope: !14) -!19 = !DILocation(line: 8, column: 7, scope: !14) -!20 = !DILocation(line: 9, column: 5, scope: !14) -!21 = !DILocation(line: 10, column: 1, scope: !14) - -; Check that the store is removed and that the memcpy is still there -; CHECK-LABEL: foo -; CHECK-NOT: store i8 -; CHECK: call void @llvm.memcpy -; CHECK: ret void Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll +++ /dev/null @@ -1,95 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s - -declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind -declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind -declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind - -define void @test1() { -; CHECK-LABEL: @test1( -; CHECK-NEXT: ret void -; - %A = alloca i8 - %B = alloca i8 - - store i8 0, i8* %A ;; Written to by memcpy - - call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false) - - ret void -} - -define void @test2() { -; CHECK-LABEL: @test2( -; CHECK-NEXT: ret void -; - %A = alloca i8 - %B = alloca i8 - - store i8 0, i8* %A ;; Written to by memmove - - call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false) - - ret void -} - -define void @test3() { -; CHECK-LABEL: @test3( -; CHECK-NEXT: ret void -; - %A = alloca i8 - - store i8 0, i8* %A ;; Written to by memset - - call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false) - - ret void -} - -declare void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind -declare void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind -declare void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* nocapture, i8, i16, i32) nounwind - - -define void @test4() { -; CHECK-LABEL: @test4( -; CHECK-NEXT: ret void -; - %A = alloca i16, i16 1024, align 2 - %B = alloca i16, i16 1024, align 2 - - store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memcpy - store atomic i16 0, i16* %B unordered, align 2 ;; Read by memcpy - - call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2) - - ret void -} - -define void @test5() { -; CHECK-LABEL: @test5( -; CHECK-NEXT: ret void -; - %A = alloca i16, i16 1024, align 2 - %B = alloca i16, i16 1024, align 2 - - store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memmove - store atomic i16 0, i16* %B unordered, align 2 ;; Read by memmove - - call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2) - - ret void -} - -define void @test6() { -; CHECK-LABEL: @test6( -; CHECK-NEXT: ret void -; - %A = alloca i16, i16 1024, align 2 - - store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memset - - call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 %A, i8 0, i16 1024, i32 2) - - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll +++ /dev/null @@ -1,89 +0,0 @@ -; 
Test that the getelementptr generated when the dse pass determines that -; a memset can be shortened has the debugloc carried over from the memset. - -; RUN: opt -S -march=native -dse -enable-dse-memoryssa=false < %s| FileCheck %s -; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg -; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] -; CHECK: ![[DBG]] = !DILocation(line: 2, - -; The test IR is generated by running: -; -; clang Debugify_Dead_Store_Elimination.cpp -Wno-c++11-narrowing -S \ -; -emit-llvm -O0 -w -Xclang -disable-O0-optnone -march=native -fdeclspec \ -; --target=x86_64-gnu-linux-unknown -Werror=unreachable-code -o - -; -; Where Debugify_Dead_Store_Elimination.cpp contains: -; -; int a() { -; long b[]{2, 2, 2, 2, 0}; -; if (a()) -; ; -; } - - -define dso_local i32 @_Z1av() !dbg !7 { -entry: - %retval = alloca i32, align 4 - %b = alloca [5 x i64], align 16 - call void @llvm.dbg.declare(metadata [5 x i64]* %b, metadata !11, metadata !DIExpression()), !dbg !16 - %0 = bitcast [5 x i64]* %b to i8*, !dbg !16 - call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 40, i1 false), !dbg !16 - %1 = bitcast i8* %0 to [5 x i64]*, !dbg !16 - %2 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 0, !dbg !16 - store i64 2, i64* %2, align 16, !dbg !16 - %3 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 1, !dbg !16 - store i64 2, i64* %3, align 8, !dbg !16 - %4 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 2, !dbg !16 - store i64 2, i64* %4, align 16, !dbg !16 - %5 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 3, !dbg !16 - store i64 2, i64* %5, align 8, !dbg !16 - %call = call i32 @_Z1av(), !dbg !17 - %tobool = icmp ne i32 %call, 0, !dbg !17 - br i1 %tobool, label %if.then, label %if.end, !dbg !19 - -if.then: ; preds = %entry - br label %if.end, !dbg !19 - -if.end: ; preds = %if.then, %entry - call void @llvm.trap(), !dbg !20 - unreachable, !dbg !20 - -return: ; No predecessors! 
- %6 = load i32, i32* %retval, align 4, !dbg !21 - ret i32 %6, !dbg !21 -} - -; Function Attrs: nounwind readnone speculatable -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) - -; Function Attrs: cold noreturn nounwind -declare void @llvm.trap() - -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git eb1a156d7f7ba56ea8f9a26da36e6a93d1e98bda)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "Debugify_Dead_Store_Elimination.cpp", directory: "") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git eb1a156d7f7ba56ea8f9a26da36e6a93d1e98bda)"} -!7 = distinct !DISubprogram(name: "a", linkageName: "_Z1av", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{!10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!11 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 2, type: !12) -!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 320, elements: !14) -!13 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) -!14 = !{!15} -!15 = !DISubrange(count: 5) -!16 = !DILocation(line: 2, column: 8, scope: !7) -!17 = !DILocation(line: 3, column: 7, scope: !18) -!18 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 7) -!19 = !DILocation(line: 3, column: 7, scope: !7) -!20 = !DILocation(line: 3, column: 9, scope: !18) -!21 = !DILocation(line: 5, column: 1, scope: !7) Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll +++ /dev/null @@ -1,172 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" - -define void @byte_by_byte_replacement(i32 *%ptr) { -; CHECK-LABEL: @byte_by_byte_replacement( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 151653132, i32* [[PTR:%.*]] -; CHECK-NEXT: ret void -; -entry: - ;; This store's value should be modified as it should be better to use one - ;; larger store than several smaller ones. 
- ;; store will turn into 0x090A0B0C == 151653132 - store i32 305419896, i32* %ptr ; 0x12345678 - %bptr = bitcast i32* %ptr to i8* - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - - ;; We should be able to merge these four stores with the i32 above - ; value (and bytes) stored before ; 0x12345678 - store i8 9, i8* %bptr ; 09 - store i8 10, i8* %bptr1 ; 0A - store i8 11, i8* %bptr2 ; 0B - store i8 12, i8* %bptr3 ; 0C - ; 0x090A0B0C - - ret void -} - -define void @word_replacement(i64 *%ptr) { -; CHECK-LABEL: @word_replacement( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 72638273700655232, i64* [[PTR:%.*]] -; CHECK-NEXT: ret void -; -entry: - store i64 72623859790382856, i64* %ptr ; 0x0102030405060708 - - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 - - ;; We should be able to merge these two stores with the i64 one above - ; value (and bytes) stored before ; 0x0102030405060708 - store i16 4128, i16* %wptr1 ; 1020 - store i16 28800, i16* %wptr3 ; 7080 - ; 0x0102102005067080 - - ret void -} - - -define void @differently_sized_replacements(i64 *%ptr) { -; CHECK-LABEL: @differently_sized_replacements( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 289077004501059343, i64* [[PTR:%.*]] -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6 - %wptr = bitcast i64* %ptr to i16* - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f - store i8 7, i8* %bptr6 ; 07 - store i16 1541, i16* %wptr2 ; 0605 - store i32 67305985, i32* %dptr ; 04030201 - ; 0x040302010605070f - ret void -} - - -define void @multiple_replacements_to_same_byte(i64 *%ptr) { -; CHECK-LABEL: @multiple_replacements_to_same_byte( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 289077004602248719, i64* [[PTR:%.*]] -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f - store i8 7, i8* %bptr3 ; 07 - store i16 1541, i16* %wptr1 ; 0605 - store i32 67305985, i32* %dptr ; 04030201 - ; 0x040302010c0d0e0f - ret void -} - -define void @merged_merges(i64 *%ptr) { -; CHECK-LABEL: @merged_merges( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 289081428418563599, i64* [[PTR:%.*]] -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f - store i32 67305985, i32* %dptr ; 04030201 - store i16 1541, i16* %wptr1 ; 0605 - store i8 7, i8* %bptr3 ; 07 - ; 
0x040306070c0d0e0f - ret void -} - -define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) { -; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8* -; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1 -; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 -; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32* -; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64* -; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1 -; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1 -; CHECK-NEXT: ret i8 0 -; -entry: - - store i64 0, i64* %ptr - - %bptr = bitcast i64* %ptr to i8* - %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %dptr = bitcast i8* %bptrm1 to i32* - %qptr = bitcast i8* %bptr3 to i64* - - store i32 1234, i32* %dptr, align 1 - store i64 5678, i64* %qptr, align 1 - - ret i8 0 -} - -;; Test case from PR31777 -%union.U = type { i64 } - -define void @foo(%union.U* nocapture %u) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0 -; CHECK-NEXT: store i64 11821949021847552, i64* [[I]], align 8 -; CHECK-NEXT: ret void -; -entry: - %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0 - store i64 0, i64* %i, align 8 - %s = bitcast %union.U* %u to i16* - store i16 42, i16* %s, align 8 - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll +++ /dev/null @@ -1,236 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" - -define void @byte_by_byte_replacement(i32 *%ptr) { -; CHECK-LABEL: @byte_by_byte_replacement( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 202050057, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: ret void -; -entry: - ;; This store's value should be modified as it should be better to use one - ;; larger store than several smaller ones. 
- ;; store will turn into 0x0C0B0A09 == 202050057 - store i32 305419896, i32* %ptr ; 0x12345678 - %bptr = bitcast i32* %ptr to i8* - %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 - %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - - ;; We should be able to merge these four stores with the i32 above - ; value (and bytes) stored before ; 0x12345678 - store i8 9, i8* %bptr ; 09 - store i8 10, i8* %bptr1 ; 0A - store i8 11, i8* %bptr2 ; 0B - store i8 12, i8* %bptr3 ; 0C - ; 0x0C0B0A09 - ret void -} - -define void @word_replacement(i64 *%ptr) { -; CHECK-LABEL: @word_replacement( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 8106482645252179720, i64* [[PTR:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - store i64 72623859790382856, i64* %ptr ; 0x0102030405060708 - - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 - - ;; We should be able to merge these two stores with the i64 one above - ; value (not bytes) stored before ; 0x0102030405060708 - store i16 4128, i16* %wptr1 ; 1020 - store i16 28800, i16* %wptr3 ; 7080 - ; 0x7080030410200708 - ret void -} - - -define void @differently_sized_replacements(i64 *%ptr) { -; CHECK-LABEL: @differently_sized_replacements( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 578437695752307201, i64* [[PTR:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6 - %wptr = bitcast i64* %ptr to i16* - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f - store i8 7, i8* %bptr6 ; 07 - store i16 1541, i16* %wptr2 ; 0605 - store i32 67305985, i32* %dptr ; 04030201 - ; 0x0807060504030201 - ret void -} - - -define void @multiple_replacements_to_same_byte(i64 *%ptr) { -; CHECK-LABEL: @multiple_replacements_to_same_byte( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 579005069522043393, i64* [[PTR:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f - store i8 7, i8* %bptr3 ; 07 - store i16 1541, i16* %wptr1 ; 0605 - store i32 67305985, i32* %dptr ; 04030201 - ; 0x08090a0b04030201 - ret void -} - -define void @merged_merges(i64 *%ptr) { -; CHECK-LABEL: @merged_merges( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 579005069572506113, i64* [[PTR:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f - - %bptr = bitcast i64* %ptr to i8* - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %wptr = bitcast i64* %ptr to i16* - %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %dptr = bitcast i64* %ptr to i32* - - ;; We should be able to merge all these stores with the i64 one above - ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f - store i32 67305985, i32* %dptr ; 04030201 - store i16 1541, i16* %wptr1 ; 0605 - store 
i8 7, i8* %bptr3 ; 07 - ; 0x08090a0b07050201 - ret void -} - -define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) { -; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8* -; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1 -; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 -; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32* -; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64* -; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1 -; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1 -; CHECK-NEXT: ret i8 0 -; -entry: - - ; Also check that alias.scope metadata doesn't get dropped - store i64 0, i64* %ptr, !alias.scope !32 - - %bptr = bitcast i64* %ptr to i8* - %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 - %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 - %dptr = bitcast i8* %bptrm1 to i32* - %qptr = bitcast i8* %bptr3 to i64* - - store i32 1234, i32* %dptr, align 1 - store i64 5678, i64* %qptr, align 1 - - ret i8 0 -} - -;; Test case from PR31777 -%union.U = type { i64 } - -define void @foo(%union.U* nocapture %u) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0 -; CHECK-NEXT: store i64 42, i64* [[I]], align 8, !tbaa !0, !noalias !3, !nontemporal !4 -; CHECK-NEXT: ret void -; -entry: - %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0 - store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29 - %s = bitcast %union.U* %u to i16* - store i16 42, i16* %s, align 8 - ret void -} - -; Don't crash by operating on stale data if we merge (kill) the last 2 stores. - -define void @PR34074(i32* %x, i64* %y) { -; CHECK-LABEL: @PR34074( -; CHECK-NEXT: store i64 42, i64* [[Y:%.*]], align 8 -; CHECK-NEXT: store i32 4, i32* [[X:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i64 42, i64* %y ; independent store - %xbc = bitcast i32* %x to i8* - store i32 0, i32* %x ; big store of constant - store i8 4, i8* %xbc ; small store with mergeable constant - ret void -} - -; We can't eliminate the last store because P and Q may alias. 
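-; Partial store merging would otherwise fold the i8 store into the earlier
-; i32 store to %P, but the intervening store to %Q may clobber %P, so all
-; three stores must stay.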
- -define void @PR36129(i32* %P, i32* %Q) { -; CHECK-LABEL: @PR36129( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* -; CHECK-NEXT: store i32 2, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: store i8 3, i8* [[P2]], align 1 -; CHECK-NEXT: ret void -; - store i32 1, i32* %P - %P2 = bitcast i32* %P to i8* - store i32 2, i32* %Q - store i8 3, i8* %P2 - ret void -} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer") -!2 = !{} -!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20) -!8 = !DISubroutineType(types: !9) -!9 = !{null, !10} -!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) -!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U") -!12 = !{!13, !17} -!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64) -!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16) -!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer") -!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) -!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16) -!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19) -!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) -!20 = !{!21} -!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10) -!22 = !DILocation(line: 10, column: 8, scope: !7) - -!26 = !{!27, !27, i64 0} -!27 = !{!"omnipotent char", !28, i64 0} -!28 = !{!"Simple C++ TBAA"} - -!29 = !{i32 1} - -; Domains and scopes which might alias -!30 = !{!30} -!31 = !{!31, !30} - -!32 = !{!32} -!33 = !{!33, !32} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind - -define void @fn(i8* nocapture %buf) #0 { -entry: - -; We would not eliminate the first memcpy with data layout, and we should not -; eliminate it without data layout. 
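-; (The second memcpy reads the bytes at %arrayidx that the first one wrote,
-; so the first copy stays live with or without size information.)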
-; CHECK-LABEL: @fn -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK: ret void - - %arrayidx = getelementptr i8, i8* %buf, i64 18 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i1 false) - store i8 1, i8* %arrayidx, align 1 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i1 false) - ret void -} - Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s - -declare noalias i8* @malloc(i64) "malloc-like" - -declare void @foo() -declare void @bar(i8*) - -define void @test() { - %obj = call i8* @malloc(i64 8) - store i8 0, i8* %obj - ; don't remove store. %obj should be treated like it will be read by the @foo. - ; CHECK: store i8 0, i8* %obj - call void @foo() ["deopt" (i8* %obj)] - ret void -} - -define void @test1() { - %obj = call i8* @malloc(i64 8) - store i8 0, i8* %obj - ; CHECK: store i8 0, i8* %obj - call void @bar(i8* nocapture %obj) - ret void -} - -define void @test2() { - %obj = call i8* @malloc(i64 8) - store i8 0, i8* %obj - ; CHECK-NOT: store i8 0, i8* %obj - call void @foo() - ret void -} - -define void @test3() { - ; CHECK-LABEL: @test3( - %s = alloca i64 - ; Verify that this first store is not considered killed by the second one - ; since it could be observed from the deopt continuation. - ; CHECK: store i64 1, i64* %s - store i64 1, i64* %s - call void @foo() [ "deopt"(i64* %s) ] - store i64 0, i64* %s - ret void -} - -declare noalias i8* @calloc(i64, i64) - -define void @test4() { -; CHECK-LABEL: @test4 - %local_obj = call i8* @calloc(i64 1, i64 4) - call void @foo() ["deopt" (i8* %local_obj)] - store i8 0, i8* %local_obj, align 4 - ; CHECK-NOT: store i8 0, i8* %local_obj, align 4 - call void @bar(i8* nocapture %local_obj) - ret void -} Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -; PR11390 -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define fastcc void @cat_domain(i8* nocapture %name, i8* nocapture %domain, i8** -nocapture %s) nounwind uwtable { -entry: - %call = tail call i64 @strlen(i8* %name) nounwind readonly - %call1 = tail call i64 @strlen(i8* %domain) nounwind readonly - %add = add i64 %call, 1 - %add2 = add i64 %add, %call1 - %add3 = add i64 %add2, 1 - %call4 = tail call noalias i8* @malloc(i64 %add3) nounwind - store i8* %call4, i8** %s, align 8 - %tobool = icmp eq i8* %call4, null - br i1 %tobool, label %return, label %if.end - -if.end: ; preds = %entry - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i1 false) - %arrayidx = getelementptr inbounds i8, i8* %call4, i64 %call - store i8 46, i8* %arrayidx, align 1 -; CHECK: store i8 46 - %add.ptr5 = getelementptr inbounds i8, i8* %call4, i64 %add - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i1 false) - 
%arrayidx8 = getelementptr inbounds i8, i8* %call4, i64 %add2 - store i8 0, i8* %arrayidx8, align 1 - br label %return - -return: ; preds = %if.end, %entry - ret void -} - -declare i64 @strlen(i8* nocapture) nounwind readonly - -declare noalias i8* @malloc(i64) nounwind - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll +++ /dev/null @@ -1,1215 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa=false -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind -declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind -declare void @llvm.init.trampoline(i8*, i8*, i8*) - -define void @test1(i32* %Q, i32* %P) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - %DEAD = load i32, i32* %Q - store i32 %DEAD, i32* %P - store i32 0, i32* %P - ret void -} - -; PR8576 - Should delete store of 10 even though p/q are may aliases. -define void @test2(i32 *%p, i32 *%q) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: store i32 20, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 - store i32 20, i32* %q, align 4 - store i32 30, i32* %p, align 4 - ret void -} - - -; PR8677 -@g = global i32 1 - -define i32 @test3(i32* %g_addr) nounwind { -; CHECK-LABEL: @test3( -; CHECK-NEXT: [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4 -; CHECK-NEXT: store i32 -1, i32* @g, align 4 -; CHECK-NEXT: store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 -; CHECK-NEXT: ret i32 [[TMP3]] -; - %g_value = load i32, i32* %g_addr, align 4 - store i32 -1, i32* @g, align 4 - store i32 %g_value, i32* %g_addr, align 4 - %tmp3 = load i32, i32* @g, align 4 - ret i32 %tmp3 -} - - -define void @test4(i32* %Q) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: store volatile i32 [[A]], i32* [[Q]], align 4 -; CHECK-NEXT: ret void -; - %a = load i32, i32* %Q - store volatile i32 %a, i32* %Q - ret void -} - -define void @test5(i32* %Q) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: ret void -; - %a = load volatile i32, i32* %Q - store i32 %a, i32* %Q - ret void -} - -; Should delete store of 10 even though memset is a may-store to P (P and Q may -; alias). -define void @test6(i32 *%p, i8 *%q) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false) -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 ;; dead. 
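- ;; The memset below may write %p but never reads it; only a potential read
- ;; of %p between the two stores would keep this one alive.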
- call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false) - store i32 30, i32* %p, align 4 - ret void -} - -; Should delete store of 10 even though memset is a may-store to P (P and Q may -; alias). -define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) { -; CHECK-LABEL: @test6_atomic( -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4) -; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 -; CHECK-NEXT: ret void -; - store atomic i32 10, i32* %p unordered, align 4 ;; dead. - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4) - store atomic i32 30, i32* %p unordered, align 4 - ret void -} - -; Should delete store of 10 even though memcpy is a may-store to P (P and Q may -; alias). -define void @test7(i32 *%p, i8 *%q, i8* noalias %r) { -; CHECK-LABEL: @test7( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false) -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 ;; dead. - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false) - store i32 30, i32* %p, align 4 - ret void -} - -; Should delete store of 10 even though memcpy is a may-store to P (P and Q may -; alias). -define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) { -; CHECK-LABEL: @test7_atomic( -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4) -; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 -; CHECK-NEXT: ret void -; - store atomic i32 10, i32* %p unordered, align 4 ;; dead. - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4) - store atomic i32 30, i32* %p unordered, align 4 - ret void -} - -; Do not delete stores that are only partially killed. -define i32 @test8() { -; CHECK-LABEL: @test8( -; CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1234567, i32* [[V]], align 4 -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 -; CHECK-NEXT: ret i32 [[X]] -; - %V = alloca i32 - store i32 1234567, i32* %V - %V2 = bitcast i32* %V to i8* - store i8 0, i8* %V2 - %X = load i32, i32* %V - ret i32 %X - -} - - -; Test for byval handling. -%struct.x = type { i32, i32, i32, i32 } -define void @test9(%struct.x* byval(%struct.x) %a) nounwind { -; CHECK-LABEL: @test9( -; CHECK-NEXT: ret void -; - %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0 - store i32 1, i32* %tmp2, align 4 - ret void -} - -; Test for inalloca handling. -define void @test9_2(%struct.x* inalloca %a) nounwind { -; CHECK-LABEL: @test9_2( -; CHECK-NEXT: ret void -; - %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0 - store i32 1, i32* %tmp2, align 4 - ret void -} - -; Test for preallocated handling. -define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind { -; CHECK-LABEL: @test9_3( -; CHECK-NEXT: ret void -; - %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0 - store i32 1, i32* %tmp2, align 4 - ret void -} - -; va_arg has fuzzy dependence, the store shouldn't be zapped. 
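-; (va_arg reads the va_list slot in %X_addr, so the preceding store of %X is
-; observable and must stay.)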
-define double @test10(i8* %X) { -; CHECK-LABEL: @test10( -; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: store i8* [[X:%.*]], i8** [[X_ADDR]], align 8 -; CHECK-NEXT: [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double -; CHECK-NEXT: ret double [[TMP_0]] -; - %X_addr = alloca i8* - store i8* %X, i8** %X_addr - %tmp.0 = va_arg i8** %X_addr, double - ret double %tmp.0 -} - - -; DSE should delete the dead trampoline. -declare void @test11f() -define void @test11() { -; CHECK-LABEL: @test11( -; CHECK-NEXT: ret void -; - %storage = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1] - %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0 ; [#uses=1] - call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null ) ; [#uses=1] - ret void -} - - -; PR2599 - load -> store to same address. -define void @test12({ i32, i32 }* %x) nounwind { -; CHECK-LABEL: @test12( -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] -; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 -; CHECK-NEXT: ret void -; - %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 - %tmp5 = load i32, i32* %tmp4, align 4 - %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 - %tmp8 = load i32, i32* %tmp7, align 4 - %tmp17 = sub i32 0, %tmp8 - store i32 %tmp5, i32* %tmp4, align 4 - store i32 %tmp17, i32* %tmp7, align 4 - ret void -} - - -; %P doesn't escape, the DEAD instructions should be removed. -declare void @test13f() -define i32* @test13() { -; CHECK-LABEL: @test13( -; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 4) -; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[PTR]] to i32* -; CHECK-NEXT: call void @test13f() -; CHECK-NEXT: store i32 0, i32* [[P]], align 4 -; CHECK-NEXT: ret i32* [[P]] -; - %ptr = tail call i8* @malloc(i32 4) - %P = bitcast i8* %ptr to i32* - %DEAD = load i32, i32* %P - %DEAD2 = add i32 %DEAD, 1 - store i32 %DEAD2, i32* %P - call void @test13f( ) - store i32 0, i32* %P - ret i32* %P -} - -define i32 addrspace(1)* @test13_addrspacecast() { -; CHECK-LABEL: @test13_addrspacecast( -; CHECK-NEXT: [[P:%.*]] = tail call i8* @malloc(i32 4) -; CHECK-NEXT: [[P_BC:%.*]] = bitcast i8* [[P]] to i32* -; CHECK-NEXT: [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)* -; CHECK-NEXT: call void @test13f() -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P]], align 4 -; CHECK-NEXT: ret i32 addrspace(1)* [[P]] -; - %p = tail call i8* @malloc(i32 4) - %p.bc = bitcast i8* %p to i32* - %P = addrspacecast i32* %p.bc to i32 addrspace(1)* - %DEAD = load i32, i32 addrspace(1)* %P - %DEAD2 = add i32 %DEAD, 1 - store i32 %DEAD2, i32 addrspace(1)* %P - call void @test13f( ) - store i32 0, i32 addrspace(1)* %P - ret i32 addrspace(1)* %P -} - -declare noalias i8* @malloc(i32) willreturn -declare noalias i8* @calloc(i32, i32) willreturn -declare noalias i8* @aligned_alloc(i32, i32) -declare void @free(i8*) - - -define void @test14(i32* %Q) { -; CHECK-LABEL: @test14( -; CHECK-NEXT: ret void -; - %P = alloca i32 - %DEAD = load i32, i32* %Q - store i32 %DEAD, i32* %P - ret void - -} - -; Dead store on an aligned_alloc: should know that %M doesn't alias with %A. 
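-; %A is a fresh noalias allocation that cannot alias %M, and it is freed
-; without ever being read, so the store to it is dead.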
-define i32 @test14a(i8* %M, i8 %value) { -; CHECK-LABEL: @test14a( -; CHECK-NEXT: [[A:%.*]] = tail call i8* @aligned_alloc(i32 32, i32 1024) -; CHECK-NEXT: tail call void @free(i8* [[A]]) -; CHECK-NEXT: ret i32 0 -; - %A = tail call i8* @aligned_alloc(i32 32, i32 1024) - store i8 %value, i8* %A - tail call void @free(i8* %A) - ret i32 0 -} - -; PR8701 - -;; Fully dead overwrite of memcpy. -define void @test15(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic_weaker_2( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - ret void -} - -;; Full overwrite of smaller memcpy. -define void @test16(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Full overwrite of smaller memcpy. 
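-;; Both copies read from the same source %Q, so the later 12-byte copy
-;; rewrites every byte the earlier 8-byte copy produced.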
-define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Full overwrite of smaller memory where overwrite has stronger atomicity -define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Full overwrite of smaller memory where overwrite has weaker atomicity. -define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic_weaker_2( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - ret void -} - -;; Overwrite of memset by memcpy. -define void @test17(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Overwrite of memset by memcpy. -define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Overwrite of memset by memcpy. Overwrite is stronger atomicity. We can -;; remove the memset. -define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Overwrite of memset by memcpy. Overwrite is weaker atomicity. We can remove -;; the memset. 
-define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
-; CHECK-LABEL: @test17_atomic_weaker_2(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: ret void
-;
- tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
- ret void
-}
-
-; Should not delete the volatile memset.
-define void @test17v(i8* %P, i8* %Q) nounwind ssp {
-; CHECK-LABEL: @test17v(
-; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: ret void
-;
- tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
- ret void
-}
-
-; PR8728
-; Do not delete instruction where possible situation is:
-; A = B
-; A = A
-;
-; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
-; equal (but not unequal and overlapping).
-define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-; CHECK-LABEL: @test18(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
-; CHECK-NEXT: ret void
-;
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
- ret void
-}
-
-define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-; CHECK-LABEL: @test18_atomic(
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
-; CHECK-NEXT: ret void
-;
- tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
- tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
- ret void
-}
-
-
-; The store here is not dead because the byval call reads it.
-declare void @test19f({i32}* byval({i32}) align 4 %P) - -define void @test19({i32} * nocapture byval({i32}) align 4 %arg5) nounwind ssp { -; CHECK-LABEL: @test19( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds { i32 }, { i32 }* [[ARG5:%.*]], i32 0, i32 0 -; CHECK-NEXT: store i32 912, i32* [[TMP7]], align 4 -; CHECK-NEXT: call void @test19f({ i32 }* byval({ i32 }) align 4 [[ARG5]]) -; CHECK-NEXT: ret void -; -bb: - %tmp7 = getelementptr inbounds {i32}, {i32}* %arg5, i32 0, i32 0 - store i32 912, i32* %tmp7 - call void @test19f({i32}* byval({i32}) align 4 %arg5) - ret void - -} - -define void @test20() { -; CHECK-LABEL: @test20( -; CHECK-NEXT: ret void -; - %m = call i8* @malloc(i32 24) - store i8 0, i8* %m - ret void -} - -define void @test21() { -; CHECK-LABEL: @test21( -; CHECK-NEXT: ret void -; - %m = call i8* @calloc(i32 9, i32 7) - store i8 0, i8* %m - ret void -} - -define void @test22(i1 %i, i32 %k, i32 %m) nounwind { -; CHECK-LABEL: @test22( -; CHECK-NEXT: ret void -; - %k.addr = alloca i32 - %m.addr = alloca i32 - %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr - store i32 0, i32* %k.addr.m.addr, align 4 - ret void -} - -; PR13547 -declare noalias i8* @strdup(i8* nocapture) nounwind -define noalias i8* @test23() nounwind uwtable ssp { -; CHECK-LABEL: @test23( -; CHECK-NEXT: [[X:%.*]] = alloca [2 x i8], align 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 0 -; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 -; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) [[ATTR5:#.*]] -; CHECK-NEXT: ret i8* [[CALL]] -; - %x = alloca [2 x i8], align 1 - %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 0 - store i8 97, i8* %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 1 - store i8 0, i8* %arrayidx1, align 1 - %call = call i8* @strdup(i8* %arrayidx) nounwind - ret i8* %call -} - -; Make sure same sized store to later element is deleted -define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind { -; CHECK-LABEL: @test24( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0 -; CHECK-NEXT: store i32 [[B:%.*]], i32* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1 -; CHECK-NEXT: store i32 [[C:%.*]], i32* [[TMP2]], align 4 -; CHECK-NEXT: ret void -; - %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 - store i32 0, i32* %1, align 4 - %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 - store i32 0, i32* %2, align 4 - %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 - store i32 %b, i32* %3, align 4 - %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 - store i32 %c, i32* %4, align 4 - ret void -} - -; Check another case like PR13547 where strdup is not like malloc. 
-define i8* @test25(i8* %p) nounwind { -; CHECK-LABEL: @test25( -; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1 -; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1 -; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) [[ATTR8:#.*]] -; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1 -; CHECK-NEXT: ret i8* [[Q]] -; - %p.4 = getelementptr i8, i8* %p, i64 4 - %tmp = load i8, i8* %p.4, align 1 - store i8 0, i8* %p.4, align 1 - %q = call i8* @strdup(i8* %p) nounwind optsize - store i8 %tmp, i8* %p.4, align 1 - ret i8* %q -} - -; Remove redundant store if loaded value is in another block. -define i32 @test26(i1 %c, i32* %p) { -; CHECK-LABEL: @test26( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - store i32 %v, i32* %p, align 4 - br label %bb3 -bb3: - ret i32 0 -} - -; Remove redundant store if loaded value is in another block. -define i32 @test27(i1 %c, i32* %p) { -; CHECK-LABEL: @test27( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - br label %bb3 -bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 -} - -; Don't remove redundant store because of may-aliased store. -define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) { -; CHECK-LABEL: @test28( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: store i32 [[I:%.*]], i32* [[P2:%.*]], align 4 -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - - ; Might overwrite value at %p - store i32 %i, i32* %p2, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - br label %bb3 -bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 -} - -; Don't remove redundant store because of may-aliased store. -define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) { -; CHECK-LABEL: @test29( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: store i32 [[I:%.*]], i32* [[P2:%.*]], align 4 -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - ; Might overwrite value at %p - store i32 %i, i32* %p2, align 4 - br label %bb3 -bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 -} - -declare void @unknown_func() - -; Don't remove redundant store because of unknown call. 
-define i32 @test30(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test30(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
-; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: br label [[BB3]]
-; CHECK: bb3:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br i1 %c, label %bb1, label %bb2
-bb1:
- br label %bb3
-bb2:
- ; Might overwrite value at %p
- call void @unknown_func()
- br label %bb3
-bb3:
- store i32 %v, i32* %p, align 4
- ret i32 0
-}
-
-; Remove redundant store if loaded value is in another block inside a loop.
-define i32 @test31(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test31(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br label %bb1
-bb1:
- store i32 %v, i32* %p, align 4
- br i1 undef, label %bb1, label %bb2
-bb2:
- ret i32 0
-}
-
-; Don't remove redundant store in a loop with a may-alias store.
-define i32 @test32(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test32(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br label %bb1
-bb1:
- store i32 %v, i32* %p, align 4
- ; Might read and overwrite value at %p
- call void @unknown_func()
- br i1 undef, label %bb1, label %bb2
-bb2:
- ret i32 0
-}
-
-; Remove redundant store, which is in the same loop as the load.
-define i32 @test33(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test33(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB2:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]]
-; CHECK: bb3:
-; CHECK-NEXT: ret i32 0
-;
-entry:
- br label %bb1
-bb1:
- %v = load i32, i32* %p, align 4
- br label %bb2
-bb2:
- store i32 %v, i32* %p, align 4
- ; Might read and overwrite value at %p, but doesn't matter.
- call void @unknown_func()
- br i1 undef, label %bb1, label %bb3
-bb3:
- ret i32 0
-}
-
-; Don't remove redundant store: unknown_func could unwind
-define void @test34(i32* noalias %p) {
-; CHECK-LABEL: @test34(
-; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: store i32 0, i32* [[P]], align 4
-; CHECK-NEXT: ret void
-;
- store i32 1, i32* %p
- call void @unknown_func()
- store i32 0, i32* %p
- ret void
-}
-
-; Remove redundant store even with an unwinding function in the same block
-define void @test35(i32* noalias %p) {
-; CHECK-LABEL: @test35(
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
-; CHECK-NEXT: ret void
-;
- call void @unknown_func()
- store i32 1, i32* %p
- store i32 0, i32* %p
- ret void
-}
-
-; We cannot optimize away the first memmove since %P could overlap with %Q.
-define void @test36(i8* %P, i8* %Q) { -; CHECK-LABEL: @test36( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[Q]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -define void @test36_atomic(i8* %P, i8* %Q) { -; CHECK-LABEL: @test36_atomic( -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -define void @test37(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test37( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) - ret void -} - -define void @test37_atomic(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test37_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1) - ret void -} - -; Same caveat about memcpy as in @test18 applies here. 
-define void @test38(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) - ret void -} - -define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1) - ret void -} - -define void @test39(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test39( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 8, i1 false) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false) - ret void -} - -define void @test39_atomic(i8* %P, i8* %Q, i8* %R) { -; CHECK-LABEL: @test39_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1) -; CHECK-NEXT: ret void -; - - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1) - ret void -} - -define i32 @test40() { -; CHECK-LABEL: @test40( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store i8 1, i8* [[P_NEXT]], align 1 -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15 -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br label %loop -loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next - store i8 1, i8* %p.next - %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv - store i8 0, i8* %p - %continue = icmp ugt i64 %indvars.iv, 15 - br i1 %continue, label %loop, label %return -return: - ret i32 0 -} - -define i32 
@test41() { -; CHECK-LABEL: @test41( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store i8 1, i8* [[P_NEXT]], align 1 -; CHECK-NEXT: br label [[CONT]] -; CHECK: cont: -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15 -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br label %loop -loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next - store i8 1, i8* %p.next - br label %cont - -cont: - %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv - store i8 0, i8* %p - %continue = icmp ugt i64 %indvars.iv, 15 - br i1 %continue, label %loop, label %return - -return: - ret i32 0 -} - -; The store is redundant here, but currently we fail to eliminate it. -; We are walking from the store up to the calloc and translate phis as -; needed. In this case we fail to translate %p while going over the -; backedge. Because of that we conservatively assume that zero initialized -; memory is clobbered. -define i32 @test42() { -; CHECK-LABEL: @test42( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: br label [[CONT]] -; CHECK: cont: -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15 -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br label %loop -loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %cont - -cont: - %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv - store i8 0, i8* %p - %continue = icmp ugt i64 %indvars.iv, 15 - br i1 %continue, label %loop, label %return - -return: - ret i32 0 -} - -define i32 @test43() { -; CHECK-LABEL: @test43( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store i8 1, i8* [[P_NEXT]], align 1 -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: cont: -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: br label [[CONT_2]] -; CHECK: 
cont.2: -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15 -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br label %loop -loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next - store i8 1, i8* %p.next - br label %cont - -cont: - %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv - store i8 0, i8* %p - br label %cont.2 - -cont.2: - %continue = icmp ugt i64 %indvars.iv, 15 - br i1 %continue, label %loop, label %return - -return: - ret i32 0 -} - -define i32 @test44() { -; CHECK-LABEL: @test44( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: cont: -; CHECK-NEXT: store i8 1, i8* [[P_NEXT]], align 1 -; CHECK-NEXT: br label [[CONT_2]] -; CHECK: cont.2: -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15 -; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br label %loop -loop: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next - %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv - store i8 0, i8* %p - br label %cont - -cont: - store i8 1, i8* %p.next - br label %cont.2 - -cont.2: - %continue = icmp ugt i64 %indvars.iv, 15 - br i1 %continue, label %loop, label %return - -return: - ret i32 0 -} - -; This is an example which can potentially benefit from PHI translation. -; Current implementation doesn't handle this case though. This is because -; we don't visit the same block with different addresses while looking for -; clobbering instructions. 
-define i32 @test45(i1 %c) { -; CHECK-LABEL: @test45( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]] -; CHECK: true: -; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1 -; CHECK-NEXT: store i8 1, i8* [[P_1]], align 1 -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: false: -; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2 -; CHECK-NEXT: store i8 1, i8* [[P_2]], align 1 -; CHECK-NEXT: br label [[CONT]] -; CHECK: cont: -; CHECK-NEXT: [[OFFSET:%.*]] = phi i64 [ 2, [[TRUE]] ], [ 1, [[FALSE]] ] -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[OFFSET]] -; CHECK-NEXT: store i8 0, i8* [[P]], align 1 -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - br i1 %c, label %true, label %false - -true: - %p.1 = getelementptr inbounds i8, i8* %m, i64 1 - store i8 1, i8* %p.1 - br label %cont - -false: - %p.2 = getelementptr inbounds i8, i8* %m, i64 2 - store i8 1, i8* %p.2 - br label %cont - -cont: - %offset = phi i64 [ 2, %true ], [ 1, %false ] - %p = getelementptr inbounds i8, i8* %m, i64 %offset - store i8 0, i8* %p - br label %return - -return: - ret i32 0 -} - -; This is test45 modified in a way to demonstrate PHI translation -; improving the accuracy of the analysis (on a slightly convoluted -; case though). -define i32 @test46(i1 %c) { -; CHECK-LABEL: @test46( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20) -; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1 -; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2 -; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]] -; CHECK: true: -; CHECK-NEXT: store i8 1, i8* [[P_1]], align 1 -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: false: -; CHECK-NEXT: store i8 1, i8* [[P_1]], align 1 -; CHECK-NEXT: br label [[CONT]] -; CHECK: cont: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: return: -; CHECK-NEXT: ret i32 0 -; -entry: - %m = call i8* @calloc(i32 9, i32 20) - %p.1 = getelementptr inbounds i8, i8* %m, i64 1 - %p.2 = getelementptr inbounds i8, i8* %m, i64 2 - br i1 %c, label %true, label %false - -true: - store i8 1, i8* %p.1 - br label %cont - -false: - store i8 1, i8* %p.1 - br label %cont - -cont: - %offset = phi i64 [ 2, %true ], [ 2, %false ] - %p = getelementptr inbounds i8, i8* %m, i64 %offset - store i8 0, i8* %p - br label %return - -return: - ret i32 0 -} - -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) -declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32) Index: llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s - -; Don't eliminate stores to allocas before tail calls to functions that use -; byval. It's correct to mark calls like these as 'tail'. To implement this tail -; call, the backend should copy the bytes from the alloca into the argument area -; before clearing the stack. 
- -target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" -target triple = "i386-unknown-linux-gnu" - -declare void @g(i32* byval(i32) %p) - -define void @f(i32* byval(i32) %x) { -entry: - %p = alloca i32 - %v = load i32, i32* %x - store i32 %v, i32* %p - tail call void @g(i32* byval(i32) %p) - ret void -} -; CHECK-LABEL: define void @f(i32* byval(i32) %x) -; CHECK: store i32 %v, i32* %p -; CHECK: tail call void @g(i32* byval(i32) %p) Index: llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll +++ llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -S < %s | FileCheck %s target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 { Index: llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s -; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -S < %s | FileCheck %s target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {