Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -29,6 +30,9 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -59,8 +63,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 #include
@@ -85,6 +89,15 @@
         cl::init(true), cl::Hidden,
         cl::desc("Enable partial store merging in DSE"));
 
+static cl::opt<bool>
+    EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
+                    cl::desc("Use the new MemorySSA-backed DSE."));
+
+static cl::opt<unsigned> MemorySSAMemoryAccessScanLimit(
+    "dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
+    cl::desc("The number of memory instructions to scan for dead store "
+             "elimination (default = 100)"));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1288,22 +1301,517 @@
   return MadeChange;
 }
 
+//=============================================================================
+// New MemorySSA form. The MemDep-based methods above will eventually be
+// removed (if they are unused by those below).
+
+struct NextResult {
+  enum WalkType { Bad, Next, End };
+  WalkType type;
+  MemoryAccess *next;
+};
+
+// Find the next MemorySSA use from Current, weeding out things that don't
+// alias.
+NextResult getNextMemoryAccess(MemoryAccess *Current, MemoryLocation SILoc,
+                               AliasAnalysis &AA) {
+  NextResult Next = {NextResult::End, nullptr};
+
+  // Scan the uses to see if we have a single interesting next MemoryAccess.
+  for (Use &U : Current->uses()) {
+    MemoryAccess *Use = cast<MemoryAccess>(U.getUser());
+    if (auto MA = dyn_cast<MemoryUse>(Use)) {
+      // Any MemoryUse that may alias is a barrier; otherwise ignore it.
+      if (AA.getModRefInfo(MA->getMemoryInst(), SILoc) & MRI_Ref)
+        return {NextResult::Bad, nullptr};
+    } else if (isa<MemoryDef>(Use) || isa<MemoryPhi>(Use)) {
+      // If we see two different nodes, we are at a split point.
+      if (Next.next && Next.next != Use)
+        return {NextResult::Bad, nullptr};
+      Next = {NextResult::Next, Use};
+    } else
+      llvm_unreachable("Unexpected MemoryAccess type");
+  }
+
+  return Next;
+}
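+
+// For illustration: starting from the MemoryDef of the first store in
+//   store i32 1, i32* %P
+//   br i1 %c, label %bb1, label %bb2
+// where both %bb1 and %bb2 contain a store to %P, the def has two distinct
+// MemoryDef/MemoryPhi users, so the walk returns NextResult::Bad rather than
+// trying to follow both paths. A MemoryUse that may read %P likewise stops
+// the walk, since the store is still visible to that load.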
+
+// Delete dead instructions.
+void deleteDeadInstruction(
+    Instruction *SI, InstOverlapIntervalsTy &IOL, MemorySSA &MSSA,
+    const TargetLibraryInfo &TLI,
+    SmallSetVector<Value *, 16> *DeadStackObject = nullptr) {
+
+  MemorySSAUpdater updater(&MSSA);
+  SmallVector<Instruction *, 32> NowDeadInsts;
+  NowDeadInsts.push_back(SI);
+  --NumFastOther;
+
+  while (!NowDeadInsts.empty()) {
+    Instruction *DeadInst = NowDeadInsts.pop_back_val();
+    ++NumFastOther;
+
+    // Remove the instruction from MSSA and IOL.
+    if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst))
+      updater.removeMemoryAccess(MA);
+    IOL.erase(DeadInst);
+    if (DeadStackObject)
+      DeadStackObject->remove(DeadInst);
+
+    // Remove its operands.
+    for (Use &O : DeadInst->operands())
+      if (Instruction *OpI = dyn_cast<Instruction>(O)) {
+        O = nullptr;
+        if (isInstructionTriviallyDead(OpI, &TLI))
+          NowDeadInsts.push_back(OpI);
+      }
+
+    DeadInst->eraseFromParent();
+  }
+}
+
+// Merge constant stores into predecessors.
+bool mergePartialMemoryDefs(MemoryDef &EarlierMD, MemoryDef &LaterMD,
+                            InstOverlapIntervalsTy &IOL, const DataLayout &DL,
+                            MemorySSA &MSSA, const TargetLibraryInfo &TLI) {
+  auto *Earlier = dyn_cast<StoreInst>(EarlierMD.getMemoryInst());
+  auto *Later = dyn_cast<StoreInst>(LaterMD.getMemoryInst());
+  if (!Earlier || !isa<ConstantInt>(Earlier->getValueOperand()) || !Later ||
+      !isa<ConstantInt>(Later->getValueOperand()))
+    return false;
+
+  // If the store we find is:
+  //   a) partially overwritten by the store to 'Loc',
+  //   b) the later store is fully contained in the earlier one, and
+  //   c) they both have a constant value,
+  // then merge the two stores, replacing the earlier store's value with a
+  // merge of both values.
+  // TODO: Deal with other constant types (vectors, etc), and probably
+  // some mem intrinsics (if needed).
+
+  APInt EarlierValue =
+      cast<ConstantInt>(Earlier->getValueOperand())->getValue();
+  APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
+  unsigned LaterBits = LaterValue.getBitWidth();
+  assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
+  LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+
+  const Value *P1 = Earlier->getPointerOperand()->stripPointerCasts();
+  const Value *P2 = Later->getPointerOperand()->stripPointerCasts();
+  int64_t InstWriteOffset, DepWriteOffset;
+  GetPointerBaseWithConstantOffset(P1, DepWriteOffset, DL);
+  GetPointerBaseWithConstantOffset(P2, InstWriteOffset, DL);
+
+  // Offset of the smaller store inside the larger store.
+  unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
+  unsigned LShiftAmount =
+      DL.isBigEndian() ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
+                       : BitOffsetDiff;
+  APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
+                                 LShiftAmount + LaterBits);
+  // Clear the bits we'll be replacing, then OR in the smaller store, shifted
+  // appropriately.
+  APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
+  DEBUG(dbgs() << "DSE: Merge Stores:\n  Earlier: " << *Earlier
+               << "\n  Later: " << *Later << "\n  Merged Value: " << Merged
+               << '\n');
+
+  Earlier->setOperand(
+      0, ConstantInt::get(Earlier->getValueOperand()->getType(), Merged));
+  deleteDeadInstruction(Later, IOL, MSSA, TLI);
+  IOL.erase(Earlier);
+  ++NumModifiedStores;
+  return true;
+}
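+
+// For illustration, on a little-endian target the pair
+//   store i64 0, i64* %p        ; earlier, wider store
+//   store i16 1, i16* %p2       ; later store at byte offset 2 within %p
+// has BitOffsetDiff = 16, so the merged value is (0 & ~Mask) | (1 << 16) and
+// the earlier store becomes 'store i64 65536, i64* %p' while the i16 store is
+// deleted; the merge-stores.ll tests exercise exactly this pattern.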
+
+// FIXME: Make this use MSSA?
+bool eliminateNoopStore(MemoryDef &MD, InstOverlapIntervalsTy &IOL,
+                        const DataLayout &DL, AliasAnalysis &AA,
+                        MemorySSA &MSSA, const TargetLibraryInfo &TLI) {
+  // Must be a store instruction.
+  StoreInst *SI = dyn_cast<StoreInst>(MD.getMemoryInst());
+  if (!SI)
+    return false;
+
+  // If we're storing the same value back to a pointer that we just loaded
+  // from, then the store can be removed.
+  if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
+    if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, &AA)) {
+
+      DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
+                   << *DepLoad << "\n  STORE: " << *SI << '\n');
+
+      deleteDeadInstruction(SI, IOL, MSSA, TLI);
+      ++NumRedundantStores;
+      return true;
+    }
+  }
+
+  // Remove null stores into the calloc'ed objects.
+  Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
+  if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
+    Instruction *UnderlyingPointer =
+        dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+    if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, &TLI) &&
+        memoryIsNotModifiedBetween(UnderlyingPointer, SI, &AA)) {
+      DEBUG(
+          dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
+                 << *SI << "\n  OBJECT: " << *UnderlyingPointer << '\n');
+
+      deleteDeadInstruction(SI, IOL, MSSA, TLI);
+      ++NumRedundantStores;
+      return true;
+    }
+  }
+  return false;
+}
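+
+// For illustration, both of these patterns are removed as no-ops:
+//   %v = load i32, i32* %p
+//   store i32 %v, i32* %p     ; stores back the value just loaded
+// and
+//   %m = call i8* @calloc(i64 1, i64 4)
+//   store i8 0, i8* %m        ; calloc'ed memory is already zero
+// provided nothing may write to the location in between.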
+
+bool isThrowInRange(Value *SI, Value *NI, const Value *SILocUnd,
+                    DenseMap<Value *, unsigned> &InstructionPostOrders,
+                    SmallVectorImpl<unsigned> &MayThrowsPostOrders,
+                    const DataLayout &DL, const TargetLibraryInfo &TLI) {
+  assert(InstructionPostOrders.count(SI));
+  assert(InstructionPostOrders.count(NI));
+  unsigned SIPO = InstructionPostOrders[SI];
+  unsigned NIPO = InstructionPostOrders[NI];
+  assert(
+      std::is_sorted(MayThrowsPostOrders.begin(), MayThrowsPostOrders.end()));
+  assert(SIPO > NIPO);
+  auto it = std::upper_bound(MayThrowsPostOrders.begin(),
+                             MayThrowsPostOrders.end(), NIPO);
+  if (it == MayThrowsPostOrders.end() || *it >= SIPO)
+    return false;
+
+  // Stores to stack objects are dead if we unwind.
+  if (isa<AllocaInst>(SILocUnd))
+    return false;
+
+  // We're looking for a call to an allocation function where the allocation
+  // doesn't escape before the last throwing instruction; PointerMayBeCaptured
+  // is a reasonably fast approximation.
+  if (isAllocLikeFn(SILocUnd, &TLI) &&
+      !PointerMayBeCaptured(SILocUnd, false, true))
+    return false;
+
+  return true;
+}
+
+static bool handleFree(Instruction *SI, const Value *SILocUnd, Instruction *NI,
+                       MemoryLocation &NILoc,
+                       DenseMap<Value *, unsigned> &InstructionPostOrders,
+                       SmallVectorImpl<unsigned> &MayThrowsPostOrders,
+                       InstOverlapIntervalsTy &IOL, AliasAnalysis &AA,
+                       MemorySSA &MSSA, const DataLayout &DL,
+                       const TargetLibraryInfo &TLI) {
+  // Look for frees or lifetime_ends that mark the end of the lifetime of the
+  // object.
+  const Value *NIUndPtr = nullptr;
+  if (isFreeCall(NI, &TLI)) {
+    NIUndPtr = GetUnderlyingObject(cast<CallInst>(NI)->getArgOperand(0), DL);
+  } else if (auto *In = dyn_cast<IntrinsicInst>(NI)) {
+    if (In->getIntrinsicID() == Intrinsic::lifetime_end)
+      NIUndPtr = GetUnderlyingObject(NILoc.Ptr, DL);
+  }
+
+  if (!NIUndPtr)
+    return false;
+
+  if (!AA.isMustAlias(SILocUnd, NIUndPtr) ||
+      isThrowInRange(SI, NI, SILocUnd, InstructionPostOrders,
+                     MayThrowsPostOrders, DL, TLI))
+    return false;
+
+  DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n  DEAD: "
+               << *SI << '\n');
+  deleteDeadInstruction(SI, IOL, MSSA, TLI);
+  return true;
+}
+
+static bool handleEnd(Instruction *SI,
+                      SmallSetVector<Value *, 16> &DeadStackObjects,
+                      InstOverlapIntervalsTy &IOL, MemorySSA &MSSA,
+                      const DataLayout &DL, const TargetLibraryInfo &TLI) {
+
+  // See through pointer-to-pointer bitcasts.
+  SmallVector<Value *, 4> Pointers;
+  GetUnderlyingObjects(getStoredPointerOperand(SI), Pointers, DL);
+
+  // Stores to stack values are valid candidates for removal.
+  for (Value *Pointer : Pointers)
+    if (!DeadStackObjects.count(Pointer))
+      return false;
+
+  DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: " << *SI
+               << "\n  Objects: ";
+        for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+                                                E = Pointers.end();
+             I != E; ++I) {
+          dbgs() << **I;
+          if (std::next(I) != E)
+            dbgs() << ", ";
+        } dbgs()
+            << '\n');
+  deleteDeadInstruction(SI, IOL, MSSA, TLI);
+  return true;
+}
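+
+// For illustration: in
+//   define void @f() {
+//     %a = alloca i32
+//     store i32 1, i32* %a
+//     ret void
+//   }
+// the walk from the store reaches the end of the function without seeing a
+// read, and %a is a dead stack object, so the store is deleted.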
+
+static bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc,
+                         const Value *SILocUnd, Instruction *NI,
+                         MemoryLocation &NILoc, int &MaxModRefInfo,
+                         DenseMap<Value *, unsigned> &InstructionPostOrders,
+                         SmallVectorImpl<unsigned> &MayThrowsPostOrders,
+                         AliasAnalysis &AA, const DataLayout &DL,
+                         const TargetLibraryInfo &TLI) {
+  // Check for anything that acts as a DSE barrier: atomics stronger than
+  // monotonic, anything that looks like a self-read, and anything that may
+  // throw between SI and NI.
+  if (isThrowInRange(SI, NI, SILocUnd, InstructionPostOrders,
+                     MayThrowsPostOrders, DL, TLI))
+    return true;
+
+  int NIModRefInfo = AA.getModRefInfo(NI, SILoc);
+  if (NI->isAtomic()) {
+    if (auto *NSI = dyn_cast<StoreInst>(NI)) {
+      if (isStrongerThanMonotonic(NSI->getOrdering()))
+        return true;
+    } else if (auto *NLI = dyn_cast<LoadInst>(NI)) {
+      if (isStrongerThanMonotonic(NLI->getOrdering()) ||
+          !AA.isNoAlias(MemoryLocation::get(NLI), SILoc))
+        return true;
+    } else if (isa<FenceInst>(NI))
+      // We can remove the dead stores, irrespective of the fence and its
+      // ordering (release/acquire/seq_cst). Fences only constrain the
+      // ordering of already visible stores; they do not make a store visible
+      // to other threads. So skipping over a fence does not change a store
+      // from being dead. We treat fences as maythrows, to be conservative, so
+      // that they block optimisations in the same way.
+      return false;
+    else
+      return true;
+  } else if (NIModRefInfo & MRI_Ref)
+    return true;
+
+  if (hasMemoryWrite(NI, TLI) && isPossibleSelfRead(NI, NILoc, SI, TLI, AA))
+    return true;
+
+  MaxModRefInfo |= NIModRefInfo;
+  return false;
+}
+
+// TODO: Combine into a single getLocForWrite with the version above.
+static MemoryLocation getLocForWriteEx(Instruction *SI, AliasAnalysis &AA,
+                                       const TargetLibraryInfo &TLI) {
+  MemoryLocation Loc = getLocForWrite(SI, AA);
+  if (Loc.Ptr)
+    return Loc;
+
+  if (hasMemoryWrite(SI, TLI)) {
+    CallSite CS(SI);
+    // All the supported functions so far happen to have dest as their first
+    // argument.
+    return MemoryLocation(CS.getArgument(0));
+  }
+  return MemoryLocation();
+}
+
+static bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
+                                         MemorySSA &MSSA,
+                                         PostDominatorTree &PDT,
+                                         const TargetLibraryInfo &TLI) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  bool MadeChange = false;
+
+  // All MemoryDef stores.
+  SmallVector<MemoryDef *, 64> Stores;
+  // Any that should be skipped as they are already deleted.
+  SmallPtrSet<MemoryAccess *, 8> SkipStores;
+  // A map of interval maps representing partially-overwritten value parts.
+  InstOverlapIntervalsTy IOL;
+  // Post-orders for each interesting instruction, basic block and throw.
+  // These count up in post order, so lower numbers are later in the function.
+  DenseMap<Value *, unsigned> InstructionPostOrders;
+  SmallVector<unsigned, 16> MayThrowsPostOrders;
+  // Keep track of all of the stack objects that are dead at the end of the
+  // function.
+  SmallSetVector<Value *, 16> DeadStackObjects;
+
+  // Get all memory writes and post orders of anything interesting.
+  unsigned PO = 0;
+  for (BasicBlock *BB : post_order(&F)) {
+    for (Instruction &I : reverse(*BB)) {
+      bool MayThrow = I.mayThrow() || isa<FenceInst>(I);
+      auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
+
+      if (MayThrow || MD) {
+        unsigned IPO = PO++;
+        InstructionPostOrders[&I] = IPO;
+
+        if (MayThrow)
+          MayThrowsPostOrders.push_back(IPO);
+        if (MD && hasMemoryWrite(&I, TLI) && isRemovable(&I))
+          Stores.push_back(MD);
+      }
+
+      // Track alloca and alloca-like objects.
+      if (isa<AllocaInst>(&I))
+        DeadStackObjects.insert(&I);
+      // Okay, so these are dead heap objects, but if the pointer never escapes
+      // then it's leaked by this function anyways.
+      else if (isAllocLikeFn(&I, &TLI) &&
+               !PointerMayBeCaptured(&I, true, true))
+        DeadStackObjects.insert(&I);
+    }
+
+    InstructionPostOrders[BB] = PO++;
+  }
+  // Treat byval or inalloca arguments the same as allocas; stores to them are
+  // dead at the end of the function.
+  for (Argument &AI : F.args())
+    if (AI.hasByValOrInAllocaAttr())
+      DeadStackObjects.insert(&AI);
+
+  // For each store:
+  for (MemoryDef *SIMD : Stores) {
+    if (SkipStores.count(SIMD))
+      continue;
+    Instruction *SI = SIMD->getMemoryInst();
+    MemoryLocation SILoc = getLocForWriteEx(SI, AA, TLI);
+    assert(SILoc.Ptr && "SILoc should not be null");
+    const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
+
+    // Check for no-op stores that write back what was read from the same
+    // address.
+    if (eliminateNoopStore(*SIMD, IOL, DL, AA, MSSA, TLI)) {
+      MadeChange = true;
+      continue;
+    }
+
+    // Limit on the number of memory instructions we can look at.
+    int InstructionLimit = MemorySSAMemoryAccessScanLimit;
+    // Used by PartialEarlierWithFullLater to ensure that the walk so far has
+    // been free of anything that could modref (excluding the current
+    // instruction).
+    int MaxModRefInfo = MRI_NoModRef;
+    // Previous Next result, in case we hit a loop latch.
+    MemoryAccess *Prev = SIMD;
+
+    // Walk forward for stores that kill this one.
+    for (NextResult Next = getNextMemoryAccess(SIMD, SILoc, AA);
+         Next.type != NextResult::Bad;
+         Next = getNextMemoryAccess(Next.next, SILoc, AA)) {
+
+      // We have hit the end of the walk. If the store is non-escaping, it is
+      // dead and we can kill it.
+      if (Next.type == NextResult::End) {
+        if (handleEnd(SI, DeadStackObjects, IOL, MSSA, DL, TLI))
+          MadeChange = true;
+        break;
+      }
+
+      if (auto ND = dyn_cast<MemoryDef>(Next.next)) {
+        Instruction *NI = ND->getMemoryInst();
+        MemoryLocation NILoc = getLocForWriteEx(NI, AA, TLI);
+
+        // If we hit a free or lifetime end, we can kill the store.
+        if (handleFree(SI, SILocUnd, NI, NILoc, InstructionPostOrders,
+                       MayThrowsPostOrders, IOL, AA, MSSA, DL, TLI)) {
+          MadeChange = true;
+          break;
+        }
+
+        // Check for anything that looks like it will be a barrier to further
+        // removal.
+        int MaxModRefInfoPrev = MaxModRefInfo;
+        if (isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc, MaxModRefInfo,
+                         InstructionPostOrders, MayThrowsPostOrders, AA, DL,
+                         TLI))
+          break;
+
+        // The killing store must post-dominate the dying store for the
+        // removal to be safe.
+        if (!PDT.dominates(ND->getBlock(), SIMD->getBlock()))
+          continue;
+
+        // Get what type of overwrite this might be.
+        int64_t InstWriteOffset, DepWriteOffset;
+        OverwriteResult OR = isOverwrite(NILoc, SILoc, DL, TLI, DepWriteOffset,
+                                         InstWriteOffset, SI, IOL);
+
+        if (OR == OW_Complete) {
+          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *SI
+                       << "\n  KILLER: " << *NI << '\n');
+          deleteDeadInstruction(SI, IOL, MSSA, TLI);
+          ++NumFastStores;
+          MadeChange = true;
+          break;
+        }
+        // There are a few extra checks if it is a PartialEarlierWithFullLater,
+        // as the removal happens in the opposite order to normal (NI is
+        // removed, not SI).
+        else if (OR == OW_PartialEarlierWithFullLater && isRemovable(NI) &&
+                 ND->getBlock() == SIMD->getBlock() &&
+                 MaxModRefInfoPrev == MRI_NoModRef) {
+          if (mergePartialMemoryDefs(*SIMD, *ND, IOL, DL, MSSA, TLI)) {
+            // This removes ND, so we start over, ensuring we won't look at ND.
+            // We don't reset the InstructionLimit.
+            Next = {NextResult::Next, SIMD};
+            SkipStores.insert(ND);
+            MaxModRefInfo = MRI_NoModRef;
+            MadeChange = true;
+            continue;
+          }
+        }
+      } else if (auto NP = dyn_cast<MemoryPhi>(Next.next)) {
+        // Detect loops by checking if the post-order numbers are earlier.
+        Value *PrevValue = nullptr;
+        if (auto SMD = dyn_cast<MemoryDef>(Prev))
+          PrevValue = SMD->getMemoryInst();
+        else if (auto SMP = dyn_cast<MemoryPhi>(Prev))
+          PrevValue = SMP->getBlock();
+        assert(InstructionPostOrders.count(NP->getBlock()));
+        assert(InstructionPostOrders.count(PrevValue));
+        if (InstructionPostOrders[NP->getBlock()] >
+            InstructionPostOrders[PrevValue])
+          break;
+      }
+      Prev = Next.next;
+
+      if (--InstructionLimit == 0)
+        break;
+    }
+  }
+
+  if (EnablePartialOverwriteTracking)
+    MadeChange |= removePartiallyOverlappedStores(&AA, DL, IOL);
+
+  return MadeChange;
+}
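+
+// For illustration: for a store inside a loop such as
+//   for:
+//     store i32 0, i32* %P
+//     br i1 %c, label %for, label %end
+// the walk from the store reaches the MemoryPhi in %for along the backedge.
+// The phi's block has a higher post-order number than the store (it is
+// earlier in the function), so the walk breaks instead of cycling; the store
+// in test13 of multiblock.ll below is kept for this reason.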
+
 //===----------------------------------------------------------------------===//
 // DSE Pass
 //===----------------------------------------------------------------------===//
 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
-  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
-  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
-  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
-  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
 
-  if (!eliminateDeadStores(F, AA, MD, DT, TLI))
-    return PreservedAnalyses::all();
+  if (EnableMemorySSA) {
+    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+    PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+
+    if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, PDT, TLI))
+      return PreservedAnalyses::all();
+  } else {
+    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+    DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+    if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI))
+      return PreservedAnalyses::all();
+  }
 
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
-  PA.preserve<MemoryDependenceAnalysis>();
+  if (EnableMemorySSA)
+    PA.preserve<MemorySSAAnalysis>();
+  else
+    PA.preserve<MemoryDependenceAnalysis>();
   return PA;
 }
 
@@ -1322,25 +1830,42 @@
     if (skipFunction(F))
       return false;
 
-    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    MemoryDependenceResults *MD =
-        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
-    const TargetLibraryInfo *TLI =
-        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    const TargetLibraryInfo &TLI =
+        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+    if (EnableMemorySSA) {
+      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+      PostDominatorTree &PDT =
+          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+
+      return eliminateDeadStoresMemorySSA(F, AA, MSSA, PDT, TLI);
+    } else {
+      MemoryDependenceResults &MD =
+          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+      DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 
-    return eliminateDeadStores(F, AA, MD, DT, TLI);
+      return eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
+    }
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addPreserved<MemoryDependenceWrapperPass>();
+
+    if (EnableMemorySSA) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addRequired<PostDominatorTreeWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    } else {
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addRequired<MemoryDependenceWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<MemoryDependenceWrapperPass>();
+    }
   }
 };
 
@@ -1351,8 +1876,10 @@
 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
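The new code path is off by default; the multiblock.ll test added below enables
it explicitly via the new flag, e.g. 'opt -basicaa -dse -enable-dse-memoryssa -S'.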
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                     false)
Index: test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
===================================================================
--- test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
+++ test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
@@ -40,7 +40,6 @@
 
   %wptr = bitcast i64* %ptr to i16*
   %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
-  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
   %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
 
   ;; We should be able to merge these two stores with the i64 one above
Index: test/Transforms/DeadStoreElimination/merge-stores.ll
===================================================================
--- test/Transforms/DeadStoreElimination/merge-stores.ll
+++ test/Transforms/DeadStoreElimination/merge-stores.ll
@@ -39,7 +39,6 @@
 
   %wptr = bitcast i64* %ptr to i16*
   %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
-  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
   %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
 
   ;; We should be able to merge these two stores with the i64 one above
Index: test/Transforms/DeadStoreElimination/multiblock.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/multiblock.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+declare void @unknown_func()
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @free(i8* nocapture)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+
+define void @test1(i32* noalias %P) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %DEAD = load i32, i32* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32* %P
+  br label %bb1
+bb1:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test2(i32* noalias %P) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  store i32 1, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test3(i32* noalias %P) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  store i32 0, i32* %P
+  br label %bb3
+bb3:
+  ret void
+}
+
+
+define void @test4(i32* noalias %P) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  %x = load i32, i32* %P
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test5(i32* noalias %P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  br i1 true, label %bb1, label %bb2
+bb1:
+  store i32 1, i32* %P
+  br label %bb3
+bb2:
+  store i32 1, i32* %P
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test6(i32* noalias %P) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  call void @unknown_func()
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test7(i32* noalias %P, i32* noalias %Q) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[Q:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+  store i32 1, i32* %Q
+  br i1 true, label %bb1, label %bb2
+bb1:
+  load i32, i32* %P
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 0, i32* %Q
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test8(i32* noalias %P) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  br i1 true, label %bb1, label %bb2
+bb1:
+  store i32 1, i32* %P
+  br label %bb3
+bb2:
+  store i32 1, i32* %P
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test9(i32* noalias %P) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  ret void
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test10(i32* noalias %P) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i8 1, i8* [[P2]]
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i8 1, i8* %P2
+  ret void
+}
+
+
+define void @test11(i32* noalias %P) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i8 1, i8* %P2
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 0, i32* %P
+  ret void
+}
+
+
+define void @test12(i32* noalias %P) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    store i8 0, i8* [[P2]]
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i32 0, i32* %P
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  ret void
+bb3:
+  store i8 0, i8* %P2
+  ret void
+}
+
+
+define void @test13(i32* noalias %P) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR:%.*]]
+; CHECK:       for:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for
+for:
+  store i32 0, i32* %P
+  br i1 false, label %for, label %end
+end:
+  ret void
+}
+
+
+define void @test14(i32* noalias %P) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR:%.*]]
+; CHECK:       for:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i32 1, i32* %P
+  br label %for
+for:
+  store i32 0, i32* %P
+  br i1 false, label %for, label %end
+end:
+  ret void
+}
+
+
+define void @test15(i32* noalias %P) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i32 1, i32* [[P]]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i8 2, i8* [[P2]]
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  br i1 true, label %bb1, label %bb3
+bb1:
+  store i32 1, i32* %P
+  br label %bb3
+bb3:
+  store i8 2, i8* %P2
+  ret void
+}
+
+
+define void @test16(i32* noalias %P) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @free(i8* [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i32 1, i32* %P
+  br i1 true, label %bb1, label %bb3
+bb1:
+  store i32 1, i32* %P
+  br label %bb3
+bb3:
+  call void @free(i8* %P2)
+  ret void
+}
+
+
+define void @test17(i32* noalias %P) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    store i32 1, i32* [[P]]
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    call void @free(i8* [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i32 1, i32* %P
+  br i1 true, label %bb1, label %bb3
+bb1:
+  call void @unknown_func()
+  store i32 1, i32* %P
+  br label %bb3
+bb3:
+  call void @free(i8* %P2)
+  ret void
+}
+
+
+define void @test18(i32* noalias %P) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    br label [[FOR:%.*]]
+; CHECK:       for:
+; CHECK-NEXT:    store i8 1, i8* [[P2]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT:    store i8 2, i8* [[P2]]
+; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %P2 = bitcast i32* %P to i8*
+  store i32 0, i32* %P
+  br label %for
+for:
+  store i8 1, i8* %P2
+  %x = load i32, i32* %P
+  store i8 2, i8* %P2
+  br i1 false, label %for, label %end
+end:
+  ret void
+}
+
+
+define void @test19(i32* noalias %P) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[P3]], i8 0, i64 28, i32 4, i1 false)
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1
+  store i32 1, i32* %arrayidx1, align 4
+  br label %bb3
+bb3:
+  ret void
+}
+
+
+define void @test20(i32* noalias %P) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 24, i32 4, i1 false)
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  br i1 true, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+
+define void @test21(i32* noalias %P) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 24, i32 4, i1 false)
+; CHECK-NEXT:    br label [[FOR:%.*]]
+; CHECK:       for:
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  br label %for
+for:
+  %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1
+  store i32 1, i32* %arrayidx1, align 4
+  br i1 false, label %for, label %end
+end:
+  ret void
+}
+
+
+
+define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %x, i32* noalias nocapture %b) local_unnamed_addr {
+; CHECK-LABEL: @test_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_BODY4_LR_PH_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body4.lr.ph.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY4_LR_PH:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body4.lr.ph:
+; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
+; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
+; CHECK:       for.cond.cleanup3:
+; CHECK-NEXT:    store i32 [[ADD9:%.*]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC11]] = add nuw nsw i32 [[I_028]], 1
+; CHECK-NEXT:    [[EXITCOND29:%.*]] = icmp eq i32 [[INC11]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND29]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_LR_PH]]
+; CHECK:       for.body4:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[ADD9]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[J_026:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[J_026]], [[MUL]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[J_026]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[ADD9]] = add nsw i32 [[MUL7]], [[TMP0]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_026]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
+;
+entry:
+  %cmp27 = icmp sgt i32 %N, 0
+  br i1 %cmp27, label %for.body4.lr.ph.preheader, label %for.cond.cleanup
+
+for.body4.lr.ph.preheader:                        ; preds = %entry
+  br label %for.body4.lr.ph
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3, %entry
+  ret void
+
+for.body4.lr.ph:                                  ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
+  %i.028 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.028
+  store i32 0, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %i.028, %N
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  store i32 %add9, i32* %arrayidx, align 4
+  %inc11 = add nuw nsw i32 %i.028, 1
+  %exitcond29 = icmp eq i32 %inc11, %N
+  br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
+
+for.body4:                                        ; preds = %for.body4, %for.body4.lr.ph
+  %0 = phi i32 [ 0, %for.body4.lr.ph ], [ %add9, %for.body4 ]
+  %j.026 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+  %add = add nsw i32 %j.026, %mul
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add
+  %1 = load i32, i32* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, i32* %x, i32 %j.026
+  %2 = load i32, i32* %arrayidx6, align 4
+  %mul7 = mul nsw i32 %2, %1
+  %add9 = add nsw i32 %mul7, %0
+  %inc = add nuw nsw i32 %j.026, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
+}
+
Index: test/Transforms/DeadStoreElimination/simple.ll
===================================================================
--- test/Transforms/DeadStoreElimination/simple.ll
+++ test/Transforms/DeadStoreElimination/simple.ll
@@ -521,3 +521,111 @@
   store i32 0, i32* %p
   ret void
 }
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
+define void @test36(i32** noalias %Pp, i32* noalias %Q) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[AC:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[AC]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** [[PP:%.*]] to i8**
+; CHECK-NEXT:    [[PC:%.*]] = load i8*, i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[QC:%.*]] = bitcast i32* [[Q:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull [[AC]], i8* [[QC]], i64 4, i32 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[PC]], i8* nonnull [[AC]], i64 4, i32 4, i1 true)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[AC]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A = alloca i32, align 4
+  %Ac = bitcast i32* %A to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %Ac)
+  %0 = bitcast i32** %Pp to i8**
+  %Pc = load i8*, i8** %0, align 8
+  %Qc = bitcast i32* %Q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %Ac, i8* %Qc, i64 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Pc, i8* nonnull %Ac, i64 4, i32 4, i1 true)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %Ac)
+  ret void
+}
+
+; I think this case is currently handled incorrectly by MemDep-based DSE:
+; the call that may throw should leave 'store i32 1' in place, and the store
+; should not be removed based on the later free.
+declare void @free(i8* nocapture)
+define void @test37(i32* noalias %P) {
+; NOCHECK-LABEL: @test37(
+; NOCHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; NOCHECK-NEXT:    store i32 1, i32* [[P]]
+; NOCHECK-NEXT:    call void @unknown_func()
+; NOCHECK-NEXT:    call void @free(i8* [[P2]])
+; NOCHECK-NEXT:    ret void
+;
+  %P2 = bitcast i32* %P to i8*
+  store i32 1, i32* %P
+  call void @unknown_func()
+  store i32 2, i32* %P
+  call void @free(i8* %P2)
+  ret void
+}
+
+define void @test38(i32* %P, i32* %Q) {
+; NOCHECK-LABEL: @test38(
+; NOCHECK-NEXT:    store i32 1, i32* [[P:%.*]]
+; NOCHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
+; NOCHECK-NEXT:    store i32 2, i32* [[Q:%.*]]
+; NOCHECK-NEXT:    store i8 3, i8* [[P2]]
+; NOCHECK-NEXT:    ret void
+;
+  store i32 1, i32* %P
+  %P2 = bitcast i32* %P to i8*
+  store i32 2, i32* %Q
+  store i8 3, i8* %P2
+  ret void
+}
+
+define void @test38a(i32* %P, i32* %Q) {
+; NOCHECK-LABEL: @test38a(
+; NOCHECK-NEXT:    store atomic i32 1, i32* [[P:%.*]] unordered, align 4
+; NOCHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
+; NOCHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
+; NOCHECK-NEXT:    store atomic i8 3, i8* [[P2]] unordered, align 4
+; NOCHECK-NEXT:    ret void
+;
+  store atomic i32 1, i32* %P unordered, align 4
+  %P2 = bitcast i32* %P to i8*
+  store atomic i32 2, i32* %Q unordered, align 4
+  store atomic i8 3, i8* %P2 unordered, align 4
+  ret void
+}
+
+define void @test39(i32* %P, i32* noalias %Q) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 50331649, i32* [[P:%.*]]
+; CHECK-NEXT:    store i32 2, i32* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i32 1, i32* %P
+  %P2 = bitcast i32* %P to i8*
+  store i32 2, i32* %Q
+  store i8 3, i8* %P2
+  ret void
+}
+
+define void @test39a(i32* %P, i32* noalias %Q) {
+; CHECK-LABEL: @test39a(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  store atomic i32 1, i32* %P unordered, align 4
+  %P2 = bitcast i32* %P to i8*
+  store atomic i32 2, i32* %Q unordered, align 4
+  store atomic i8 3, i8* %P2 unordered, align 4
+  ret void
+}