diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,7 +29,10 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -40,6 +43,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -87,11 +91,20 @@ cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE")); -// Temporary dummy option for tests. 
static cl::opt EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); +static cl::opt + MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, + cl::desc("The number of memory instructions to scan for " + "dead store elimination (default = 100)")); + +static cl::opt MemorySSADefsPerBlockLimit( + "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, + cl::desc("The number of MemoryDefs we consider as candidates to eliminated " + "other stores per basic block (default = 5000)")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1354,22 +1367,479 @@ return MadeChange; } +namespace { +//============================================================================= +// MemorySSA backed dead store elimination. +// +// The code below implements dead store elimination using MemorySSA. It uses +// the following general approach: given a MemoryDef, walk upwards to find +// clobbering MemoryDefs that may be killed by the starting def. Then check +// that there are no uses that may read the location of the original MemoryDef +// in between both MemoryDefs. A bit more concretely: +// +// For all MemoryDefs StartDef: +// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking +// upwards. +// 2. Check that there are no reads between DomAccess and the StartDef by checking +// all uses starting at DomAccess and walking until we see StartDef. +// 3. For each found DomDef, check that: +// 1. There are no barrier instructions between DomDef and StartDef (like +// throws or stores with ordering constraints). +// 2. StartDef is executed whenever DomDef is executed. +// 3. StartDef completely overwrites DomDef. +// 4. Erase DomDef from the function and MemorySSA. + +// Returns true if \p M is an intrinsic that does not read or write memory.
+bool isNoopIntrinsic(MemoryUseOrDef *M) { + if (const IntrinsicInst *II = dyn_cast(M->getMemoryInst())) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::assume: + return true; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); + default: + return false; + } + } + return false; +} + +// Check if we can ignore \p D for DSE. +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { + Instruction *DI = D->getMemoryInst(); + // Calls that only access inaccessible memory cannot read or write any memory + // locations we consider for elimination. + if (auto CS = CallSite(DI)) + if (CS.onlyAccessesInaccessibleMemory()) + return true; + + // We can eliminate stores to locations not visible to the caller across + // throwing instructions. + if (DI->mayThrow() && !DefVisibleToCaller) + return true; + + // We can remove the dead stores, irrespective of the fence and its ordering + // (release/acquire/seq_cst). Fences only constraints the ordering of + // already visible stores, it does not make a store visible to other + // threads. So, skipping over a fence does not change a store from being + // dead. + if (isa(DI)) + return true; + + // Skip intrinsics that do not really read or modify memory. + if (isNoopIntrinsic(D)) + return true; + + return false; +} + +struct DSEState { + Function &F; + AliasAnalysis &AA; + MemorySSA &MSSA; + DominatorTree &DT; + PostDominatorTree &PDT; + const TargetLibraryInfo &TLI; + + // All MemoryDefs that potentially could kill other MemDefs. + SmallVector MemDefs; + // Any that should be skipped as they are already deleted + SmallPtrSet SkipStores; + // Keep track of all of the objects that are invisible to the caller until the + // function returns. 
+ SmallPtrSet InvisibleToCaller; + // Keep track of blocks with throwing instructions not modeled in MemorySSA. + SmallPtrSet ThrowingBlocks; + + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} + + static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, + DominatorTree &DT, PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + DSEState State(F, AA, MSSA, DT, PDT, TLI); + // Collect blocks with throwing instructions not modeled in MemorySSA and + // alloc-like objects. + for (Instruction &I : instructions(F)) { + if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) + State.ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && + hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + State.MemDefs.push_back(MD); + + // Track alloca and alloca-like objects. Here we care about objects not + // visible to the caller during function execution. Alloca objects are + // invalid in the caller, for alloca-like objects we ensure that they are + // not captured throughout the function. + if (isa(&I) || + (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) + State.InvisibleToCaller.insert(&I); + } + // Treat byval or inalloca arguments the same as Allocas, stores to them are + // dead at the end of the function. 
+ for (Argument &AI : F.args()) + if (AI.hasByValOrInAllocaAttr()) + State.InvisibleToCaller.insert(&AI); + return State; + } + + Optional getLocForWriteEx(Instruction *I) const { + if (!I->mayWriteToMemory()) + return None; + + if (auto *MTI = dyn_cast(I)) + return {MemoryLocation::getForDest(MTI)}; + + if (auto CS = CallSite(I)) { + if (Function *F = CS.getCalledFunction()) { + StringRef FnName = F->getName(); + if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) + return {MemoryLocation(CS.getArgument(0))}; + } + return None; + } + + return MemoryLocation::getOrNone(I); + } + + /// Returns true if \p Use completely overwrites \p DefLoc. + bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { + if (!UseInst->mayWriteToMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isModSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + + Optional UseLoc = getLocForWriteEx(UseInst); + return isModSet(MR) && isMustSet(MR) && + UseLoc->Size.getValue() >= DefLoc.Size.getValue(); + } + + /// Returns true if \p Use may read from \p DefLoc. + bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { + if (!UseInst->mayReadFromMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. 
+ if (isRefSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + return isRefSet(MR); + } + + // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no + // read access in between or return None otherwise. The returned value may not + // (completely) overwrite \p DefLoc. Currently we bail out when we encounter + // any of the following + // * An aliasing MemoryUse (read). + // * A MemoryPHI. + Optional getDomMemoryDef(MemoryDef *KillingDef, + MemoryAccess *Current, + MemoryLocation DefLoc, + bool DefVisibleToCaller, + int &ScanLimit) const { + MemoryDef *DomDef; + MemoryAccess *StartDef = Current; + bool StepAgain; + LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current + << "\n"); + // Find the next clobbering Mod access for DefLoc, starting at Current. + do { + StepAgain = false; + // Reached TOP. + if (MSSA.isLiveOnEntryDef(Current)) + return None; + + MemoryUseOrDef *CurrentUD = dyn_cast(Current); + if (!CurrentUD) + return None; + + // Look for access that clobber DefLoc. + MemoryAccess *DomAccess = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess( + CurrentUD->getDefiningAccess(), DefLoc); + DomDef = dyn_cast(DomAccess); + if (!DomDef || MSSA.isLiveOnEntryDef(DomDef)) + return None; + + // Check if we can skip DomDef for DSE. We also require the KillingDef + // execute whenever DomDef executes and use post-dominance to ensure that. + if (canSkipDef(DomDef, DefVisibleToCaller) || + !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) { + StepAgain = true; + Current = DomDef; + } + + } while (StepAgain); + + LLVM_DEBUG(dbgs() << " Checking for reads of " << *DomDef << " (" + << *DomDef->getMemoryInst() << ")\n"); + + SmallSetVector WorkList; + auto PushMemUses = [&WorkList](MemoryAccess *Acc) { + for (Use &U : Acc->uses()) + WorkList.insert(cast(U.getUser())); + }; + PushMemUses(DomDef); + + // Check if DomDef may be read. 
+ for (unsigned I = 0; I < WorkList.size(); I++) { + MemoryAccess *UseAccess = WorkList[I]; + + LLVM_DEBUG(dbgs() << " Checking use " << *UseAccess); + if (--ScanLimit == 0) { + LLVM_DEBUG(dbgs() << " ... hit scan limit\n"); + return None; + } + + // Bail out on MemoryPhis for now. + if (isa(UseAccess)) { + LLVM_DEBUG(dbgs() << " ... hit MemoryPhi\n"); + return None; + } + + Instruction *UseInst = cast(UseAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); + + if (isNoopIntrinsic(cast(UseAccess))) { + PushMemUses(UseAccess); + continue; + } + + // Uses which may read the original MemoryDef mean we cannot eliminate the + // original MD. Stop walk. + if (isReadClobber(DefLoc, UseInst)) { + LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + return None; + } + + if (StartDef == UseAccess) + continue; + + // Check all uses for MemoryDefs, except for defs completely overwriting + // the original location. Otherwise we have to check uses of *all* + // MemoryDefs we discover, including non-aliasing ones. Otherwise we might + // miss cases like the following + // 1 = Def(LoE) ; <----- DomDef stores [0,1] + // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] + // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. + // (The Use points to the *first* Def it may alias) + // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, + // stores [0,1] + if (MemoryDef *UseDef = dyn_cast(UseAccess)) { + if (!isCompleteOverwrite(DefLoc, UseInst)) + PushMemUses(UseDef); + } + } + + // No aliasing MemoryUses of DomDef found, DomDef is potentially dead. + return {DomDef}; + } + + // Delete dead memory defs + void deleteDeadInstruction(Instruction *SI) { + MemorySSAUpdater Updater(&MSSA); + SmallVector NowDeadInsts; + NowDeadInsts.push_back(SI); + --NumFastOther; + + while (!NowDeadInsts.empty()) { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // Remove the Instruction from MSSA. 
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { + Updater.removeMemoryAccess(MA); + if (MemoryDef *MD = dyn_cast(MA)) { + SkipStores.insert(MD); + } + } + + // Remove its operands + for (Use &O : DeadInst->operands()) + if (Instruction *OpI = dyn_cast(O)) { + O = nullptr; + if (isInstructionTriviallyDead(OpI, &TLI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + } + } + + // Check for any extra throws between SI and NI that block DSE. This only + // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may + // throw are handled during the walk from one def to the next. + bool mayThrowBetween(Instruction *SI, Instruction *NI, + const Value *SILocUnd) const { + // First see if we can ignore it by using the fact that SI is an + // alloca/alloca like object that is not visible to the caller during + // execution of the function. + if (SILocUnd && InvisibleToCaller.count(SILocUnd)) + return false; + + if (SI->getParent() == NI->getParent()) + return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); + return !ThrowingBlocks.empty(); + } + + // Check if \p NI acts as a DSE barrier for \p SI. The following instructions + // act as barriers: + // * A memory instruction that may throw and \p SI accesses a non-stack + // object. + // * Atomic stores stronger than monotonic. + bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, + const Value *SILocUnd, Instruction *NI, + MemoryLocation &NILoc) const { + // If NI may throw it acts as a barrier, unless we are to an alloca/alloca + // like object that does not escape.
+ if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd)) + return true; + + if (NI->isAtomic()) { + if (auto *NSI = dyn_cast(NI)) { + if (isStrongerThanMonotonic(NSI->getOrdering())) + return true; + } else + llvm_unreachable( + "Other instructions should be modeled/skipped in MemorySSA"); + } + + return false; + } +}; + +bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, + MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + const DataLayout &DL = F.getParent()->getDataLayout(); + bool MadeChange = false; + + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + // For each store: + for (unsigned I = 0; I < State.MemDefs.size(); I++) { + MemoryDef *Current = State.MemDefs[I]; + if (State.SkipStores.count(Current)) + continue; + Instruction *SI = cast(Current)->getMemoryInst(); + MemoryLocation SILoc = *State.getLocForWriteEx(SI); + assert(SILoc.Ptr && "SILoc should not be null"); + const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); + Instruction *DefObj = + const_cast(dyn_cast(SILocUnd)); + bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd); + if (DefObj && ((isAllocLikeFn(DefObj, &TLI) && + !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT)))) + DefVisibleToCaller = false; + + LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *SI + << "\n"); + + int ScanLimit = MemorySSAScanLimit; + MemoryDef *StartDef = Current; + Optional Next; + // Walk MemorySSA upward to find MemoryDefs that might be killed by SI. 
+ while ((Next = State.getDomMemoryDef(StartDef, Current, SILoc, + DefVisibleToCaller, ScanLimit))) { + MemoryAccess *DomAccess = *Next; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n"); + MemoryDef *NextDef = dyn_cast(DomAccess); + Instruction *NI = NextDef->getMemoryInst(); + LLVM_DEBUG(dbgs() << " def " << *NI << "\n"); + + if (!hasAnalyzableMemoryWrite(NI, TLI)) + break; + MemoryLocation NILoc = *State.getLocForWriteEx(NI); + // Check for anything that looks like it will be a barrier to further + // removal + if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) { + LLVM_DEBUG(dbgs() << " stop, barrier\n"); + break; + } + + // Before we try to remove anything, check for any extra throwing + // instructions that block us from DSEing + if (State.mayThrowBetween(SI, NI, SILocUnd)) { + LLVM_DEBUG(dbgs() << " stop, may throw!\n"); + break; + } + + // Check if NI overwrites SI. + int64_t InstWriteOffset, DepWriteOffset; + InstOverlapIntervalsTy IOL; + OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, NI, IOL, AA, &F); + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI + << "\n KILLER: " << *SI << '\n'); + State.deleteDeadInstruction(NI); + ++NumFastStores; + MadeChange = true; + } else + Current = NextDef; + } + } + + return MadeChange; +} +} // end anonymous namespace + //===----------------------------------------------------------------------===// // DSE Pass //===----------------------------------------------------------------------===// PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { - AliasAnalysis *AA = &AM.getResult(F); - DominatorTree *DT = &AM.getResult(F); - MemoryDependenceResults *MD = &AM.getResult(F); - const TargetLibraryInfo *TLI = &AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + const TargetLibraryInfo &TLI = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + + if (EnableMemorySSA) { + MemorySSA 
&MSSA = AM.getResult(F).getMSSA(); + PostDominatorTree &PDT = AM.getResult(F); - if (!eliminateDeadStores(F, AA, MD, DT, TLI)) - return PreservedAnalyses::all(); + if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI)) + return PreservedAnalyses::all(); + } else { + MemoryDependenceResults &MD = AM.getResult(F); + + if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI)) + return PreservedAnalyses::all(); + } PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - PA.preserve(); + if (EnableMemorySSA) + PA.preserve(); + else + PA.preserve(); return PA; } @@ -1388,25 +1858,42 @@ if (skipFunction(F)) return false; - DominatorTree *DT = &getAnalysis().getDomTree(); - AliasAnalysis *AA = &getAnalysis().getAAResults(); - MemoryDependenceResults *MD = - &getAnalysis().getMemDep(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); + AliasAnalysis &AA = getAnalysis().getAAResults(); + DominatorTree &DT = getAnalysis().getDomTree(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); - return eliminateDeadStores(F, AA, MD, DT, TLI); + if (EnableMemorySSA) { + MemorySSA &MSSA = getAnalysis().getMSSA(); + PostDominatorTree &PDT = + getAnalysis().getPostDomTree(); + + return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); + } else { + MemoryDependenceResults &MD = + getAnalysis().getMemDep(); + + return eliminateDeadStores(F, &AA, &MD, &DT, &TLI); + } } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + + if (EnableMemorySSA) { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } else { + AU.addRequired(); + AU.addPreserved(); + } } }; @@ -1417,8 +1904,10 @@ INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll copy from llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll copy to llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -1,51 +1,6 @@ -; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s - -; We conservative choose to prevent dead store elimination -; across release or stronger fences. It's not required -; (since the must still be a race on %addd.i), but -; it is conservatively correct. A legal optimization -; could hoist the second store above the fence, and then -; DSE one of them. -define void @test1(i32* %addr.i) { -; CHECK-LABEL: @test1 -; CHECK: store i32 5 -; CHECK: fence -; CHECK: store i32 5 -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - store i32 5, i32* %addr.i, align 4 - ret void -} - -; Same as previous, but with different values. If we ever optimize -; this more aggressively, this allows us to check that the correct -; store is retained (the 'i32 1' store in this case) -define void @test1b(i32* %addr.i) { -; CHECK-LABEL: @test1b -; CHECK: store i32 42 -; CHECK: fence release -; CHECK: store i32 1 -; CHECK: ret - store i32 42, i32* %addr.i, align 4 - fence release - store i32 1, i32* %addr.i, align 4 - ret void -} +; XFAIL: * -; We *could* DSE across this fence, but don't. No other thread can -; observe the order of the acquire fence and the store. 
-define void @test2(i32* %addr.i) { -; CHECK-LABEL: @test2 -; CHECK: store -; CHECK: fence -; CHECK: store -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence acquire - store i32 5, i32* %addr.i, align 4 - ret void -} +; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s ; We DSE stack alloc'ed and byval locations, in the presence of fences. ; Fence does not make an otherwise thread local store visible. @@ -93,4 +48,3 @@ store i32 4, i32* %P1, align 4 ret void } - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -46,51 +46,3 @@ store i32 5, i32* %addr.i, align 4 ret void } - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. -; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. 
-define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { +; +; NO-LIMIT-LABEL: @test2( +; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; NO-LIMIT: bb1: +; NO-LIMIT-NEXT: br label [[BB3:%.*]] +; NO-LIMIT: bb2: +; NO-LIMIT-NEXT: br label [[BB3]] +; NO-LIMIT: bb3: +; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]] +; NO-LIMIT-NEXT: ret void +; +; LIMIT-0-LABEL: @test2( +; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-0: bb1: +; LIMIT-0-NEXT: br label [[BB3:%.*]] +; LIMIT-0: bb2: +; LIMIT-0-NEXT: br label [[BB3]] +; LIMIT-0: bb3: +; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-0-NEXT: ret void +; +; LIMIT-3-LABEL: @test2( 
+; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]] +; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-3: bb1: +; LIMIT-3-NEXT: br label [[BB3:%.*]] +; LIMIT-3: bb2: +; LIMIT-3-NEXT: br label [[BB3]] +; LIMIT-3: bb3: +; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[P]] +; LIMIT-3-NEXT: ret void +; +; LIMIT-4-LABEL: @test2( +; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-4: bb1: +; LIMIT-4-NEXT: br label [[BB3:%.*]] +; LIMIT-4: bb2: +; LIMIT-4-NEXT: br label [[BB3]] +; LIMIT-4: bb3: +; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-4-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %R + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -59,8 +59,7 @@ ; Should not delete the volatile memset. define void @test17v(i8* %P, i8* %Q) nounwind ssp { ; CHECK-LABEL: @test17v( -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) @@ -74,8 +73,7 @@ ; Previously this was not allowed (PR8728), also discussed in PR11763. 
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) @@ -85,8 +83,7 @@ define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -25,7 +25,6 @@ define i8* @test_return_captures_2() { ; CHECK-LABEL: @test_return_captures_2( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: store i8 1, i8* [[M]] @@ -91,7 +90,6 @@ define i8* @test_malloc_capture_3() { ; CHECK-LABEL: @test_malloc_capture_3( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; 
CHECK-NEXT: store i8 1, i8* [[M]] @@ -191,7 +189,6 @@ define i8* @test_malloc_capture_7() { ; CHECK-LABEL: @test_malloc_capture_7( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @may_throw() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -215,10 +212,10 @@ define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( ; CHECK-NEXT: [[M:%.*]] = alloca i8 -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -240,6 +237,7 @@ ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -262,6 +260,7 @@ ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: +; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -111,7 +111,6 @@ ; CHECK: for.body4.lr.ph: ; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], 
[[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] -; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.body4: @@ -170,3 +169,116 @@ %exitcond29 = icmp eq i32 %inc11, %N br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph } + +declare i1 @cond() readnone + +; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi. +define void @loop_multiple_def_uses(i32* noalias %P) { +; CHECK-LABEL: @loop_multiple_def_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1, i32* %P, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + store i32 1, i32* %P, align 4 + %lv = load i32, i32* %P + br label %for.header + +end: + store i32 3, i32* %P, align 4 + ret void +} + +; We cannot eliminate the store in for.header, as it is only partially +; overwritten in for.body and read afterwards. 
+define void @loop_multiple_def_uses_partial_write(i32* noalias %p) { +; CHECK-LABEL: @loop_multiple_def_uses_partial_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: store i8 1, i8* [[C]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1239491, i32* %p, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + %c = bitcast i32* %p to i8* + store i8 1, i8* %c, align 4 + %lv = load i32, i32* %p + br label %for.header + +end: + store i32 3, i32* %p, align 4 + ret void +} + +; We cannot eliminate the store in for.header, as the location is not overwritten +; in for.body and read afterwards. 
+define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) { + +; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1239491, i32* %p, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + store i32 1, i32* %q, align 4 + %lv = load i32, i32* %p + br label %for.header + +end: + store i32 3, i32* %p, align 4 + ret void +} + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -103,3 +103,73 @@ store i8 1, i8* %P2 ret void } + +declare void @hoge() + +; Check a function with a MemoryPhi with 3 incoming values. 
+define void @widget(i32* %Ptr, i1 %c1, i1 %c2, i32 %v1, i32 %v2, i32 %v3) { +; CHECK-LABEL: @widget( +; CHECK-NEXT: bb: +; CHECK-NEXT: tail call void @hoge() +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[C2:%.*]], label [[BB2:%.*]], label [[BB3]] +; CHECK: bb2: +; CHECK-NEXT: store i32 -1, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: switch i32 [[V1:%.*]], label [[BB8:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB5:%.*]] +; CHECK-NEXT: i32 1, label [[BB6:%.*]] +; CHECK-NEXT: i32 2, label [[BB7:%.*]] +; CHECK-NEXT: ] +; CHECK: bb5: +; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb6: +; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb7: +; CHECK-NEXT: store i32 2, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB4]] +; +bb: + tail call void @hoge() + br i1 %c1, label %bb3, label %bb1 + +bb1: ; preds = %bb + br i1 %c2, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + store i32 -1, i32* %Ptr, align 4 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %bb + br label %bb4 + +bb4: ; preds = %bb8, %bb3 + switch i32 %v1, label %bb8 [ + i32 0, label %bb5 + i32 1, label %bb6 + i32 2, label %bb7 + ] + +bb5: ; preds = %bb4 + store i32 0, i32* %Ptr, align 4 + br label %bb8 + +bb6: ; preds = %bb4 + store i32 1, i32* %Ptr, align 4 + br label %bb8 + +bb7: ; preds = %bb4 + store i32 2, i32* %Ptr, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb5, %bb4 + br label %bb4 +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll @@ -32,14 +32,13 @@ define void 
@second_store_bigger(i32* noalias %P) { ; CHECK-LABEL: @second_store_bigger( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P]] to i64* +; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P:%.*]] to i64* ; CHECK-NEXT: store i64 0, i64* [[P_I64]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -6,14 +6,13 @@ define void @test2(i32* noalias %P) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; store i32 1, i32* %P @@ -53,7 +52,6 @@ define void @test7(i32* noalias %P, i32* noalias %Q) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]] @@ -61,7 +59,7 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[Q]] +; CHECK-NEXT: store i32 0, i32* [[Q:%.*]] ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: ret void ; @@ -114,3 +112,38 @@ store i32 0, i32* %P ret void } + +; We cannot eliminate `store i32 0, i32* %P`, as it is read by the later load. +; Make sure that we check the uses of `store i32 1, i32* %P.1 which does not +; alias %P. Note that uses point to the *first* def that may alias. 
+define void @overlapping_read(i32* %P) { +; CHECK-LABEL: @overlapping_read( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: [[P_1:%.*]] = getelementptr i32, i32* [[P]], i32 1 +; CHECK-NEXT: store i32 1, i32* [[P_1]] +; CHECK-NEXT: [[P_64:%.*]] = bitcast i32* [[P]] to i64* +; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[P_64]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + %P.1 = getelementptr i32, i32* %P, i32 1 + store i32 1, i32* %P.1 + + %P.64 = bitcast i32* %P to i64* + %lv = load i64, i64* %P.64 + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ret void +bb3: + store i32 2, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -294,8 +294,7 @@ ; The memmove is dead, because memcpy arguments cannot overlap. define void @test38(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; @@ -307,8 +306,7 @@ ; The memmove is dead, because memcpy arguments cannot overlap. 
define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; @@ -379,7 +377,9 @@ define void @test41(i32* noalias %P) { ; CHECK-LABEL: @test41( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: store i32 2, i32* [[P]] ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ;